smtp/address: refactor

This commit is contained in:
Zack Schuster 2020-06-16 23:32:18 -07:00
parent 9b2a2e15e6
commit a1b317e74b
1 changed file with 110 additions and 199 deletions

View File

@ -1,23 +1,4 @@
/* interface AddressToken {
* Operator tokens and which tokens are expected to end the sequence
*/
const OPERATORS = {
'"': '"',
'(': ')',
'<': '>',
',': '',
// Groups are ended by semicolons
':': ';',
// Semicolons are not a legal delimiter per the RFC2822 grammar other
// than for terminating a group, but they are also not valid for any
// other use in this context. Given that some mail clients have
// historically allowed the semicolon as a delimiter equivalent to the
// comma in their UI, it makes sense to treat them the same as a comma
// when used outside of a group.
';': '',
};
interface TokenizerNode {
type: 'operator' | 'text'; type: 'operator' | 'text';
value: string; value: string;
} }
@ -28,134 +9,79 @@ export interface AddressObject {
group?: AddressObject[]; group?: AddressObject[];
} }
/** /*
* Creates a Tokenizer object for tokenizing address field strings * Operator tokens and which tokens are expected to end the sequence
*
* @constructor
* @param {String} str Address field string
*/ */
class Tokenizer { const OPERATORS = new Map([
private operatorExpecting = ''; ['"', '"'],
private node?: TokenizerNode; ['(', ')'],
private escaped = false; ['<', '>'],
private list: TokenizerNode[] = []; [',', ''],
private str: string; // Groups are ended by semicolons
[':', ';'],
// Semicolons are not a legal delimiter per the RFC2822 grammar other
// than for terminating a group, but they are also not valid for any
// other use in this context. Given that some mail clients have
// historically allowed the semicolon as a delimiter equivalent to the
// comma in their UI, it makes sense to treat them the same as a comma
// when used outside of a group.
[';', ''],
]);
constructor(str: string | string[] = '') { /**
this.str = str.toString(); * Tokenizes the original input string
} *
* @param {string | string[] | undefined} address string(s) to tokenize
* @return {AddressToken[]} An array of operator|text tokens
*/
function tokenizeAddress(address: string | string[] = '') {
const tokens: AddressToken[] = [];
let token: AddressToken | undefined = undefined;
let operator: string | undefined = undefined;
/** for (const character of address.toString()) {
* Tokenizes the original input string if ((operator?.length ?? 0) > 0 && character === operator) {
* tokens.push({ type: 'operator', value: character });
* @return {Array} An array of operator|text tokens token = undefined;
*/ operator = undefined;
public tokenize() { } else if ((operator?.length ?? 0) === 0 && OPERATORS.has(character)) {
let chr; tokens.push({ type: 'operator', value: character });
const list: TokenizerNode[] = []; token = undefined;
operator = OPERATORS.get(character);
for (let i = 0, len = this.str.length; i < len; i++) { } else {
chr = this.str.charAt(i); if (token == null) {
this.checkChar(chr); token = { type: 'text', value: character };
} tokens.push(token);
} else {
for (const node of this.list) { token.value += character;
node.value = (node.value || '').toString().trim();
if (node.value) {
list.push(node);
} }
} }
return list;
} }
/** return tokens
* Checks if a character is an operator or text and acts accordingly .map((x) => {
* x.value = x.value.trim();
* @param {string} chr Character from the address field return x;
* @returns {void} })
*/ .filter((x) => x.value.length > 0);
public checkChar(chr: string) {
if ((chr in OPERATORS || chr === '\\') && this.escaped) {
this.escaped = false;
} else if (this.operatorExpecting && chr === this.operatorExpecting) {
this.node = {
type: 'operator',
value: chr,
};
this.list.push(this.node);
this.node = undefined;
this.operatorExpecting = '';
this.escaped = false;
return;
} else if (!this.operatorExpecting && chr in OPERATORS) {
this.node = {
type: 'operator',
value: chr,
};
this.list.push(this.node);
this.node = undefined;
this.operatorExpecting = OPERATORS[chr as keyof typeof OPERATORS];
this.escaped = false;
return;
}
if (!this.escaped && chr === '\\') {
this.escaped = true;
return;
}
if (!this.node) {
this.node = {
type: 'text',
value: '',
};
this.list.push(this.node);
}
if (this.escaped && chr !== '\\') {
this.node.value += '\\';
}
this.node.value += chr;
this.escaped = false;
}
} }
/** /**
* Converts tokens for a single address into an address object * Converts tokens for a single address into an address object
* *
* @param {TokenizerNode[]} tokens Tokens object * @param {AddressToken[]} tokens Tokens object
* @return {AddressObject[]} addresses object array * @return {AddressObject[]} addresses object array
*/ */
function handleAddress(tokens: TokenizerNode[]) { function convertAddressTokens(tokens: AddressToken[]) {
let isGroup = false; const addressObjects: AddressObject[] = [];
let state = 'text'; const groups: string[] = [];
let address: AddressObject;
let addresses: string[] = []; let addresses: string[] = [];
let comments: string[] = []; let comments: string[] = [];
let texts: string[] = []; let texts: string[] = [];
const groups: string[] = []; let state = 'text';
const addressObjects: AddressObject[] = []; let isGroup = false;
function handleToken(token: AddressToken) {
const data: {
address: string;
comment: string;
group: string;
text: string;
} = {
address: '',
comment: '',
group: '',
text: '',
};
// Filter out <addresses>, (comments) and regular text
for (let i = 0, len = tokens.length; i < len; i++) {
const token = tokens[i];
if (token.type === 'operator') { if (token.type === 'operator') {
switch (token.value) { switch (token.value) {
case '<': case '<':
@ -172,37 +98,39 @@ function handleAddress(tokens: TokenizerNode[]) {
state = 'text'; state = 'text';
break; break;
} }
} else { } else if (token.value.length > 0) {
if (token.value) { switch (state) {
switch (state) { case 'address':
case 'address': addresses.push(token.value);
addresses.push(token.value); break;
break; case 'comment':
case 'comment': comments.push(token.value);
comments.push(token.value); break;
break; case 'group':
case 'group': groups.push(token.value);
groups.push(token.value); break;
break; default:
default: texts.push(token.value);
texts.push(token.value); break;
break;
}
} }
} }
} }
// Filter out <addresses>, (comments) and regular text
for (const token of tokens) {
handleToken(token);
}
// If there is no text but there is a comment, use the comment as the text // If there is no text but there is a comment, use the comment as the text
if (texts.length === 0 && comments.length > 0) { if (texts.length === 0 && comments.length > 0) {
texts = [...comments]; texts = [...comments];
comments = []; comments = [];
} }
// http://tools.ietf.org/html/rfc2822#appendix-A.1.3
if (isGroup) { if (isGroup) {
// http://tools.ietf.org/html/rfc2822#appendix-A.1.3
data.text = texts.join(' ');
addressObjects.push({ addressObjects.push({
name: data.text || undefined, name: texts.length === 0 ? undefined : texts.join(' '),
group: groups.length > 0 ? addressparser(groups.join(',')) : [], group: groups.length > 0 ? addressparser(groups.join(',')) : [],
}); });
} else { } else {
@ -215,22 +143,21 @@ function handleAddress(tokens: TokenizerNode[]) {
} }
} }
const _regexHandler = function (address: string) {
if (addresses.length === 0) {
addresses = [address.trim()];
return ' ';
} else {
return address;
}
};
// still no address // still no address
if (addresses.length === 0) { if (addresses.length === 0) {
for (let i = texts.length - 1; i >= 0; i--) { for (let i = texts.length - 1; i >= 0; i--) {
texts[i] = texts[i] texts[i] = texts[i]
.replace(/\s*\b[^@\s]+@[^@\s]+\b\s*/, _regexHandler) .replace(/\s*\b[^@\s]+@[^@\s]+\b\s*/, (address: string) => {
if (addresses.length === 0) {
addresses = [address.trim()];
return ' ';
} else {
return address;
}
})
.trim(); .trim();
if (addresses.length) {
if (addresses.length > 0) {
break; break;
} }
} }
@ -245,30 +172,25 @@ function handleAddress(tokens: TokenizerNode[]) {
// Keep only the first address occurrence, push others to regular text // Keep only the first address occurrence, push others to regular text
if (addresses.length > 1) { if (addresses.length > 1) {
texts = texts.concat(addresses.splice(1)); texts = [...texts, ...addresses.splice(1)];
} }
// Join values with spaces if (addresses.length === 0 && isGroup) {
data.text = texts.join(' ');
data.address = addresses.join(' ');
if (!data.address && isGroup) {
return []; return [];
} else { } else {
address = { // Join values with spaces
address: data.address || data.text || '', let address = addresses.join(' ');
name: data.text || data.address || '', let name = texts.length === 0 ? address : texts.join(' ');
};
if (address.address === address.name) { if (address === name) {
if ((address.address || '').match(/@/)) { if (address.match(/@/)) {
address.name = ''; name = '';
} else { } else {
address.address = ''; address = '';
} }
} }
addressObjects.push(address); addressObjects.push({ address, name });
} }
} }
@ -286,41 +208,30 @@ function handleAddress(tokens: TokenizerNode[]) {
* *
* [{name: "Name", address: "address@domain"}] * [{name: "Name", address: "address@domain"}]
* *
* @param {string} str Address field * @param {string | string[] | undefined} address Address field
* @return {AddressObject[]} An array of address objects * @return {AddressObject[]} An array of address objects
*/ */
export function addressparser(str?: string | string[]) { export function addressparser(address?: string | string[]) {
const tokenizer = new Tokenizer(str); const addresses: AddressObject[] = [];
const tokens = tokenizer.tokenize(); let tokens: AddressToken[] = [];
const addresses: TokenizerNode[][] = []; for (const token of tokenizeAddress(address)) {
let address: TokenizerNode[] = [];
let parsedAddresses: AddressObject[] = [];
for (const token of tokens) {
if ( if (
token.type === 'operator' && token.type === 'operator' &&
(token.value === ',' || token.value === ';') (token.value === ',' || token.value === ';')
) { ) {
if (address.length) { if (tokens.length > 0) {
addresses.push(address); addresses.push(...convertAddressTokens(tokens));
} }
address = []; tokens = [];
} else { } else {
address.push(token); tokens.push(token);
} }
} }
if (address.length) { if (tokens.length > 0) {
addresses.push(address); addresses.push(...convertAddressTokens(tokens));
} }
for (const address of addresses) { return addresses;
const handled = handleAddress(address);
if (handled.length) {
parsedAddresses = parsedAddresses.concat(handled);
}
}
return parsedAddresses;
} }