diff --git a/smtp/address.ts b/smtp/address.ts
index 94e0958..df79904 100644
--- a/smtp/address.ts
+++ b/smtp/address.ts
@@ -1,23 +1,4 @@
-/*
- * Operator tokens and which tokens are expected to end the sequence
- */
-const OPERATORS = {
-	'"': '"',
-	'(': ')',
-	'<': '>',
-	',': '',
-	// Groups are ended by semicolons
-	':': ';',
-	// Semicolons are not a legal delimiter per the RFC2822 grammar other
-	// than for terminating a group, but they are also not valid for any
-	// other use in this context. Given that some mail clients have
-	// historically allowed the semicolon as a delimiter equivalent to the
-	// comma in their UI, it makes sense to treat them the same as a comma
-	// when used outside of a group.
-	';': '',
-};
-
-interface TokenizerNode {
+interface AddressToken {
 	type: 'operator' | 'text';
 	value: string;
 }
@@ -28,134 +9,79 @@ export interface AddressObject {
 	group?: AddressObject[];
 }
 
-/**
- * Creates a Tokenizer object for tokenizing address field strings
- *
- * @constructor
- * @param {String} str Address field string
+/*
+ * Operator tokens and which tokens are expected to end the sequence
  */
-class Tokenizer {
-	private operatorExpecting = '';
-	private node?: TokenizerNode;
-	private escaped = false;
-	private list: TokenizerNode[] = [];
-	private str: string;
+const OPERATORS = new Map([
+	['"', '"'],
+	['(', ')'],
+	['<', '>'],
+	[',', ''],
+	// Groups are ended by semicolons
+	[':', ';'],
+	// Semicolons are not a legal delimiter per the RFC2822 grammar other
+	// than for terminating a group, but they are also not valid for any
+	// other use in this context. Given that some mail clients have
+	// historically allowed the semicolon as a delimiter equivalent to the
+	// comma in their UI, it makes sense to treat them the same as a comma
+	// when used outside of a group.
+	[';', ''],
+]);
 
-	constructor(str: string | string[] = '') {
-		this.str = str.toString();
-	}
+/**
+ * Tokenizes the original input string
+ *
+ * @param {string | string[] | undefined} address string(s) to tokenize
+ * @return {AddressToken[]} An array of operator|text tokens
+ */
+function tokenizeAddress(address: string | string[] = '') {
+	const tokens: AddressToken[] = [];
+	let token: AddressToken | undefined = undefined;
+	let operator: string | undefined = undefined;
 
-	/**
-	 * Tokenizes the original input string
-	 *
-	 * @return {Array} An array of operator|text tokens
-	 */
-	public tokenize() {
-		let chr;
-		const list: TokenizerNode[] = [];
-
-		for (let i = 0, len = this.str.length; i < len; i++) {
-			chr = this.str.charAt(i);
-			this.checkChar(chr);
-		}
-
-		for (const node of this.list) {
-			node.value = (node.value || '').toString().trim();
-			if (node.value) {
-				list.push(node);
+	for (const character of address.toString()) {
+		if ((operator?.length ?? 0) > 0 && character === operator) {
+			tokens.push({ type: 'operator', value: character });
+			token = undefined;
+			operator = undefined;
+		} else if ((operator?.length ?? 0) === 0 && OPERATORS.has(character)) {
+			tokens.push({ type: 'operator', value: character });
+			token = undefined;
+			operator = OPERATORS.get(character);
+		} else {
+			if (token == null) {
+				token = { type: 'text', value: character };
+				tokens.push(token);
+			} else {
+				token.value += character;
 			}
 		}
-
-		return list;
 	}
 
-	/**
-	 * Checks if a character is an operator or text and acts accordingly
-	 *
-	 * @param {string} chr Character from the address field
-	 * @returns {void}
-	 */
-	public checkChar(chr: string) {
-		if ((chr in OPERATORS || chr === '\\') && this.escaped) {
-			this.escaped = false;
-		} else if (this.operatorExpecting && chr === this.operatorExpecting) {
-			this.node = {
-				type: 'operator',
-				value: chr,
-			};
-			this.list.push(this.node);
-			this.node = undefined;
-			this.operatorExpecting = '';
-			this.escaped = false;
-			return;
-		} else if (!this.operatorExpecting && chr in OPERATORS) {
-			this.node = {
-				type: 'operator',
-				value: chr,
-			};
-			this.list.push(this.node);
-			this.node = undefined;
-			this.operatorExpecting = OPERATORS[chr as keyof typeof OPERATORS];
-			this.escaped = false;
-			return;
-		}
-
-		if (!this.escaped && chr === '\\') {
-			this.escaped = true;
-			return;
-		}
-
-		if (!this.node) {
-			this.node = {
-				type: 'text',
-				value: '',
-			};
-			this.list.push(this.node);
-		}
-
-		if (this.escaped && chr !== '\\') {
-			this.node.value += '\\';
-		}
-
-		this.node.value += chr;
-		this.escaped = false;
-	}
+	return tokens
+		.map((x) => {
+			x.value = x.value.trim();
+			return x;
+		})
+		.filter((x) => x.value.length > 0);
 }
 
+
 /**
  * Converts tokens for a single address into an address object
  *
- * @param {TokenizerNode[]} tokens Tokens object
+ * @param {AddressToken[]} tokens Tokens object
  * @return {AddressObject[]} addresses object array
  */
-function handleAddress(tokens: TokenizerNode[]) {
-	let isGroup = false;
-	let state = 'text';
-
-	let address: AddressObject;
-
+function convertAddressTokens(tokens: AddressToken[]) {
+	const addressObjects: AddressObject[] = [];
+	const groups: string[] = [];
 	let addresses: string[] = [];
 	let comments: string[] = [];
 	let texts: string[] = [];
-	const groups: string[] = [];
-	const addressObjects: AddressObject[] = [];
-
-	const data: {
-		address: string;
-		comment: string;
-		group: string;
-		text: string;
-	} = {
-		address: '',
-		comment: '',
-		group: '',
-		text: '',
-	};
-
-	// Filter out <addresses>, (comments) and regular text
-	for (let i = 0, len = tokens.length; i < len; i++) {
-		const token = tokens[i];
-
+	let state = 'text';
+	let isGroup = false;
+	function handleToken(token: AddressToken) {
 		if (token.type === 'operator') {
 			switch (token.value) {
 				case '<':
@@ -172,37 +98,39 @@ function handleAddress(tokens: TokenizerNode[]) {
 					state = 'text';
 					break;
 			}
-		} else {
-			if (token.value) {
-				switch (state) {
-					case 'address':
-						addresses.push(token.value);
-						break;
-					case 'comment':
-						comments.push(token.value);
-						break;
-					case 'group':
-						groups.push(token.value);
-						break;
-					default:
-						texts.push(token.value);
-						break;
-				}
+		} else if (token.value.length > 0) {
+			switch (state) {
+				case 'address':
+					addresses.push(token.value);
+					break;
+				case 'comment':
+					comments.push(token.value);
+					break;
+				case 'group':
+					groups.push(token.value);
+					break;
+				default:
+					texts.push(token.value);
+					break;
 			}
 		}
 	}
 
+	// Filter out <addresses>, (comments) and regular text
+	for (const token of tokens) {
+		handleToken(token);
+	}
+
 	// If there is no text but a comment, replace the two
 	if (texts.length === 0 && comments.length > 0) {
 		texts = [...comments];
 		comments = [];
 	}
 
+	// http://tools.ietf.org/html/rfc2822#appendix-A.1.3
 	if (isGroup) {
-		// http://tools.ietf.org/html/rfc2822#appendix-A.1.3
-		data.text = texts.join(' ');
 		addressObjects.push({
-			name: data.text || undefined,
+			name: texts.length === 0 ? undefined : texts.join(' '),
 			group: groups.length > 0 ? addressparser(groups.join(',')) : [],
 		});
 	} else {
@@ -215,22 +143,21 @@ function handleAddress(tokens: TokenizerNode[]) {
 			}
 		}
 
-		const _regexHandler = function (address: string) {
-			if (addresses.length === 0) {
-				addresses = [address.trim()];
-				return ' ';
-			} else {
-				return address;
-			}
-		};
-
 		// still no address
 		if (addresses.length === 0) {
 			for (let i = texts.length - 1; i >= 0; i--) {
 				texts[i] = texts[i]
-					.replace(/\s*\b[^@\s]+@[^@\s]+\b\s*/, _regexHandler)
+					.replace(/\s*\b[^@\s]+@[^@\s]+\b\s*/, (address: string) => {
+						if (addresses.length === 0) {
+							addresses = [address.trim()];
+							return ' ';
+						} else {
+							return address;
+						}
+					})
 					.trim();
-				if (addresses.length) {
+
+				if (addresses.length > 0) {
 					break;
 				}
 			}
@@ -245,30 +172,25 @@ function handleAddress(tokens: TokenizerNode[]) {
 
 		// Keep only the first address occurence, push others to regular text
 		if (addresses.length > 1) {
-			texts = texts.concat(addresses.splice(1));
+			texts = [...texts, ...addresses.splice(1)];
 		}
 
-		// Join values with spaces
-		data.text = texts.join(' ');
-		data.address = addresses.join(' ');
-
-		if (!data.address && isGroup) {
+		if (addresses.length === 0 && isGroup) {
 			return [];
 		} else {
-			address = {
-				address: data.address || data.text || '',
-				name: data.text || data.address || '',
-			};
+			// Join values with spaces
+			let address = addresses.join(' ');
+			let name = texts.length === 0 ? address : texts.join(' ');
 
-			if (address.address === address.name) {
-				if ((address.address || '').match(/@/)) {
-					address.name = '';
+			if (address === name) {
+				if (address.match(/@/)) {
+					name = '';
 				} else {
-					address.address = '';
+					address = '';
 				}
 			}
 
-			addressObjects.push(address);
+			addressObjects.push({ address, name });
 		}
 	}
 
@@ -286,41 +208,30 @@ function handleAddress(tokens: TokenizerNode[]) {
  *
  *     [{name: "Name", address: "address@domain"}]
  *
- * @param {string} str Address field
+ * @param {string | string[] | undefined} address Address field
  * @return {AddressObject[]} An array of address objects
  */
-export function addressparser(str?: string | string[]) {
-	const tokenizer = new Tokenizer(str);
-	const tokens = tokenizer.tokenize();
+export function addressparser(address?: string | string[]) {
+	const addresses: AddressObject[] = [];
+	let tokens: AddressToken[] = [];
 
-	const addresses: TokenizerNode[][] = [];
-	let address: TokenizerNode[] = [];
-	let parsedAddresses: AddressObject[] = [];
-
-	for (const token of tokens) {
+	for (const token of tokenizeAddress(address)) {
 		if (
 			token.type === 'operator' &&
 			(token.value === ',' || token.value === ';')
 		) {
-			if (address.length) {
-				addresses.push(address);
+			if (tokens.length > 0) {
+				addresses.push(...convertAddressTokens(tokens));
 			}
-			address = [];
+			tokens = [];
 		} else {
-			address.push(token);
+			tokens.push(token);
 		}
 	}
 
-	if (address.length) {
-		addresses.push(address);
+	if (tokens.length > 0) {
+		addresses.push(...convertAddressTokens(tokens));
 	}
 
-	for (const address of addresses) {
-		const handled = handleAddress(address);
-		if (handled.length) {
-			parsedAddresses = parsedAddresses.concat(handled);
-		}
-	}
-
-	return parsedAddresses;
+	return addresses;
 }
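A quick usage sketch of the refactored parser, for reference. The first call mirrors the JSDoc example carried over in the diff; the group example, its output shape, and the `./address` import path are inferred from the `:`/`;` handling in `tokenizeAddress` and `convertAddressTokens` rather than taken from documented behavior, so treat them as illustrative.

```ts
import { addressparser } from './address';

// Single mailbox with a display name (the JSDoc example above)
addressparser('Name <address@domain>');
// → [{ address: 'address@domain', name: 'Name' }]

// Comma-separated list plus an RFC 2822 group terminated by ';'
addressparser('a@example.com, Team: b@example.com, c@example.com;');
// → [
//     { address: 'a@example.com', name: '' },
//     {
//       name: 'Team',
//       group: [
//         { address: 'b@example.com', name: '' },
//         { address: 'c@example.com', name: '' },
//       ],
//     },
//   ]
```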