From e575f1cb19721d249d5ce36c510907637c770de1 Mon Sep 17 00:00:00 2001 From: Zack Schuster Date: Tue, 16 Jun 2020 20:11:19 -0700 Subject: [PATCH] smtp: inline addressparser code --- email.ts | 1 + smtp/address.ts | 326 ++++++++++++++++++++++++++++++++++++++++++++++++ smtp/client.ts | 4 +- smtp/message.ts | 3 +- test/address.ts | 148 ++++++++++++++++++++++ 5 files changed, 478 insertions(+), 4 deletions(-) create mode 100644 smtp/address.ts create mode 100644 test/address.ts diff --git a/email.ts b/email.ts index 93652eb..90950f5 100644 --- a/email.ts +++ b/email.ts @@ -1,3 +1,4 @@ +export * from './smtp/address'; export * from './smtp/client'; export * from './smtp/connection'; export * from './smtp/date'; diff --git a/smtp/address.ts b/smtp/address.ts new file mode 100644 index 0000000..94e0958 --- /dev/null +++ b/smtp/address.ts @@ -0,0 +1,326 @@ +/* + * Operator tokens and which tokens are expected to end the sequence + */ +const OPERATORS = { + '"': '"', + '(': ')', + '<': '>', + ',': '', + // Groups are ended by semicolons + ':': ';', + // Semicolons are not a legal delimiter per the RFC2822 grammar other + // than for terminating a group, but they are also not valid for any + // other use in this context. Given that some mail clients have + // historically allowed the semicolon as a delimiter equivalent to the + // comma in their UI, it makes sense to treat them the same as a comma + // when used outside of a group. 
+ ';': '', +}; + +interface TokenizerNode { + type: 'operator' | 'text'; + value: string; +} + +export interface AddressObject { + address?: string; + name?: string; + group?: AddressObject[]; +} + +/** + * Creates a Tokenizer object for tokenizing address field strings + * + * @constructor + * @param {String} str Address field string + */ +class Tokenizer { + private operatorExpecting = ''; + private node?: TokenizerNode; + private escaped = false; + private list: TokenizerNode[] = []; + private str: string; + + constructor(str: string | string[] = '') { + this.str = str.toString(); + } + + /** + * Tokenizes the original input string + * + * @return {Array} An array of operator|text tokens + */ + public tokenize() { + let chr; + const list: TokenizerNode[] = []; + + for (let i = 0, len = this.str.length; i < len; i++) { + chr = this.str.charAt(i); + this.checkChar(chr); + } + + for (const node of this.list) { + node.value = (node.value || '').toString().trim(); + if (node.value) { + list.push(node); + } + } + + return list; + } + + /** + * Checks if a character is an operator or text and acts accordingly + * + * @param {string} chr Character from the address field + * @returns {void} + */ + public checkChar(chr: string) { + if ((chr in OPERATORS || chr === '\\') && this.escaped) { + this.escaped = false; + } else if (this.operatorExpecting && chr === this.operatorExpecting) { + this.node = { + type: 'operator', + value: chr, + }; + this.list.push(this.node); + this.node = undefined; + this.operatorExpecting = ''; + this.escaped = false; + return; + } else if (!this.operatorExpecting && chr in OPERATORS) { + this.node = { + type: 'operator', + value: chr, + }; + this.list.push(this.node); + this.node = undefined; + this.operatorExpecting = OPERATORS[chr as keyof typeof OPERATORS]; + this.escaped = false; + return; + } + + if (!this.escaped && chr === '\\') { + this.escaped = true; + return; + } + + if (!this.node) { + this.node = { + type: 'text', + value: '', + }; 
+ this.list.push(this.node); + } + + if (this.escaped && chr !== '\\') { + this.node.value += '\\'; + } + + this.node.value += chr; + this.escaped = false; + } +} +/** + * Converts tokens for a single address into an address object + * + * @param {TokenizerNode[]} tokens Tokens object + * @return {AddressObject[]} addresses object array + */ +function handleAddress(tokens: TokenizerNode[]) { + let isGroup = false; + let state = 'text'; + + let address: AddressObject; + + let addresses: string[] = []; + let comments: string[] = []; + let texts: string[] = []; + + const groups: string[] = []; + const addressObjects: AddressObject[] = []; + + const data: { + address: string; + comment: string; + group: string; + text: string; + } = { + address: '', + comment: '', + group: '', + text: '', + }; + + // Filter out <addresses>, (comments) and regular text + for (let i = 0, len = tokens.length; i < len; i++) { + const token = tokens[i]; + + if (token.type === 'operator') { + switch (token.value) { + case '<': + state = 'address'; + break; + case '(': + state = 'comment'; + break; + case ':': + state = 'group'; + isGroup = true; + break; + default: + state = 'text'; + break; + } + } else { + if (token.value) { + switch (state) { + case 'address': + addresses.push(token.value); + break; + case 'comment': + comments.push(token.value); + break; + case 'group': + groups.push(token.value); + break; + default: + texts.push(token.value); + break; + } + } + } + } + + // If there is no text but a comment, replace the two + if (texts.length === 0 && comments.length > 0) { + texts = [...comments]; + comments = []; + } + + if (isGroup) { + // http://tools.ietf.org/html/rfc2822#appendix-A.1.3 + data.text = texts.join(' '); + addressObjects.push({ + name: data.text || undefined, + group: groups.length > 0 ? 
addressparser(groups.join(',')) : [], + }); + } else { + // If no address was found, try to detect one from regular text + if (addresses.length === 0 && texts.length > 0) { + for (let i = texts.length - 1; i >= 0; i--) { + if (texts[i].match(/^[^@\s]+@[^@\s]+$/)) { + addresses = texts.splice(i, 1); + break; + } + } + + const _regexHandler = function (address: string) { + if (addresses.length === 0) { + addresses = [address.trim()]; + return ' '; + } else { + return address; + } + }; + + // still no address + if (addresses.length === 0) { + for (let i = texts.length - 1; i >= 0; i--) { + texts[i] = texts[i] + .replace(/\s*\b[^@\s]+@[^@\s]+\b\s*/, _regexHandler) + .trim(); + if (addresses.length) { + break; + } + } + } + } + + // If there is still no text but a comment exists, replace the two + if (texts.length === 0 && comments.length > 0) { + texts = [...comments]; + comments = []; + } + + // Keep only the first address occurrence, push others to regular text + if (addresses.length > 1) { + texts = texts.concat(addresses.splice(1)); + } + + // Join values with spaces + data.text = texts.join(' '); + data.address = addresses.join(' '); + + if (!data.address && isGroup) { + return []; + } else { + address = { + address: data.address || data.text || '', + name: data.text || data.address || '', + }; + + if (address.address === address.name) { + if ((address.address || '').match(/@/)) { + address.name = ''; + } else { + address.address = ''; + } + } + + addressObjects.push(address); + } + } + + return addressObjects; +} + +/** + * Parses structured e-mail addresses from an address field + * + * Example: + * + * "Name <address@domain>" + * + * will be converted to + * + * [{name: "Name", address: "address@domain"}] + * + * @param {string} str Address field + * @return {AddressObject[]} An array of address objects + */ +export function addressparser(str?: string | string[]) { + const tokenizer = new Tokenizer(str); + const tokens = tokenizer.tokenize(); + + const addresses: 
TokenizerNode[][] = []; + let address: TokenizerNode[] = []; + let parsedAddresses: AddressObject[] = []; + + for (const token of tokens) { + if ( + token.type === 'operator' && + (token.value === ',' || token.value === ';') + ) { + if (address.length) { + addresses.push(address); + } + address = []; + } else { + address.push(token); + } + } + + if (address.length) { + addresses.push(address); + } + + for (const address of addresses) { + const handled = handleAddress(address); + if (handled.length) { + parsedAddresses = parsedAddresses.concat(handled); + } + } + + return parsedAddresses; +} diff --git a/smtp/client.ts b/smtp/client.ts index e75f5fe..e1d1774 100644 --- a/smtp/client.ts +++ b/smtp/client.ts @@ -1,4 +1,4 @@ -import addressparser from 'addressparser'; +import { addressparser } from './address'; import { Message } from './message'; import type { MessageAttachment, MessageHeaders } from './message'; import { SMTPConnection, SMTPState } from './connection'; @@ -120,7 +120,7 @@ export class SMTPClient { const parsedReturnPath = addressparser(returnPath); if (parsedReturnPath.length > 0) { const [{ address: returnPathAddress }] = parsedReturnPath; - stack.returnPath = returnPathAddress; + stack.returnPath = returnPathAddress as string; } } diff --git a/smtp/message.ts b/smtp/message.ts index dc6c73d..395ecfe 100644 --- a/smtp/message.ts +++ b/smtp/message.ts @@ -3,8 +3,7 @@ import type { PathLike, ReadStream } from 'fs'; import { hostname } from 'os'; import { Stream } from 'stream'; -import addressparser from 'addressparser'; - +import { addressparser } from './address'; import { getRFC2822Date } from './date'; import { mimeWordEncode } from './mime'; diff --git a/test/address.ts b/test/address.ts new file mode 100644 index 0000000..f935fd1 --- /dev/null +++ b/test/address.ts @@ -0,0 +1,148 @@ +import test from 'ava'; +import { addressparser } from '../email'; + +test('addressparser should handle single address correctly', async (t) => { + 
t.deepEqual(addressparser('andris@tr.ee'), [ + { address: 'andris@tr.ee', name: '' }, + ]); +}); + +test('addressparser should handle multiple addresses correctly', async (t) => { + t.deepEqual(addressparser('andris@tr.ee, andris@example.com'), [ + { address: 'andris@tr.ee', name: '' }, + { address: 'andris@example.com', name: '' }, + ]); +}); + +test('addressparser should handle unquoted name correctly', async (t) => { + t.deepEqual(addressparser('andris <andris@tr.ee>'), [ + { name: 'andris', address: 'andris@tr.ee' }, + ]); +}); + +test('addressparser should handle quoted name correctly', async (t) => { + t.deepEqual(addressparser('"reinman, andris" <andris@tr.ee>'), [ + { name: 'reinman, andris', address: 'andris@tr.ee' }, + ]); +}); + +test('addressparser should handle quoted semicolons correctly', async (t) => { + t.deepEqual(addressparser('"reinman; andris" <andris@tr.ee>'), [ + { name: 'reinman; andris', address: 'andris@tr.ee' }, + ]); +}); + +test('addressparser should handle unquoted name, unquoted address correctly', async (t) => { + t.deepEqual(addressparser('andris andris@tr.ee'), [ + { name: 'andris', address: 'andris@tr.ee' }, + ]); +}); + +test('addressparser should handle empty group correctly', async (t) => { + t.deepEqual(addressparser('Undisclosed:;'), [ + { name: 'Undisclosed', group: [] }, + ]); +}); + +test('addressparser should handle address group correctly', async (t) => { + t.deepEqual(addressparser('Disclosed:andris@tr.ee, andris@example.com;'), [ + { + name: 'Disclosed', + group: [ + { address: 'andris@tr.ee', name: '' }, + { address: 'andris@example.com', name: '' }, + ], + }, + ]); +}); + +test('addressparser should handle semicolon as a delimiter', async (t) => { + t.deepEqual(addressparser('andris@tr.ee; andris@example.com;'), [ + { address: 'andris@tr.ee', name: '' }, + { address: 'andris@example.com', name: '' }, + ]); +}); + +test('addressparser should handle mixed group correctly', async (t) => { + t.deepEqual( + addressparser( + 'Test User <test.user@mail.ee>, Disclosed:andris@tr.ee, 
andris@example.com;,,,, Undisclosed:;' + ), + [ + { address: 'test.user@mail.ee', name: 'Test User' }, + { + name: 'Disclosed', + group: [ + { address: 'andris@tr.ee', name: '' }, + { address: 'andris@example.com', name: '' }, + ], + }, + { name: 'Undisclosed', group: [] }, + ] + ); +}); + +test('addressparser semicolon as delimiter should not break group parsing ', async (t) => { + t.deepEqual( + addressparser( + 'Test User <test.user@mail.ee>; Disclosed:andris@tr.ee, andris@example.com;,,,, Undisclosed:; bob@example.com;' + ), + [ + { address: 'test.user@mail.ee', name: 'Test User' }, + { + name: 'Disclosed', + group: [ + { + address: 'andris@tr.ee', + name: '', + }, + { + address: 'andris@example.com', + name: '', + }, + ], + }, + { name: 'Undisclosed', group: [] }, + { address: 'bob@example.com', name: '' }, + ] + ); +}); + +test('addressparser should handle name from comment correctly', async (t) => { + t.deepEqual(addressparser('andris@tr.ee (andris)'), [ + { name: 'andris', address: 'andris@tr.ee' }, + ]); +}); + +test('addressparser should handle skip comment correctly', async (t) => { + t.deepEqual(addressparser('andris@tr.ee (reinman) andris'), [ + { name: 'andris', address: 'andris@tr.ee' }, + ]); +}); + +test('addressparser should handle missing address correctly', async (t) => { + t.deepEqual(addressparser('andris'), [{ name: 'andris', address: '' }]); +}); + +test('addressparser should handle apostrophe in name correctly', async (t) => { + t.deepEqual(addressparser("O'Neill"), [{ name: "O'Neill", address: '' }]); +}); + +test('addressparser should handle particularly bad input, unescaped colon correctly', async (t) => { + t.deepEqual( + addressparser( + 'FirstName Surname-WithADash :: Company <firstname@company.com>' + ), + [ + { + name: 'FirstName Surname-WithADash', + group: [ + { + name: undefined, + group: [{ address: 'firstname@company.com', name: 'Company' }], + }, + ], + }, + ] + ); +});