smtp/address: refactor

Zack Schuster 2020-06-16 23:32:18 -07:00
parent 9b2a2e15e6
commit a1b317e74b
1 changed file with 110 additions and 199 deletions


@@ -1,23 +1,4 @@
/*
* Operator tokens and which tokens are expected to end the sequence
*/
const OPERATORS = {
'"': '"',
'(': ')',
'<': '>',
',': '',
// Groups are ended by semicolons
':': ';',
// Semicolons are not a legal delimiter per the RFC2822 grammar other
// than for terminating a group, but they are also not valid for any
// other use in this context. Given that some mail clients have
// historically allowed the semicolon as a delimiter equivalent to the
// comma in their UI, it makes sense to treat them the same as a comma
// when used outside of a group.
';': '',
};
interface TokenizerNode {
interface AddressToken {
type: 'operator' | 'text';
value: string;
}
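As the comment carried over into the new OPERATORS map below notes, a semicolon outside of a group is treated the same as a comma. A minimal illustration of that behavior, assuming a relative import of the exported addressparser from this module:

import { addressparser } from './address';

// Either delimiter splits the field into two top-level addresses.
addressparser('alice@example.com, bob@example.com');
addressparser('alice@example.com; bob@example.com');
// Both calls should yield:
// [
//   { address: 'alice@example.com', name: '' },
//   { address: 'bob@example.com', name: '' },
// ]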
@@ -28,134 +9,79 @@ export interface AddressObject {
group?: AddressObject[];
}
/**
* Creates a Tokenizer object for tokenizing address field strings
*
* @constructor
* @param {String} str Address field string
/*
* Operator tokens and which tokens are expected to end the sequence
*/
class Tokenizer {
private operatorExpecting = '';
private node?: TokenizerNode;
private escaped = false;
private list: TokenizerNode[] = [];
private str: string;
const OPERATORS = new Map([
['"', '"'],
['(', ')'],
['<', '>'],
[',', ''],
// Groups are ended by semicolons
[':', ';'],
// Semicolons are not a legal delimiter per the RFC2822 grammar other
// than for terminating a group, but they are also not valid for any
// other use in this context. Given that some mail clients have
// historically allowed the semicolon as a delimiter equivalent to the
// comma in their UI, it makes sense to treat them the same as a comma
// when used outside of a group.
[';', ''],
]);
constructor(str: string | string[] = '') {
this.str = str.toString();
}
/**
* Tokenizes the original input string
*
* @param {string | string[] | undefined} address string(s) to tokenize
* @return {AddressToken[]} An array of operator|text tokens
*/
function tokenizeAddress(address: string | string[] = '') {
const tokens: AddressToken[] = [];
let token: AddressToken | undefined = undefined;
let operator: string | undefined = undefined;
/**
* Tokenizes the original input string
*
* @return {Array} An array of operator|text tokens
*/
public tokenize() {
let chr;
const list: TokenizerNode[] = [];
for (let i = 0, len = this.str.length; i < len; i++) {
chr = this.str.charAt(i);
this.checkChar(chr);
}
for (const node of this.list) {
node.value = (node.value || '').toString().trim();
if (node.value) {
list.push(node);
for (const character of address.toString()) {
if ((operator?.length ?? 0) > 0 && character === operator) {
tokens.push({ type: 'operator', value: character });
token = undefined;
operator = undefined;
} else if ((operator?.length ?? 0) === 0 && OPERATORS.has(character)) {
tokens.push({ type: 'operator', value: character });
token = undefined;
operator = OPERATORS.get(character);
} else {
if (token == null) {
token = { type: 'text', value: character };
tokens.push(token);
} else {
token.value += character;
}
}
return list;
}
/**
* Checks if a character is an operator or text and acts accordingly
*
* @param {string} chr Character from the address field
* @returns {void}
*/
public checkChar(chr: string) {
if ((chr in OPERATORS || chr === '\\') && this.escaped) {
this.escaped = false;
} else if (this.operatorExpecting && chr === this.operatorExpecting) {
this.node = {
type: 'operator',
value: chr,
};
this.list.push(this.node);
this.node = undefined;
this.operatorExpecting = '';
this.escaped = false;
return;
} else if (!this.operatorExpecting && chr in OPERATORS) {
this.node = {
type: 'operator',
value: chr,
};
this.list.push(this.node);
this.node = undefined;
this.operatorExpecting = OPERATORS[chr as keyof typeof OPERATORS];
this.escaped = false;
return;
}
if (!this.escaped && chr === '\\') {
this.escaped = true;
return;
}
if (!this.node) {
this.node = {
type: 'text',
value: '',
};
this.list.push(this.node);
}
if (this.escaped && chr !== '\\') {
this.node.value += '\\';
}
this.node.value += chr;
this.escaped = false;
}
return tokens
.map((x) => {
x.value = x.value.trim();
return x;
})
.filter((x) => x.value.length > 0);
}
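For reference, a sketch of the token stream the rewritten tokenizer produces; tokenizeAddress is module-private, so this is shown as an illustration rather than as public API:

// tokenizeAddress('Name <foo@bar.com>') returns, after trimming and
// dropping empty values:
// [
//   { type: 'text', value: 'Name' },
//   { type: 'operator', value: '<' },
//   { type: 'text', value: 'foo@bar.com' },
//   { type: 'operator', value: '>' },
// ]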
/**
* Converts tokens for a single address into an address object
*
* @param {TokenizerNode[]} tokens Tokens object
* @param {AddressToken[]} tokens Tokens object
* @return {AddressObject[]} addresses object array
*/
function handleAddress(tokens: TokenizerNode[]) {
let isGroup = false;
let state = 'text';
let address: AddressObject;
function convertAddressTokens(tokens: AddressToken[]) {
const addressObjects: AddressObject[] = [];
const groups: string[] = [];
let addresses: string[] = [];
let comments: string[] = [];
let texts: string[] = [];
const groups: string[] = [];
const addressObjects: AddressObject[] = [];
const data: {
address: string;
comment: string;
group: string;
text: string;
} = {
address: '',
comment: '',
group: '',
text: '',
};
// Filter out <addresses>, (comments) and regular text
for (let i = 0, len = tokens.length; i < len; i++) {
const token = tokens[i];
let state = 'text';
let isGroup = false;
function handleToken(token: AddressToken) {
if (token.type === 'operator') {
switch (token.value) {
case '<':
@@ -172,37 +98,39 @@ function handleAddress(tokens: TokenizerNode[]) {
state = 'text';
break;
}
} else {
if (token.value) {
switch (state) {
case 'address':
addresses.push(token.value);
break;
case 'comment':
comments.push(token.value);
break;
case 'group':
groups.push(token.value);
break;
default:
texts.push(token.value);
break;
}
} else if (token.value.length > 0) {
switch (state) {
case 'address':
addresses.push(token.value);
break;
case 'comment':
comments.push(token.value);
break;
case 'group':
groups.push(token.value);
break;
default:
texts.push(token.value);
break;
}
}
}
// Filter out <addresses>, (comments) and regular text
for (const token of tokens) {
handleToken(token);
}
// If there is no text but a comment, replace the two
if (texts.length === 0 && comments.length > 0) {
texts = [...comments];
comments = [];
}
// http://tools.ietf.org/html/rfc2822#appendix-A.1.3
if (isGroup) {
// http://tools.ietf.org/html/rfc2822#appendix-A.1.3
data.text = texts.join(' ');
addressObjects.push({
name: data.text || undefined,
name: texts.length === 0 ? undefined : texts.join(' '),
group: groups.length > 0 ? addressparser(groups.join(',')) : [],
});
} else {
@@ -215,22 +143,21 @@ function handleAddress(tokens: TokenizerNode[]) {
}
}
const _regexHandler = function (address: string) {
if (addresses.length === 0) {
addresses = [address.trim()];
return ' ';
} else {
return address;
}
};
// still no address
if (addresses.length === 0) {
for (let i = texts.length - 1; i >= 0; i--) {
texts[i] = texts[i]
.replace(/\s*\b[^@\s]+@[^@\s]+\b\s*/, _regexHandler)
.replace(/\s*\b[^@\s]+@[^@\s]+\b\s*/, (address: string) => {
if (addresses.length === 0) {
addresses = [address.trim()];
return ' ';
} else {
return address;
}
})
.trim();
if (addresses.length) {
if (addresses.length > 0) {
break;
}
}
@@ -245,30 +172,25 @@ function handleAddress(tokens: TokenizerNode[]) {
// Keep only the first address occurence, push others to regular text
if (addresses.length > 1) {
texts = texts.concat(addresses.splice(1));
texts = [...texts, ...addresses.splice(1)];
}
// Join values with spaces
data.text = texts.join(' ');
data.address = addresses.join(' ');
if (!data.address && isGroup) {
if (addresses.length === 0 && isGroup) {
return [];
} else {
address = {
address: data.address || data.text || '',
name: data.text || data.address || '',
};
// Join values with spaces
let address = addresses.join(' ');
let name = texts.length === 0 ? address : texts.join(' ');
if (address.address === address.name) {
if ((address.address || '').match(/@/)) {
address.name = '';
if (address === name) {
if (address.match(/@/)) {
name = '';
} else {
address.address = '';
address = '';
}
}
addressObjects.push(address);
addressObjects.push({ address, name });
}
}
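A sketch of the group handling above, assuming a relative import of the exported addressparser from this module; ':' opens an RFC 2822 group, ';' closes it, and members are parsed recursively:

import { addressparser } from './address';

addressparser('Friends: alice@example.com, bob@example.com;');
// Expected result:
// [{
//   name: 'Friends',
//   group: [
//     { address: 'alice@example.com', name: '' },
//     { address: 'bob@example.com', name: '' },
//   ],
// }]

// An empty group still yields a named entry with an empty member list.
addressparser('Undisclosed recipients:;');
// Expected result:
// [{ name: 'Undisclosed recipients', group: [] }]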
@@ -286,41 +208,30 @@ function handleAddress(tokens: TokenizerNode[]) {
*
* [{name: "Name", address: "address@domain"}]
*
* @param {string} str Address field
* @param {string | string[] | undefined} address Address field
* @return {AddressObject[]} An array of address objects
*/
export function addressparser(str?: string | string[]) {
const tokenizer = new Tokenizer(str);
const tokens = tokenizer.tokenize();
export function addressparser(address?: string | string[]) {
const addresses: AddressObject[] = [];
let tokens: AddressToken[] = [];
const addresses: TokenizerNode[][] = [];
let address: TokenizerNode[] = [];
let parsedAddresses: AddressObject[] = [];
for (const token of tokens) {
for (const token of tokenizeAddress(address)) {
if (
token.type === 'operator' &&
(token.value === ',' || token.value === ';')
) {
if (address.length) {
addresses.push(address);
if (tokens.length > 0) {
addresses.push(...convertAddressTokens(tokens));
}
address = [];
tokens = [];
} else {
address.push(token);
tokens.push(token);
}
}
if (address.length) {
addresses.push(address);
if (tokens.length > 0) {
addresses.push(...convertAddressTokens(tokens));
}
for (const address of addresses) {
const handled = handleAddress(address);
if (handled.length) {
parsedAddresses = parsedAddresses.concat(handled);
}
}
return parsedAddresses;
return addresses;
}
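The refactor keeps the exported addressparser signature unchanged (only the parameter was renamed from str to address). A usage sketch, again assuming a relative import from this module:

import { addressparser } from './address';

addressparser('Alice <alice@example.com>, bob@example.com');
// Expected result:
// [
//   { address: 'alice@example.com', name: 'Alice' },
//   { address: 'bob@example.com', name: '' },
// ]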