Chore: Refactor string-utils to TypeScript (#9869)

2025-02-19 20:00:20 +02:00 · 2024-02-07 06:16:54 -08:00 · 2024-02-07 06:16:54 -08:00 · 01ec640bdb
commit 01ec640bdb
parent c1e5adf658
6 changed files with 103 additions and 106 deletions
--- a/.eslintignore
+++ b/.eslintignore
@ -984,6 +984,8 @@ packages/lib/services/synchronizer/utils/syncDeleteStep.js
 packages/lib/services/synchronizer/utils/types.js
 packages/lib/shim-init-node.js
 packages/lib/shim.js
+packages/lib/string-utils.test.js
+packages/lib/string-utils.js
 packages/lib/testing/syncTargetUtils.js
 packages/lib/testing/test-utils-synchronizer.js
 packages/lib/testing/test-utils.js
--- a/.gitignore
+++ b/.gitignore
@ -964,6 +964,8 @@ packages/lib/services/synchronizer/utils/syncDeleteStep.js
 packages/lib/services/synchronizer/utils/types.js
 packages/lib/shim-init-node.js
 packages/lib/shim.js
+packages/lib/string-utils.test.js
+packages/lib/string-utils.js
 packages/lib/testing/syncTargetUtils.js
 packages/lib/testing/test-utils-synchronizer.js
 packages/lib/testing/test-utils.js
--- a/packages/lib/StringUtils.test.js
+++ b/packages/lib/StringUtils.test.js
@ -1,86 +0,0 @@
-/* eslint-disable no-unused-vars */
-
-const { splitCommandBatch } = require('./string-utils');
-const StringUtils = require('./string-utils');
-
-describe('StringUtils', () => {
-
-
-
-	it('should surround keywords with strings', (async () => {
-		const testCases = [
-			[[], 'test', 'a', 'b', null, 'test'],
-			[['test'], 'test', 'a', 'b', null, 'atestb'],
-			[['test'], 'Test', 'a', 'b', null, 'aTestb'],
-			[['te[]st'], 'Te[]st', 'a', 'b', null, 'aTe[]stb'],
-			// [['test1', 'test2'], 'bla test1 blabla test1 bla test2 not this one - test22', 'a', 'b', 'bla atest1b blabla atest1b bla atest2b not this one - test22'],
-			[['test1', 'test2'], 'bla test1 test1 bla test2', '<span class="highlighted-keyword">', '</span>', null, 'bla <span class="highlighted-keyword">test1</span> <span class="highlighted-keyword">test1</span> bla <span class="highlighted-keyword">test2</span>'],
-			// [[{ type:'regex', value:'test.*?'}], 'bla test1 test1 bla test2 test tttest', 'a', 'b', 'bla atest1b atest1b bla atest2b atestb tttest'],
-			[['test'], 'testTest', 'a', 'b', { escapeHtml: true }, 'atestbaTestb'],
-			[['test'], 'test test Test', 'a', 'b', { escapeHtml: true }, 'atestb atestb aTestb'],
-			[['d'], 'dfasdf', '[', ']', { escapeHtml: true }, '[d]fas[d]f'],
-			[[{ scriptType: 'en', type: 'regex', value: 'd*', valueRegex: 'd[^ \t\n\r,\\.,\\+\\-\\*\\?\\!\\=\\{\\}\\<\\>\\|\\:"\'\\(\\)\\[\\]]*?' }], 'dfasdf', '[', ']', { escapeHtml: true }, '[d]fas[d]f'],
-			[['zzz'], 'zzz<img src=q onerror=eval("require(\'child_process\').exec(\'mate-calc\');");>', 'a', 'b', { escapeHtml: true }, 'azzzb&lt;img src=q onerror=eval(&quot;require(&apos;child_process&apos;).exec(&apos;mate-calc&apos;);&quot;);&gt;'],
-		];
-
-		for (let i = 0; i < testCases.length; i++) {
-			const t = testCases[i];
-
-			const keywords = t[0];
-			const input = t[1];
-			const prefix = t[2];
-			const suffix = t[3];
-			const options = t[4];
-			const expected = t[5];
-
-			const actual = StringUtils.surroundKeywords(keywords, input, prefix, suffix, options);
-
-			expect(actual).toBe(expected, `Test case ${i}`);
-		}
-	}));
-
-	it('should find the next whitespace character', (async () => {
-		const testCases = [
-			['', [[0, 0]]],
-			['Joplin', [[0, 6], [3, 6], [6, 6]]],
-			['Joplin is a free, open source\n note taking and *to-do* application', [[0, 6], [12, 17], [23, 29], [48, 54]]],
-		];
-
-		// eslint-disable-next-line github/array-foreach -- Old code before rule was applied
-		testCases.forEach((t, i) => {
-			const str = t[0];
-			// eslint-disable-next-line github/array-foreach -- Old code before rule was applied
-			t[1].forEach((pair, j) => {
-				const begin = pair[0];
-				const expected = pair[1];
-
-				const actual = StringUtils.nextWhitespaceIndex(str, begin);
-				expect(actual).toBe(expected, `Test string ${i} - case ${j}`);
-			});
-		});
-	}));
-
-	it('should split the command batch by newlines not inside quotes', (async () => {
-		const eol = '\n';
-		const testCases = [
-			['',
-				['']],
-			['command1',
-				['command1']],
-			['command1 arg1 arg2 arg3',
-				['command1 arg1 arg2 arg3']],
-			[`command1 arg1 'arg2${eol}continue' arg3`,
-				[`command1 arg1 'arg2${eol}continue' arg3`]],
-			[`command1 arg1 'arg2${eol}continue'${eol}command2${eol}command3 'arg1${eol}continue${eol}continue' arg2 arg3`,
-				[`command1 arg1 'arg2${eol}continue'`, 'command2', `command3 'arg1${eol}continue${eol}continue' arg2 arg3`]],
-			[`command1 arg\\1 'arg2${eol}continue\\'continue' arg3`,
-				[`command1 arg\\1 'arg2${eol}continue\\'continue' arg3`]],
-		];
-
-		// eslint-disable-next-line github/array-foreach -- Old code before rule was applied
-		testCases.forEach((t) => {
-			expect(splitCommandBatch(t[0])).toEqual(t[1]);
-		});
-	}));
-
-});
--- a/packages/lib/string-utils-common.js
+++ b/packages/lib/string-utils-common.js
@ -1,3 +1,7 @@
+// Leave this file as JavaScript -- our current TypeScript configuration
+// generates code that tries to access modules/exports, which is incompatible
+// with browser environments.
+
 function pregQuote(str, delimiter = '') {
 	return (`${str}`).replace(new RegExp(`[.\\\\+*?\\[\\^\\]$(){}=!<>|:\\${delimiter || ''}-]`, 'g'), '\\$&');
 }
--- a/packages/lib/string-utils.test.ts
+++ b/packages/lib/string-utils.test.ts
@ -0,0 +1,65 @@
+import { splitCommandBatch } from './string-utils';
+import * as StringUtils from './string-utils';
+
+describe('string-utils', () => {
+
+	test.each([
+		[[], 'test', 'a', 'b', null, 'test'],
+		[['test'], 'test', 'a', 'b', null, 'atestb'],
+		[['test'], 'Test', 'a', 'b', null, 'aTestb'],
+		[['te[]st'], 'Te[]st', 'a', 'b', null, 'aTe[]stb'],
+		// [['test1', 'test2'], 'bla test1 blabla test1 bla test2 not this one - test22', 'a', 'b', 'bla atest1b blabla atest1b bla atest2b not this one - test22'],
+		[['test1', 'test2'], 'bla test1 test1 bla test2', '<span class="highlighted-keyword">', '</span>', null, 'bla <span class="highlighted-keyword">test1</span> <span class="highlighted-keyword">test1</span> bla <span class="highlighted-keyword">test2</span>'],
+		// [[{ type:'regex', value:'test.*?'}], 'bla test1 test1 bla test2 test tttest', 'a', 'b', 'bla atest1b atest1b bla atest2b atestb tttest'],
+		[['test'], 'testTest', 'a', 'b', { escapeHtml: true }, 'atestbaTestb'],
+		[['test'], 'test test Test', 'a', 'b', { escapeHtml: true }, 'atestb atestb aTestb'],
+		[['d'], 'dfasdf', '[', ']', { escapeHtml: true }, '[d]fas[d]f'],
+		[
+			[{
+				scriptType: 'en',
+				type: 'regex',
+				value: 'd*',
+				valueRegex: 'd[^ \t\n\r,\\.,\\+\\-\\*\\?\\!\\=\\{\\}\\<\\>\\|\\:"\'\\(\\)\\[\\]]*?',
+			} as StringUtils.KeywordObjectType],
+			'dfasdf', '[', ']', { escapeHtml: true }, '[d]fas[d]f',
+		],
+		[['zzz'], 'zzz<img src=q onerror=eval("require(\'child_process\').exec(\'mate-calc\');");>', 'a', 'b', { escapeHtml: true }, 'azzzb&lt;img src=q onerror=eval(&quot;require(&apos;child_process&apos;).exec(&apos;mate-calc&apos;);&quot;);&gt;'],
+	])('should surround keywords with strings (case %#)', (async (keywords, input, prefix, suffix, options, expected) => {
+		const actual = StringUtils.surroundKeywords(keywords, input, prefix, suffix, options);
+
+		expect(actual).toBe(expected);
+	}));
+
+	test.each([
+		['', [[0, 0]]],
+		['Joplin', [[0, 6], [3, 6], [6, 6]]],
+		['Joplin is a free, open source\n note taking and *to-do* application', [[0, 6], [12, 17], [23, 29], [48, 54]]],
+	])('should find the next whitespace character in string %s', (async (str, testCases) => {
+		for (const range of testCases) {
+			const begin = range[0];
+			const expected = range[1];
+
+			const actual = StringUtils.nextWhitespaceIndex(str, begin);
+			expect(actual).toBe(expected);
+		}
+	}));
+
+	const eol = '\n';
+	test.each([
+		['',
+			['']],
+		['command1',
+			['command1']],
+		['command1 arg1 arg2 arg3',
+			['command1 arg1 arg2 arg3']],
+		[`command1 arg1 'arg2${eol}continue' arg3`,
+			[`command1 arg1 'arg2${eol}continue' arg3`]],
+		[`command1 arg1 'arg2${eol}continue'${eol}command2${eol}command3 'arg1${eol}continue${eol}continue' arg2 arg3`,
+			[`command1 arg1 'arg2${eol}continue'`, 'command2', `command3 'arg1${eol}continue${eol}continue' arg2 arg3`]],
+		[`command1 arg\\1 'arg2${eol}continue\\'continue' arg3`,
+			[`command1 arg\\1 'arg2${eol}continue\\'continue' arg3`]],
+	])('should split the command batch by newlines not inside quotes (case %#)', (async (batch, expected) => {
+		expect(splitCommandBatch(batch)).toEqual(expected);
+	}));
+
+});
--- a/packages/lib/string-utils.ts
+++ b/packages/lib/string-utils.ts
@ -2,6 +2,9 @@ const Entities = require('html-entities').AllHtmlEntities;
 const htmlentities = new Entities().encode;
 const stringUtilsCommon = require('./string-utils-common.js');

+export const pregQuote = stringUtilsCommon.pregQuote;
+export const replaceRegexDiacritics = stringUtilsCommon.replaceRegexDiacritics;
+
 const defaultDiacriticsRemovalMap = [
 	{ base: 'A', letters: /[\u0041\u24B6\uFF21\u00C0\u00C1\u00C2\u1EA6\u1EA4\u1EAA\u1EA8\u00C3\u0100\u0102\u1EB0\u1EAE\u1EB4\u1EB2\u0226\u01E0\u00C4\u01DE\u1EA2\u00C5\u01FA\u01CD\u0200\u0202\u1EA0\u1EAC\u1EB6\u1E00\u0104\u023A\u2C6F]/g },
 	{ base: 'AA', letters: /[\uA732]/g },
@ -89,7 +92,7 @@ const defaultDiacriticsRemovalMap = [
 	{ base: 'z', letters: /[\u007A\u24E9\uFF5A\u017A\u1E91\u017C\u017E\u1E93\u1E95\u01B6\u0225\u0240\u2C6C\uA763]/g },
 ];

-function removeDiacritics(str) {
+export function removeDiacritics(str: string) {
 	for (let i = 0; i < defaultDiacriticsRemovalMap.length; i++) {
 		str = str.replace(defaultDiacriticsRemovalMap[i].letters, defaultDiacriticsRemovalMap[i].base);
 	}
@ -97,7 +100,7 @@ function removeDiacritics(str) {
 	return str;
 }

-function escapeFilename(s, maxLength = 32) {
+export function escapeFilename(s: string, maxLength = 32) {
 	let output = removeDiacritics(s);
 	output = output.replace('\n\r', ' ');
 	output = output.replace('\r\n', ' ');
@ -116,7 +119,7 @@ function escapeFilename(s, maxLength = 32) {
 	return output.substr(0, maxLength);
 }

-function wrap(text, indent, width) {
+export function wrap(text: string, indent: string, width: number) {
 	const wrap_ = require('word-wrap');

 	return wrap_(text, {
@ -125,7 +128,7 @@ function wrap(text, indent, width) {
 	});
 }

-function commandArgumentsToString(args) {
+export function commandArgumentsToString(args: string[]) {
 	const output = [];
 	for (let i = 0; i < args.length; i++) {
 		let arg = args[i];
@ -138,7 +141,7 @@ function commandArgumentsToString(args) {
 	return output.join(' ');
 }

-function splitCommandBatch(commandBatch) {
+export function splitCommandBatch(commandBatch: string) {
 	const commandLines = [];
 	const eol = '\n';

@ -191,7 +194,7 @@ function splitCommandBatch(commandBatch) {
 	return commandLines;
 }

-function padLeft(string, length, padString) {
+export function padLeft(string: string, length: number, padString: string) {
 	if (!string) return '';

 	while (string.length < length) {
@ -201,16 +204,16 @@ function padLeft(string, length, padString) {
 	return string;
 }

-function toTitleCase(string) {
+export function toTitleCase(string: string) {
 	if (!string) return string;
 	return string.charAt(0).toUpperCase() + string.slice(1);
 }

-function urlDecode(string) {
+export function urlDecode(string: string) {
 	return decodeURIComponent((`${string}`).replace(/\+/g, '%20'));
 }

-function escapeHtml(s) {
+export function escapeHtml(s: string) {
 	return s
 		.replace(/&/g, '&amp;')
 		.replace(/</g, '&lt;')
@ -221,8 +224,15 @@ function escapeHtml(s) {

// keywords can either be a list of strings, or a list of objects with one of the formats:
// { value: 'actualkeyword', type: 'string' } or { valueRegex: 'pattern', type: 'regex' }
+export type KeywordObjectType = { value: string; type: 'string' }|{ valueRegex: string; type: 'regex' };
+export type KeywordType = string[]|KeywordObjectType[];
+
+interface SurroundKeywordOptions {
+	escapeHtml: boolean;
+}
+
 // The function surrounds the keywords wherever they are, even within other words.
-function surroundKeywords(keywords, text, prefix, suffix, options = null) {
+export function surroundKeywords(keywords: KeywordType, text: string, prefix: string, suffix: string, options: SurroundKeywordOptions|null = null) {
 	options = { escapeHtml: false, ...options };

 	text = options.escapeHtml ? htmlentities(text) : text;
@ -231,12 +241,13 @@ function surroundKeywords(keywords, text, prefix, suffix, options = null) {

 	let regexString = keywords
 		.map(k => {
-			if (k.type === 'regex') {
-				return stringUtilsCommon.replaceRegexDiacritics(k.valueRegex);
+			let regex;
+			if (typeof k === 'string' || k.type === 'string') {
+				regex = stringUtilsCommon.pregQuote(typeof k === 'string' ? k : k.value);
 			} else {
-				const value = typeof k === 'string' ? k : k.value;
-				return stringUtilsCommon.replaceRegexDiacritics(stringUtilsCommon.pregQuote(value));
+				regex = k.valueRegex;
 			}
+			return stringUtilsCommon.replaceRegexDiacritics(regex);
 		})
 		.join('|');
 	regexString = `(${regexString})`;
@ -244,18 +255,18 @@ function surroundKeywords(keywords, text, prefix, suffix, options = null) {
 	return text.replace(re, `${prefix}$1${suffix}`);
 }

-function substrWithEllipsis(s, start, length) {
+export function substrWithEllipsis(s: string, start: number, length: number) {
 	if (s.length <= length) return s;
 	return `${s.substr(start, length - 3)}...`;
 }

-function nextWhitespaceIndex(s, begin) {
+export function nextWhitespaceIndex(s: string, begin: number) {
 	// Returns the index of the next whitespace character at or after `begin`, or s.length if there is none
 	const i = s.slice(begin).search(/\s/);
 	return i < 0 ? s.length : begin + i;
 }

-function camelCaseToDash(s) {
+export function camelCaseToDash(s: string) {
 	const output = [];
 	for (let i = 0; i < s.length; i++) {
 		const c = s[i];
@ -270,7 +281,7 @@ function camelCaseToDash(s) {
 	return output.join('');
 }

-function formatCssSize(v) {
+export function formatCssSize(v: string) {
 	if (typeof v === 'string') {
 		if (v.includes('px') || v.includes('em') || v.includes('%')) return v;
 	}
@ -282,7 +293,7 @@ const REGEX_CHINESE = /[\u4e00-\u9fff]|[\u3400-\u4dbf]|[\u{20000}-\u{2a6df}]|[\u
 const REGEX_KOREAN = /[\uac00-\ud7af]|[\u1100-\u11ff]|[\u3130-\u318f]|[\ua960-\ua97f]|[\ud7b0-\ud7ff]/;
 const REGEX_THAI = /[\u0e00-\u0e7f]/;

-function scriptType(s) {
+export function scriptType(s: string) {
 	// A string made up entirely of Chinese characters would also be detected as
 	// Japanese, so Chinese detection must go first.
 	if (REGEX_CHINESE.test(s)) return 'zh';
@ -292,4 +303,3 @@ function scriptType(s) {
 	return 'en';
 }

-module.exports = { formatCssSize, camelCaseToDash, removeDiacritics, substrWithEllipsis, nextWhitespaceIndex, escapeFilename, wrap, splitCommandBatch, padLeft, toTitleCase, urlDecode, escapeHtml, surroundKeywords, scriptType, commandArgumentsToString, ...stringUtilsCommon };