diff --git a/.eslintignore b/.eslintignore index 618271e60..22f33b046 100644 --- a/.eslintignore +++ b/.eslintignore @@ -1927,6 +1927,9 @@ packages/renderer/headerAnchor.js.map packages/renderer/htmlUtils.d.ts packages/renderer/htmlUtils.js packages/renderer/htmlUtils.js.map +packages/renderer/htmlUtils.test.d.ts +packages/renderer/htmlUtils.test.js +packages/renderer/htmlUtils.test.js.map packages/renderer/index.d.ts packages/renderer/index.js packages/renderer/index.js.map diff --git a/.gitignore b/.gitignore index 5fb8565d4..3f8358219 100644 --- a/.gitignore +++ b/.gitignore @@ -1917,6 +1917,9 @@ packages/renderer/headerAnchor.js.map packages/renderer/htmlUtils.d.ts packages/renderer/htmlUtils.js packages/renderer/htmlUtils.js.map +packages/renderer/htmlUtils.test.d.ts +packages/renderer/htmlUtils.test.js +packages/renderer/htmlUtils.test.js.map packages/renderer/index.d.ts packages/renderer/index.js packages/renderer/index.js.map diff --git a/packages/app-desktop/plugins/GotoAnything.tsx b/packages/app-desktop/plugins/GotoAnything.tsx index 54908bd87..19c9455a0 100644 --- a/packages/app-desktop/plugins/GotoAnything.tsx +++ b/packages/app-desktop/plugins/GotoAnything.tsx @@ -19,6 +19,7 @@ const { mergeOverlappingIntervals } = require('@joplin/lib/ArrayUtils.js'); import markupLanguageUtils from '../utils/markupLanguageUtils'; import focusEditorIfEditorCommand from '@joplin/lib/services/commands/focusEditorIfEditorCommand'; import Logger from '@joplin/lib/Logger'; +import { MarkupToHtml } from '@joplin/renderer'; const logger = Logger.create('GotoAnything'); @@ -81,7 +82,7 @@ class Dialog extends React.PureComponent { private inputRef: any; private itemListRef: any; private listUpdateIID_: any; - private markupToHtml_: any; + private markupToHtml_: MarkupToHtml; private userCallback_: any = null; constructor(props: Props) { diff --git a/packages/lib/htmlUtils.ts b/packages/lib/htmlUtils.ts index 02e48cb69..fff72e3ec 100644 --- a/packages/lib/htmlUtils.ts +++ 
b/packages/lib/htmlUtils.ts @@ -1,7 +1,6 @@ const urlUtils = require('./urlUtils.js'); const Entities = require('html-entities').AllHtmlEntities; const htmlentities = new Entities().encode; -const htmlparser2 = require('@joplin/fork-htmlparser2'); const { escapeHtml } = require('./string-utils.js'); // [\s\S] instead of . for multiline matching @@ -138,40 +137,6 @@ class HtmlUtils { return output.join(' '); } - public stripHtml(html: string) { - const output: string[] = []; - - const tagStack: any[] = []; - - const currentTag = () => { - if (!tagStack.length) return ''; - return tagStack[tagStack.length - 1]; - }; - - const disallowedTags = ['script', 'style', 'head', 'iframe', 'frameset', 'frame', 'object', 'base']; - - const parser = new htmlparser2.Parser({ - - onopentag: (name: string) => { - tagStack.push(name.toLowerCase()); - }, - - ontext: (decodedText: string) => { - if (disallowedTags.includes(currentTag())) return; - output.push(decodedText); - }, - - onclosetag: (name: string) => { - if (currentTag() === name.toLowerCase()) tagStack.pop(); - }, - - }, { decodeEntities: true }); - - parser.write(html); - parser.end(); - - return output.join('').replace(/\s+/g, ' '); - } } export default new HtmlUtils(); diff --git a/packages/renderer/htmlUtils.test.ts b/packages/renderer/htmlUtils.test.ts new file mode 100644 index 000000000..35c075a19 --- /dev/null +++ b/packages/renderer/htmlUtils.test.ts @@ -0,0 +1,32 @@ +import htmlUtils from './htmlUtils'; + +describe('htmlUtils', () => { + + test('should strip off HTML', () => { + const testCases = [ + [ + '', + '', + ], + [ + 'test', + 'test', + ], + [ + 'Joplin®', + 'JoplinĀ®', + ], + [ + '<b>test</b>', + '<b>test</b>', + ], + ]; + + for (const t of testCases) { + const [input, expected] = t; + const actual = htmlUtils.stripHtml(input); + expect(actual).toBe(expected); + } + }); + +}); diff --git a/packages/renderer/htmlUtils.ts b/packages/renderer/htmlUtils.ts index 0e24f21be..546239cdb 100644 --- 
a/packages/renderer/htmlUtils.ts +++ b/packages/renderer/htmlUtils.ts @@ -97,8 +97,7 @@ class HtmlUtils { return selfClosingElements.includes(tagName.toLowerCase()); } - // TODO: copied from @joplin/lib - stripHtml(html: string) { + public stripHtml(html: string) { const output: string[] = []; const tagStack: string[] = []; @@ -130,7 +129,14 @@ class HtmlUtils { parser.write(html); parser.end(); - return output.join('').replace(/\s+/g, ' '); + // In general, we want to get back plain text from this function, so all + // HTML entities are decoded. However, to prevent XSS attacks, we + // re-encode all the "<" characters, which should break any attempt to + // inject HTML tags. + + return output.join('') + .replace(/\s+/g, ' ') + .replace(/