diff --git a/CliClient/tests/md_to_html/sanitize_2.html b/CliClient/tests/md_to_html/sanitize_2.html new file mode 100644 index 000000000..7b0e5c380 --- /dev/null +++ b/CliClient/tests/md_to_html/sanitize_2.html @@ -0,0 +1 @@ +

Testing inline text

diff --git a/CliClient/tests/md_to_html/sanitize_2.md b/CliClient/tests/md_to_html/sanitize_2.md new file mode 100644 index 000000000..6bcba0641 --- /dev/null +++ b/CliClient/tests/md_to_html/sanitize_2.md @@ -0,0 +1 @@ +Testing **inline** text \ No newline at end of file diff --git a/ReactNativeClient/lib/joplin-renderer/MdToHtml/rules/sanitize_html.ts b/ReactNativeClient/lib/joplin-renderer/MdToHtml/rules/sanitize_html.ts index 2ecb46a81..cc92bbe11 100644 --- a/ReactNativeClient/lib/joplin-renderer/MdToHtml/rules/sanitize_html.ts +++ b/ReactNativeClient/lib/joplin-renderer/MdToHtml/rules/sanitize_html.ts @@ -1,6 +1,20 @@ const md5 = require('md5'); const htmlUtils = require('../../htmlUtils'); +function getOpenTagName(html:string):string { + const m = html.toLowerCase().match(/<([a-z]+)(\s|>)/); + if (!m || m.length < 2) return null; + return m[1]; +} + +function isSelfClosedTag(html:string):boolean { + return html.substr(-2) === '/>'; +} + +function stripOffClosingTag(html:string, tagName:string):string { + return html.substr(0, html.length - tagName.length - 3); +} + // @ts-ignore: Keep the function signature as-is despite unusued arguments function installRule(markdownIt:any, mdOptions:any, ruleOptions:any, context:any) { markdownIt.core.ruler.push('sanitize_html', (state:any) => { @@ -18,8 +32,51 @@ function installRule(markdownIt:any, mdOptions:any, ruleOptions:any, context:any const cacheKey = md5(escape(token.content)); let sanitizedContent = context.cache.get(cacheKey); + // For html_inline, the content is only a fragment of HTML, as it will be rendered, but + // it's not necessarily valid HTML. For example this HTML: + // + // <a href="#">Testing</a> + // + // will be rendered as three tokens: + // + // html_inline: <a href="#"> + // text: Testing + // html_inline: </a> + // + // The problem for us is that when we pass this HTML fragment to the sanitize function + // it is going to turn it into valid HTML. 
Thus: + // + // "<a href="#">" becomes "<a href="#"></a>" + // "</a>" becomes "" + // + // So the result would be "<a href="#"></a>Testing" + // + // Because of this, we need to be careful with html_inline: + // + // 0. Check if it's an opening or closing tag - only opening ones need to be processed + // 1. Sanitize the fragment + // 2. Strip off the closing tag that was added + // + // Also self-closing tags need to be handled. + // + // html_block is not a problem as the whole content is valid HTML. + if (!sanitizedContent) { - sanitizedContent = htmlUtils.sanitizeHtml(token.content); + if (token.type === 'html_inline') { + const openTagName = getOpenTagName(token.content); + const isSelfClosed = isSelfClosedTag(token.content); + + if (!openTagName) { + sanitizedContent = token.content; + } else { + sanitizedContent = htmlUtils.sanitizeHtml(token.content); + if (!isSelfClosed) { + sanitizedContent = stripOffClosingTag(sanitizedContent, openTagName); + } + } + } else { // html_block + sanitizedContent = htmlUtils.sanitizeHtml(token.content); + } } token.content = sanitizedContent; diff --git a/gulpfile.js b/gulpfile.js index 8aa0d50f4..ade134051 100644 --- a/gulpfile.js +++ b/gulpfile.js @@ -32,6 +32,7 @@ const updateIgnoredTypeScriptBuildTask = async function() { '**/.git/**', '**/ElectronClient/lib/**', '**/CliClient/build/lib/**', + '**/CliClient/tests-build/lib/**', '**/ElectronClient/dist/**', ], }).map(f => f.substr(__dirname.length + 1));