1
0
mirror of https://github.com/laurent22/joplin.git synced 2025-06-21 23:17:42 +02:00

All: Fixes #2667: Fixed sanitize function so that it does not break HTML

This commit is contained in:
Laurent Cozic
2020-03-06 00:54:21 +00:00
parent 120aa066c2
commit 71d1e6eb82
33 changed files with 479 additions and 203 deletions

View File

@ -2,12 +2,32 @@ const Entities = require('html-entities').AllHtmlEntities;
const htmlentities = new Entities().encode;
// [\s\S] instead of . for multiline matching
const NodeHtmlParser = require('node-html-parser');
// https://stackoverflow.com/a/16119722/561309
const imageRegex = /<img([\s\S]*?)src=["']([\s\S]*?)["']([\s\S]*?)>/gi;
const JS_EVENT_NAMES = ['onabort', 'onafterprint', 'onbeforeprint', 'onbeforeunload', 'onblur', 'oncanplay', 'oncanplaythrough', 'onchange', 'onclick', 'oncontextmenu', 'oncopy', 'oncuechange', 'oncut', 'ondblclick', 'ondrag', 'ondragend', 'ondragenter', 'ondragleave', 'ondragover', 'ondragstart', 'ondrop', 'ondurationchange', 'onemptied', 'onended', 'onerror', 'onfocus', 'onhashchange', 'oninput', 'oninvalid', 'onkeydown', 'onkeypress', 'onkeyup', 'onload', 'onloadeddata', 'onloadedmetadata', 'onloadstart', 'onmessage', 'onmousedown', 'onmousemove', 'onmouseout', 'onmouseover', 'onmouseup', 'onmousewheel', 'onoffline', 'ononline', 'onpagehide', 'onpageshow', 'onpaste', 'onpause', 'onplay', 'onplaying', 'onpopstate', 'onprogress', 'onratechange', 'onreset', 'onresize', 'onscroll', 'onsearch', 'onseeked', 'onseeking', 'onselect', 'onstalled', 'onstorage', 'onsubmit', 'onsuspend', 'ontimeupdate', 'ontoggle', 'onunload', 'onvolumechange', 'onwaiting', 'onwheel'];
const selfClosingElements = [
'area',
'base',
'basefont',
'br',
'col',
'command',
'embed',
'frame',
'hr',
'img',
'input',
'isindex',
'keygen',
'link',
'meta',
'param',
'source',
'track',
'wbr',
];
class HtmlUtils {
attributesHtml(attr) {
@ -46,31 +66,64 @@ class HtmlUtils {
});
}
sanitizeHtml(html) {
const walkHtmlNodes = (nodes) => {
if (!nodes || !nodes.length) return;
isSelfClosingTag(tagName) {
return selfClosingElements.includes(tagName.toLowerCase());
}
for (const node of nodes) {
for (const attr in node.attributes) {
if (!node.attributes.hasOwnProperty(attr)) continue;
if (JS_EVENT_NAMES.includes(attr)) node.setAttribute(attr, '');
}
walkHtmlNodes(node.childNodes);
}
sanitizeHtml(html) {
const htmlparser2 = require('htmlparser2');
const output = [];
const tagStack = [];
const currentTag = () => {
if (!tagStack.length) return '';
return tagStack[tagStack.length - 1];
};
// Need to wrap in div, otherwise elements at the root will be skipped
// The DIV tags are removed below
const dom = NodeHtmlParser.parse(`<div>${html}</div>`, {
script: false,
style: true,
pre: true,
comment: false,
});
const disallowedTags = ['script', 'iframe', 'frameset', 'frame', 'object'];
walkHtmlNodes([dom]);
const output = dom.toString();
return output.substr(5, output.length - 11);
const parser = new htmlparser2.Parser({
onopentag: (name, attrs) => {
tagStack.push(name.toLowerCase());
if (disallowedTags.includes(currentTag())) return;
attrs = Object.assign({}, attrs);
for (const eventName of JS_EVENT_NAMES) {
delete attrs[eventName];
}
let attrHtml = this.attributesHtml(attrs);
if (attrHtml) attrHtml = ` ${attrHtml}`;
const closingSign = this.isSelfClosingTag(name) ? '/>' : '>';
output.push(`<${name}${attrHtml}${closingSign}`);
},
ontext: (decodedText) => {
if (disallowedTags.includes(currentTag())) return;
output.push(htmlentities(decodedText));
},
onclosetag: (name) => {
const current = currentTag();
if (current === name.toLowerCase()) tagStack.pop();
if (disallowedTags.includes(current)) return;
if (this.isSelfClosingTag(name)) return;
output.push(`</${name}>`);
},
}, { decodeEntities: true });
parser.write(html);
parser.end();
return output.join('');
}