2020-01-30 23:05:23 +02:00
|
|
|
const Entities = require('html-entities').AllHtmlEntities;
|
|
|
|
// Keep the encoder instance around and call `encode` through it. Taking a bare
// reference to the method (`new Entities().encode`) detaches it from its
// instance, which only works as long as the library's `encode` never reads `this`.
const htmlEntitiesEncoder = new Entities();

/**
 * HTML-entity encodes a string using the `html-entities` encoder.
 *
 * @param {string} text - Text to encode.
 * @returns {string} The encoded text, as returned by `Entities#encode`.
 */
const htmlentities = (text) => htmlEntitiesEncoder.encode(text);
|
|
|
|
|
|
|
|
// [\s\S] instead of . for multiline matching
|
|
|
|
// https://stackoverflow.com/a/16119722/561309
|
|
|
|
// Matches an <img ...> tag that carries a quoted src attribute (case-insensitive, global).
// Capture groups:
//   1 - everything between "<img" and "src=" (lazy)
//   2 - the src value, without its surrounding quotes
//   3 - everything between the closing quote and the next ">"
// NOTE(review): group 1 uses [\s\S], which also matches ">", so for an <img> without
// a src the match can run on into a later tag that has one; "src=" also matches
// inside names such as "data-src=". Confirm whether callers rely on either behaviour.
const imageRegex = /<img([\s\S]*?)src=["']([\s\S]*?)["']([\s\S]*?)>/gi;
|
2020-02-14 01:59:23 +02:00
|
|
|
// Names of inline event-handler attributes. HtmlUtils.sanitizeHtml deletes each of
// these from the attributes of every tag it outputs. Kept in alphabetical order,
// one initial letter per line.
const JS_EVENT_NAMES = [
	'onabort', 'onafterprint',
	'onbeforeprint', 'onbeforeunload', 'onblur',
	'oncanplay', 'oncanplaythrough', 'onchange', 'onclick', 'oncontextmenu', 'oncopy', 'oncuechange', 'oncut',
	'ondblclick', 'ondrag', 'ondragend', 'ondragenter', 'ondragleave', 'ondragover', 'ondragstart', 'ondrop', 'ondurationchange',
	'onemptied', 'onended', 'onerror',
	'onfocus',
	'onhashchange',
	'oninput', 'oninvalid',
	'onkeydown', 'onkeypress', 'onkeyup',
	'onload', 'onloadeddata', 'onloadedmetadata', 'onloadstart',
	'onmessage', 'onmousedown', 'onmousemove', 'onmouseout', 'onmouseover', 'onmouseup', 'onmousewheel',
	'onoffline', 'ononline',
	'onpagehide', 'onpageshow', 'onpaste', 'onpause', 'onplay', 'onplaying', 'onpopstate', 'onprogress',
	'onratechange', 'onreset', 'onresize',
	'onscroll', 'onsearch', 'onseeked', 'onseeking', 'onselect', 'onstalled', 'onstorage', 'onsubmit', 'onsuspend',
	'ontimeupdate', 'ontoggle',
	'onunload',
	'onvolumechange',
	'onwaiting', 'onwheel',
];
|
2020-01-30 23:05:23 +02:00
|
|
|
|
2020-03-06 02:54:21 +02:00
|
|
|
// Tag names that HtmlUtils.isSelfClosingTag treats as self-closing. sanitizeHtml
// writes these as "<name ... />" and never emits a separate closing tag for them.
// Entries are lower case because lookups lower-case the tag name first.
const selfClosingElements = [
	'area',
	'base',
	'basefont',
	'br',
	'col',
	'command',
	'embed',
	'frame',
	'hr',
	'img',
	'input',
	'isindex',
	'keygen',
	'link',
	'meta',
	'param',
	'source',
	'track',
	'wbr',
];
|
|
|
|
|
2020-01-30 23:05:23 +02:00
|
|
|
/**
 * Helpers for rewriting and sanitising HTML strings: attribute serialisation,
 * <img> tag rewriting and a parser-based sanitiser.
 */
class HtmlUtils {

	/**
	 * Serialises an attribute map as `name="value"` pairs joined by single spaces.
	 * Values are HTML-entity encoded; names are written as-is.
	 *
	 * @param {Object} attr - Map of attribute name to value. Only own enumerable
	 *   properties are used. A null/undefined map yields an empty string.
	 * @returns {string} The serialised attributes, without leading or trailing
	 *   space, or '' when there is nothing to serialise.
	 */
	attributesHtml(attr) {
		if (!attr) return '';

		// Object.keys() only returns own enumerable keys and, unlike calling
		// attr.hasOwnProperty(), also works for maps created without a prototype.
		return Object.keys(attr)
			.map(name => `${name}="${htmlentities(attr[name])}"`)
			.join(' ');
	}

	/**
	 * Runs `callback` for every <img> tag matched by `imageRegex` and rewrites the
	 * tag according to the action object it returns.
	 *
	 * Supported actions:
	 * - falsy value: keep the tag (the src value is re-quoted with double quotes);
	 * - `{ type: 'replaceElement', html }`: replace the whole tag with `html`;
	 * - `{ type: 'replaceSource', src }`: keep the tag but use `src` as its source;
	 * - `{ type: 'setAttributes', attrs }`: replace the src attribute with the
	 *   serialised `attrs` map.
	 *
	 * @param {string} html - HTML to process. A falsy value yields ''.
	 * @param {Function} callback - Called with `{ src }` for each matched tag.
	 * @returns {string} The HTML with every matched tag rewritten.
	 * @throws {Error} If the callback returns an action with an unknown `type`.
	 */
	processImageTags(html, callback) {
		if (!html) return '';

		return html.replace(imageRegex, (v, before, src, after) => {
			const action = callback({ src: src });

			if (!action) return `<img${before}src="${src}"${after}>`;

			switch (action.type) {
			case 'replaceElement':
				return action.html;
			case 'replaceSource':
				return `<img${before}src="${action.src}"${after}>`;
			case 'setAttributes':
				return `<img${before}${this.attributesHtml(action.attrs)}${after}>`;
			default:
				throw new Error(`Invalid action: ${action.type}`);
			}
		});
	}

	/**
	 * Tells whether a tag is in the `selfClosingElements` list.
	 *
	 * @param {string} tagName - Tag name, in any letter case.
	 * @returns {boolean} True if the lower-cased name is in the list.
	 */
	isSelfClosingTag(tagName) {
		return selfClosingElements.includes(tagName.toLowerCase());
	}

	/**
	 * Parses `html` with htmlparser2 and serialises it again, dropping disallowed
	 * elements (together with everything nested inside them) and inline event
	 * handler attributes. Text is HTML-entity encoded on output, except inside
	 * <style>, where it is written as-is.
	 *
	 * NOTE(review): URL-valued attributes (for example `href="javascript:..."`)
	 * are not inspected here.
	 *
	 * @param {string} html - HTML to sanitise.
	 * @param {Object} [options]
	 * @param {boolean} [options.addNoMdConvClass=false] - If true, adds a
	 *   "jop-noMdConv" class to all the tags. It can be used afterwards to
	 *   restore HTML tags in Markdown.
	 * @returns {string} The sanitised HTML.
	 */
	sanitizeHtml(html, options = null) {
		const settings = Object.assign({}, {
			// If true, adds a "jop-noMdConv" class to all the tags.
			// It can be used afterwards to restore HTML tags in Markdown.
			addNoMdConvClass: false,
		}, options);

		const htmlparser2 = require('htmlparser2');

		const output = [];

		// Lower-cased names of the tags that are currently open, innermost last.
		const tagStack = [];

		// Number of disallowed tags currently on tagStack. While it is above
		// zero, the parser is inside a disallowed element and nothing is
		// written to the output - this also covers markup nested inside the
		// disallowed element, not just its direct text.
		let disallowedDepth = 0;

		const currentTag = () => {
			if (!tagStack.length) return '';
			return tagStack[tagStack.length - 1];
		};

		// The BASE tag allows changing the base URL from which files are loaded, and
		// that can break several plugins, such as Katex (which needs to load CSS
		// files using a relative URL). For that reason it is disabled.
		// More info: https://github.com/laurent22/joplin/issues/3021
		const disallowedTags = ['script', 'iframe', 'frameset', 'frame', 'object', 'base'];

		const parser = new htmlparser2.Parser({

			onopentag: (name, attrs) => {
				const lowerName = name.toLowerCase();
				tagStack.push(lowerName);

				if (disallowedTags.includes(lowerName)) {
					disallowedDepth++;
					return;
				}

				if (disallowedDepth > 0) return;

				const cleanAttrs = Object.assign({}, attrs);

				for (const eventName of JS_EVENT_NAMES) {
					delete cleanAttrs[eventName];
				}

				// The list above cannot enumerate every handler (pointer, touch,
				// animation events, etc.), so any remaining attribute whose name
				// starts with "on" is removed as well.
				for (const attrName of Object.keys(cleanAttrs)) {
					if (attrName.length > 2 && attrName.toLowerCase().startsWith('on')) {
						delete cleanAttrs[attrName];
					}
				}

				if (settings.addNoMdConvClass) {
					let classAttr = cleanAttrs['class'] || '';
					if (!classAttr.includes('jop-noMdConv')) {
						classAttr += ' jop-noMdConv';
						cleanAttrs['class'] = classAttr.trim();
					}
				}

				let attrHtml = this.attributesHtml(cleanAttrs);
				if (attrHtml) attrHtml = ` ${attrHtml}`;
				const closingSign = this.isSelfClosingTag(name) ? '/>' : '>';
				output.push(`<${name}${attrHtml}${closingSign}`);
			},

			ontext: (decodedText) => {
				if (disallowedDepth > 0) return;

				if (currentTag() === 'style') {
					// For CSS, we have to put the style as-is inside the tag because if we html-entities encode
					// it, it's not going to work. But it's ok because JavaScript won't run within the style tag.
					// Ideally CSS should be loaded from an external file.
					output.push(decodedText);
				} else {
					output.push(htmlentities(decodedText));
				}
			},

			onclosetag: (name) => {
				const current = currentTag();
				const closesCurrent = current === name.toLowerCase();

				if (closesCurrent) tagStack.pop();

				if (disallowedTags.includes(current)) {
					// Keep the counter equal to the number of disallowed tags
					// that are still on the stack.
					if (closesCurrent) disallowedDepth--;
					return;
				}

				if (disallowedDepth > 0) return;

				// Self-closing tags were already written as "<name ... />".
				if (this.isSelfClosingTag(name)) return;

				output.push(`</${name}>`);
			},

		}, { decodeEntities: true });

		parser.write(html);
		parser.end();

		return output.join('');
	}

}
|
|
|
|
|
|
|
|
// Single HtmlUtils instance created when the module is loaded.
const htmlUtils = new HtmlUtils();
|
|
|
|
|
|
|
|
// The module exports the instance itself (not the class), so every importer
// receives the same object.
module.exports = htmlUtils;
|