2020-11-05 18:58:23 +02:00
|
|
|
import { RuleOptions } from '../../MdToHtml';
|
2020-11-20 18:04:47 +02:00
|
|
|
import htmlUtils from '../../htmlUtils';
|
2020-10-21 01:23:55 +02:00
|
|
|
|
2020-02-14 01:59:23 +02:00
|
|
|
const md5 = require('md5');
|
|
|
|
|
2020-10-21 01:23:55 +02:00
|
|
|
export default {

	// Markdown-it plugin: appends a core rule named `sanitize_html` that runs
	// every `html_block` and `html_inline` token (at any nesting depth) through
	// htmlUtils.sanitizeHtml() and stores the result back in `token.content`.
	//
	// - markdownIt:  the markdown-it instance the rule is registered on.
	// - ruleOptions: supplies `context.cache`, used to look up and store
	//                sanitised output keyed by a hash of the raw content.
	plugin: function(markdownIt: any, ruleOptions: RuleOptions) {

		// True for the two token types that carry raw HTML.
		const isRawHtmlToken = (token: any) => {
			const type = token.type;
			return type === 'html_block' || type === 'html_inline';
		};

		// Replaces the token's content with its sanitised form, going through
		// the cache first.
		//
		// Note that an `html_inline` token only holds a fragment of the HTML
		// that will eventually be rendered, and that fragment is not
		// necessarily valid on its own. For instance:
		//
		//     <a href="#">Testing</a>
		//
		// is emitted as three separate tokens:
		//
		//     html_inline: <a href="#">
		//     text:        Testing
		//     html_inline: </a>
		//
		// so sanitizeHtml() has to cope with such incomplete markup.
		const sanitizeTokenContent = (token: any) => {
			const key = md5(escape(token.content));
			const cached = ruleOptions.context.cache.value(key);

			// Any falsy cached value is treated as a miss and recomputed.
			const cleanHtml = cached
				? cached
				: htmlUtils.sanitizeHtml(token.content, { addNoMdConvClass: true });

			token.content = cleanHtml;

			// The entry is written back on every pass, including cache hits.
			ruleOptions.context.cache.setValue(key, cleanHtml, 1000 * 60 * 60);
		};

		// Depth-first traversal: sanitise HTML tokens, then descend into the
		// children of every token regardless of its type.
		const visitTokens = (tokenList: any[]) => {
			if (!tokenList || !tokenList.length) return;

			for (const current of tokenList) {
				if (isRawHtmlToken(current)) {
					sanitizeTokenContent(current);
				}

				visitTokens(current.children);
			}
		};

		markdownIt.core.ruler.push('sanitize_html', (state: any) => {
			visitTokens(state.tokens);
		});
	},
};
|