import { RuleOptions } from '../../MdToHtml';
const md5 = require('md5');
const htmlUtils = require('../../htmlUtils');
export default {
	/**
	 * markdown-it plugin that appends a core rule named `sanitize_html`.
	 *
	 * When the rule runs it walks the whole token tree (top-level tokens and,
	 * recursively, their `children`) and replaces the `content` of every
	 * `html_block` / `html_inline` token with the output of
	 * `htmlUtils.sanitizeHtml`. Results are stored in
	 * `ruleOptions.context.cache` so identical HTML fragments are not
	 * sanitised again on later renders.
	 *
	 * @param markdownIt - The markdown-it instance the rule is registered on.
	 * @param ruleOptions - Rule options; only `context.cache` is used here.
	 */
	plugin: function(markdownIt: any, ruleOptions: RuleOptions) {
		// Lifetime passed to the cache for each sanitised fragment: one hour,
		// assuming the cache takes milliseconds - TODO confirm against the cache API.
		const cacheLifetime = 1000 * 60 * 60;

		// Depth-first walk over a token list, sanitising HTML tokens in place.
		const sanitizeTokenList = (tokenList: any[]) => {
			if (tokenList && tokenList.length) {
				for (const current of tokenList) {
					const holdsRawHtml = current.type === 'html_block' || current.type === 'html_inline';

					if (holdsRawHtml) {
						// The key is derived from the raw HTML only; the sanitiser
						// options below are constant so they need not be part of it.
						const contentKey = md5(escape(current.content));
						const cachedHtml = ruleOptions.context.cache.value(contentKey);

						// An html_inline token only holds a fragment of the HTML as it
						// will be rendered, so it is not necessarily valid HTML on its
						// own. For example this HTML:
						//
						//     <b>Testing</b>
						//
						// is tokenised as three tokens:
						//
						//     html_inline: <b>
						//     text:        Testing
						//     html_inline: </b>
						//
						// sanitizeHtml must therefore cope with such partial markup.
						//
						// NOTE(review): any falsy cached value (e.g. an empty string)
						// is treated as a cache miss and sanitised again.
						const safeHtml = cachedHtml
							? cachedHtml
							: htmlUtils.sanitizeHtml(current.content, { addNoMdConvClass: true });

						current.content = safeHtml;

						// Written on hits as well as misses, which presumably renews
						// the entry's expiry each time the fragment is seen.
						ruleOptions.context.cache.setValue(contentKey, safeHtml, cacheLifetime);
					}

					// Children are visited for every token, HTML or not.
					sanitizeTokenList(current.children);
				}
			}
		};

		markdownIt.core.ruler.push('sanitize_html', (state: any) => {
			sanitizeTokenList(state.tokens);
		});
	},
};