const md5 = require('md5');
const htmlUtils = require('../../htmlUtils');

// Token types whose `content` is raw HTML and therefore needs sanitising.
const HTML_TOKEN_TYPES = ['html_block', 'html_inline'];

// Lifetime passed to `context.cache.put` for each sanitised fragment: one hour, in milliseconds.
const CACHE_TTL_MS = 1000 * 60 * 60;

/**
 * Returns the lower-cased name of the first opening tag found in `html`, or
 * `null` when there is none (for example when the fragment is a closing tag
 * such as "</a>").
 *
 * Tag names may contain digits and hyphens ("h1", "my-element") and may be
 * directly followed by "/" ("<br/>"), so that such tags are recognised as
 * opening tags and get sanitised like any other.
 */
function getOpenTagName(html:string):string | null {
	const m = html.toLowerCase().match(/<([a-z][a-z0-9-]*)(\s|\/|>)/);
	return m ? m[1] : null;
}

/**
 * Tells whether the HTML fragment ends with "/>", i.e. is a self-closed tag.
 */
function isSelfClosedTag(html:string):boolean {
	return html.endsWith('/>');
}

/**
 * Removes a trailing "</tagName>" from `html`.
 *
 * The comparison is case-insensitive. If `html` does not end with that closing
 * tag it is returned unchanged, so that output to which no closing tag was
 * appended is never truncated.
 */
function stripOffClosingTag(html:string, tagName:string):string {
	const closingTag = `</${tagName}>`.toLowerCase();
	if (html.slice(-closingTag.length).toLowerCase() !== closingTag) return html;
	return html.slice(0, html.length - closingTag.length);
}

/**
 * Computes the sanitised content of an `html_block` or `html_inline` token.
 *
 * For html_inline, the content is only a fragment of HTML, as it will be
 * rendered, but it's not necessarily valid HTML. For example this HTML:
 *
 *     <a href="#">Testing</a>
 *
 * will be rendered as three tokens:
 *
 *     html_inline: <a href="#">
 *     text: Testing
 *     html_inline: </a>
 *
 * The problem for us is that when we pass this HTML fragment to the sanitize
 * function it is going to turn it into valid HTML. Thus:
 *
 *     "<a href="#">" becomes "<a href="#"></a>"
 *     "</a>" becomes ""
 *
 * So the result would be "<a href="#"></a>Testing"
 *
 * Because of this, we need to be careful with html_inline:
 *
 * 0. Check if it's an opening or closing tag - only opening ones need to be processed
 * 1. Sanitize the fragment
 * 2. Strip off the closing tag that was added
 *
 * Also self-closing tags need to be handled.
 *
 * html_block is not a problem as the whole content is valid HTML.
 */
function sanitizeTokenContent(token:any):string {
	if (token.type !== 'html_inline') return htmlUtils.sanitizeHtml(token.content);

	const openTagName = getOpenTagName(token.content);

	// No opening tag in the fragment (e.g. a closing tag): leave it as it is.
	if (!openTagName) return token.content;

	const sanitized = htmlUtils.sanitizeHtml(token.content);
	return isSelfClosedTag(token.content) ? sanitized : stripOffClosingTag(sanitized, openTagName);
}

/**
 * Registers the `sanitize_html` core rule on the given markdown-it instance.
 *
 * The rule walks the token tree and replaces the content of every
 * `html_block` and `html_inline` token with its sanitised version. Results are
 * memoised in `context.cache`.
 */
// @ts-ignore: Keep the function signature as-is despite unused arguments
function installRule(markdownIt:any, mdOptions:any, ruleOptions:any, context:any) {
	markdownIt.core.ruler.push('sanitize_html', (state:any) => {
		const walkHtmlTokens = (tokens:any[]) => {
			if (!tokens || !tokens.length) return;

			for (const token of tokens) {
				if (HTML_TOKEN_TYPES.includes(token.type)) {
					// The token type is part of the key because the same content is
					// sanitised differently for html_inline and html_block.
					const cacheKey = md5(escape(`${token.type}:${token.content}`));
					const cached = context.cache.get(cacheKey);

					// Any string, including an empty one, is a valid cached result.
					const sanitizedContent = typeof cached === 'string' ? cached : sanitizeTokenContent(token);

					token.content = sanitizedContent;

					// Stored on every visit, cache hits included.
					context.cache.put(cacheKey, sanitizedContent, CACHE_TTL_MS);
				}

				walkHtmlTokens(token.children);
			}
		};

		walkHtmlTokens(state.tokens);
	});
}

/**
 * Creates the markdown-it plugin function that installs the `sanitize_html`
 * rule, using `context.cache` to memoise sanitised fragments.
 */
export default function(context:any, ruleOptions:any) {
	return function(md:any, mdOptions:any) {
		installRule(md, mdOptions, ruleOptions, context);
	};
}