const MarkdownIt = require('markdown-it');
const Entities = require('html-entities').AllHtmlEntities;
const htmlentities = (new Entities()).encode;
const Resource = require('lib/models/Resource.js');
const ObjectUtils = require('lib/ObjectUtils');
const { shim } = require('lib/shim.js');
const { _ } = require('lib/locale');
const md5 = require('md5');
const MdToHtml_Katex = require('lib/MdToHtml_Katex');
const StringUtils = require('lib/string-utils.js');
class MdToHtml {
constructor(options = null) {
if (!options) options = {};
this.loadedResources_ = {};
this.cachedContent_ = null;
this.cachedContentKey_ = null;
// Must include last "/"
this.resourceBaseUrl_ = ('resourceBaseUrl' in options) ? options.resourceBaseUrl : null;
}
makeContentKey(resources, body, style, options) {
let k = [];
for (let n in resources) {
if (!resources.hasOwnProperty(n)) continue;
const r = resources[n];
k.push(r.id);
}
k.push(md5(escape(body))); // https://github.com/pvorb/node-md5/issues/41
k.push(md5(JSON.stringify(style)));
k.push(md5(JSON.stringify(options)));
return k.join('_');
}
clearCache() {
this.cachedContent_ = null;
this.cachedContentKey_ = null;
}
renderAttrs_(attrs) {
if (!attrs) return '';
let output = [];
for (let i = 0; i < attrs.length; i++) {
const n = attrs[i][0];
const v = attrs[i].length >= 2 ? attrs[i][1] : null;
if (n === 'alt' && !v) {
continue;
} else if (n === 'src') {
output.push('src="' + htmlentities(v) + '"');
} else {
output.push(n + '="' + (v ? htmlentities(v) : '') + '"');
}
}
return output.join(' ');
}
getAttr_(attrs, name, defaultValue = null) {
for (let i = 0; i < attrs.length; i++) {
if (attrs[i][0] === name) return attrs[i].length > 1 ? attrs[i][1] : null;
}
return defaultValue;
}
setAttr_(attrs, name, value) {
for (let i = 0; i < attrs.length; i++) {
if (attrs[i][0] === name) {
attrs[i][1] = value;
return attrs;
}
}
attrs.push([name, value]);
return attrs;
}
async loadResource(id, options) {
// Initially set to to an empty object to make
// it clear that it is being loaded. Otherwise
// it sometimes results in multiple calls to
// loadResource() for the same resource.
this.loadedResources_[id] = {};
const resource = await Resource.load(id);
if (!resource) {
// Can happen for example if an image is attached to a note, but the resource hasn't
// been downloaded from the sync target yet.
console.info('Cannot load resource: ' + id);
delete this.loadedResources_[id];
return;
}
const localState = await Resource.localState(resource);
if (localState.fetch_status !== Resource.FETCH_STATUS_DONE) {
delete this.loadedResources_[id];
console.info('Resource not yet fetched: ' + id);
return;
}
this.loadedResources_[id] = resource;
if (options.onResourceLoaded) options.onResourceLoaded();
}
renderImage_(attrs, options) {
const title = this.getAttr_(attrs, 'title');
const href = this.getAttr_(attrs, 'src');
if (!Resource.isResourceUrl(href)) {
return '';
}
const resourceId = Resource.urlToId(href);
const resource = this.loadedResources_[resourceId];
if (!resource) {
this.loadResource(resourceId, options);
return '';
}
if (!resource.id) return ''; // Resource is being loaded
const mime = resource.mime ? resource.mime.toLowerCase() : '';
if (Resource.isSupportedImageMimeType(mime)) {
let src = './' + Resource.filename(resource);
if (this.resourceBaseUrl_ !== null) src = this.resourceBaseUrl_ + src;
let output = '';
return output;
}
return '[Image: ' + htmlentities(resource.title) + ' (' + htmlentities(mime) + ')]';
}
renderImageHtml_(before, src, after, options) {
const resourceId = Resource.urlToId(src);
const resource = this.loadedResources_[resourceId];
if (!resource) {
this.loadResource(resourceId, options);
return '';
}
if (!resource.id) return ''; // Resource is being loaded
const mime = resource.mime ? resource.mime.toLowerCase() : '';
if (Resource.isSupportedImageMimeType(mime)) {
let newSrc = './' + Resource.filename(resource);
if (this.resourceBaseUrl_ !== null) newSrc = this.resourceBaseUrl_ + newSrc;
let output = '';
return output;
}
return '[Image: ' + htmlentities(resource.title) + ' (' + htmlentities(mime) + ')]';
}
renderOpenLink_(attrs, options) {
let href = this.getAttr_(attrs, 'href');
const text = this.getAttr_(attrs, 'text');
const isResourceUrl = Resource.isResourceUrl(href);
const title = isResourceUrl ? this.getAttr_(attrs, 'title') : href;
let resourceIdAttr = "";
let icon = "";
let hrefAttr = '#';
if (isResourceUrl) {
const resourceId = Resource.pathToId(href);
href = "joplin://" + resourceId;
resourceIdAttr = "data-resource-id='" + resourceId + "'";
icon = '';
} else {
// If the link is a plain URL (as opposed to a resource link), set the href to the actual
// link. This allows the link to be exported too when exporting to PDF.
hrefAttr = href;
}
const js = options.postMessageSyntax + "(" + JSON.stringify(href) + "); return false;";
let output = "" + icon;
return output;
}
renderCloseLink_(attrs, options) {
return '';
}
rendererPlugin_(language) {
if (!language) return null;
if (!this.rendererPlugins_) {
this.rendererPlugins_ = {};
this.rendererPlugins_['katex'] = new MdToHtml_Katex();
}
return language in this.rendererPlugins_ ? this.rendererPlugins_[language] : null;
}
parseInlineCodeLanguage_(content) {
const m = content.match(/^\{\.([a-zA-Z0-9]+)\}/);
if (m && m.length >= 2) {
const language = m[1];
return {
language: language,
newContent: content.substr(language.length + 3),
};
}
return null;
}
urldecode_(str) {
try {
return decodeURIComponent((str+'').replace(/\+/g, '%20'));
} catch (error) {
// decodeURIComponent can throw if the string contains non-encoded data (for example "100%")
// so in this case just return the non encoded string.
return str;
}
}
// Walks the given token list and builds the HTML output as an array of string
// fragments, which is joined into a single string and returned.
//
// For each token the method decides which tag to open or close, renders links
// through renderOpenLink_() / renderCloseLink_(), and collects the extra CSS of
// every renderer plugin that was used (once per plugin name).
//
// NOTE(review): several string literals in this method look like they lost their
// HTML markup (the pushes of '' and the unterminated literals below), and
// the part of the loop that renders token content and computes closeTag does not
// appear between the open-tag and close-tag handling - confirm against version
// control before relying on this text.
renderTokens_(markdownIt, tokens, options) {
let output = [];
let previousToken = null;
let anchorAttrs = [];
let extraCssBlocks = {};
let anchorHrefs = [];
for (let i = 0; i < tokens.length; i++) {
let t = tokens[i];
// NOTE(review): the condition is always true inside this loop, so for the last
// token this evaluates to undefined (tokens[tokens.length]) rather than null.
const nextToken = i < tokens.length ? tokens[i+1] : null;
let tag = t.tag;
let openTag = null;
let closeTag = null;
let attrs = t.attrs ? t.attrs : [];
let tokenContent = t.content ? t.content : '';
const isCodeBlock = tag === 'code' && t.block;
const isInlineCode = t.type === 'code_inline';
const codeBlockLanguage = t && t.info ? t.info : null;
let rendererPlugin = null;
let rendererPluginOptions = { tagType: 'inline' };
let linkHref = null;
// Code blocks may be handled by a plugin selected from the block's language.
if (isCodeBlock) rendererPlugin = this.rendererPlugin_(codeBlockLanguage);
// Decide which tag, if any, this token opens or closes.
if (isInlineCode) {
openTag = null;
} else if (tag && (t.type.indexOf('html_inline') >= 0 || t.type.indexOf('html_block') >= 0)) {
openTag = null;
} else if (tag && t.type.indexOf('_open') >= 0) {
openTag = tag;
} else if (tag && t.type.indexOf('_close') >= 0) {
closeTag = tag;
} else if (tag && t.type.indexOf('inline') >= 0) {
openTag = tag;
} else if (t.type === 'link_open') {
openTag = 'a';
} else if (isCodeBlock) {
if (rendererPlugin) {
openTag = null;
} else {
openTag = 'pre';
}
}
if (openTag) {
if (openTag === 'a') {
// Remember the attributes of the anchor so that they are available
// again when the matching close token is rendered.
anchorAttrs.push(attrs);
anchorHrefs.push(this.getAttr_(attrs, 'href'));
output.push(this.renderOpenLink_(attrs, options));
} else {
const attrsHtml = this.renderAttrs_(attrs);
output.push('<' + openTag + (attrsHtml ? ' ' + attrsHtml : '') + '>');
}
}
if (isCodeBlock) {
const codeAttrs = ['code'];
if (!rendererPlugin) {
if (codeBlockLanguage) codeAttrs.push(t.info); // t.info contains the language when the token is a codeblock
output.push('');
}
} else if (isInlineCode) {
// Inline code may start with a "{.language}" marker that selects a plugin.
const result = this.parseInlineCodeLanguage_(tokenContent);
if (result) {
rendererPlugin = this.rendererPlugin_(result.language);
tokenContent = result.newContent;
}
if (!rendererPlugin) {
output.push('
');
}
}
if (closeTag) {
if (closeTag === 'a') {
const currentAnchorAttrs = anchorAttrs.pop();
// NOTE: Disabled for now due to this:
// https://github.com/laurent22/joplin/issues/318#issuecomment-375854848
// const previousContent = output.length ? output[output.length - 1].trim() : '';
// const anchorHref = this.getAttr_(currentAnchorAttrs, 'href', '').trim();
// Optimisation: If the content of the anchor is the same as the URL, we replace the content
// by (Link). This is to shorten the text, which is important especially when the note comes
// from imported HTML, which can contain many such links and make the text unreadable. An example
// would be a movie review that has multiple links to allow a user to rate the film from 1 to 5 stars.
// In the original page, it might be rendered as stars, via CSS, but in the imported note it would look like this:
// http://example.com/rate/1 http://example.com/rate/2 http://example.com/rate/3
// http://example.com/rate/4 http://example.com/rate/5
// which would take a lot of screen space even though it doesn't matter since the user is unlikely
// to rate the film from the note. This is actually a nice example, still readable, but there is way
// worse than this in notes that come from web-clipped content.
// With this change, the links will still be preserved but displayed like
// (link) (link) (link) (link) (link)
// if (this.urldecode_(previousContent) === htmlentities(this.urldecode_(anchorHref))) {
// output.pop();
// output.push(_('(Link)'));
// }
output.push(this.renderCloseLink_(currentAnchorAttrs, options));
} else {
output.push('' + closeTag + '>');
}
}
// Collect the plugin's extra CSS, at most once per plugin name.
if (rendererPlugin) {
const extraCss = rendererPlugin.extraCss();
const name = rendererPlugin.name();
if (extraCss && !(name in extraCssBlocks)) {
extraCssBlocks[name] = extraCss;
}
}
previousToken = t;
}
// Insert the extra CSS at the top of the HTML
if (!ObjectUtils.isEmpty(extraCssBlocks)) {
const temp = ['');
output = temp.concat(output);
}
return output.join('');
}
applyHighlightedKeywords_(body, keywords) {
if (!keywords.length) return body;
return StringUtils.surroundKeywords(keywords, body, '', '');
}
// Renders the Markdown `body` to HTML with markdown-it and builds the stylesheet
// from the values in `style`.
//
// options (all optional):
// - postMessageSyntax: defaults to 'postMessage'; used as the function name in the
//   generated click handlers.
// - paddingBottom: defaults to '0'; inserted into the body CSS rule.
// - highlightedKeywords: defaults to []; passed to applyHighlightedKeywords_().
//
// When the key computed by makeContentKey() equals cachedContentKey_, the cached
// content is returned without rendering again.
//
// NOTE(review): as written here the method ends without a return statement and
// never assigns cachedContent_ / cachedContentKey_, and several string and regex
// literals below look like they lost their HTML markup - confirm against version
// control before relying on this text.
render(body, style, options = null) {
if (!options) options = {};
if (!options.postMessageSyntax) options.postMessageSyntax = 'postMessage';
if (!options.paddingBottom) options.paddingBottom = '0';
if (!options.highlightedKeywords) options.highlightedKeywords = [];
const cacheKey = this.makeContentKey(this.loadedResources_, body, style, options);
if (this.cachedContentKey_ === cacheKey) return this.cachedContent_;
const md = new MarkdownIt({
breaks: true,
linkify: true,
html: true,
});
body = this.applyHighlightedKeywords_(body, options.highlightedKeywords);
// Add `file:` protocol in linkify to allow text in the format of "file://..." to translate into
// file-URL links in html view
md.linkify.add('file:', {
validate: function (text, pos, self) {
var tail = text.slice(pos);
if (!self.re.file) {
// matches all local file URI on Win/Unix/MacOS systems including reserved characters in some OS (i.e. no OS specific sanity check)
self.re.file = new RegExp('^[\\/]{2,3}[\\S]+');
}
if (self.re.file.test(tail)) {
return tail.match(self.re.file)[0].length;
}
return 0;
}
});
// enable file link URLs in MarkdownIt. Keeps other URL restrictions of MarkdownIt untouched.
// Format [link name](file://...)
md.validateLink = function (url) {
var BAD_PROTO_RE = /^(vbscript|javascript|data):/;
var GOOD_DATA_RE = /^data:image\/(gif|png|jpeg|webp);/;
// url should be normalized at this point, and existing entities are decoded
var str = url.trim().toLowerCase();
// Reject vbscript:, javascript: and data: URLs, except data: URLs for the listed image types.
return BAD_PROTO_RE.test(str) ? (GOOD_DATA_RE.test(str) ? true : false) : true;
}
// This is currently used only so that the $expression$ and $$\nexpression\n$$ blocks are translated
// to math_inline and math_block blocks. These blocks are then processed directly with the Katex
// library. It is better this way as then it is possible to conditionally load the CSS required by
// Katex and use an up-to-date version of Katex (as of 2018, the plugin is still using 0.6, which is
// buggy instead of 0.9).
md.use(require('markdown-it-katex'));
// Hack to make checkboxes clickable. Ideally, checkboxes should be parsed properly in
// renderTokens_(), but for now this hack works. Marking it with HORRIBLE_HACK so
// that it can be removed and replaced later on.
const HORRIBLE_HACK = true;
if (HORRIBLE_HACK) {
// Replace each checkbox, one at a time, with a numbered text marker that
// is turned back into markup after rendering.
let counter = -1;
while (body.indexOf('- [ ]') >= 0 || body.indexOf('- [X]') >= 0 || body.indexOf('- [x]') >= 0) {
body = body.replace(/- \[(X| |x)\]/, function(v, p1) {
let s = p1 == ' ' ? 'NOTICK' : 'TICK';
counter++;
return '- mJOPmCHECKBOXm' + s + 'm' + counter + 'm';
});
}
}
const env = {};
const tokens = md.parse(body, env);
let renderedBody = this.renderTokens_(md, tokens, options);
// console.info(body);
// console.info(tokens);
// console.info(renderedBody);
if (HORRIBLE_HACK) {
// loopCount caps the number of replacements so that a marker that never
// matches the pattern cannot keep this loop running forever.
let loopCount = 0;
while (renderedBody.indexOf('mJOPm') >= 0) {
renderedBody = renderedBody.replace(/mJOPmCHECKBOXm([A-Z]+)m(\d+)m/, function(v, type, index) {
const js = options.postMessageSyntax + "('checkboxclick:" + type + ':' + index + "'); this.classList.contains('tick') ? this.classList.remove('tick') : this.classList.add('tick'); return false;";
return '' + '' + '';
});
if (loopCount++ >= 9999) break;
}
}
// NOTE(review): the pattern of the regex literal on the next line is empty, so the
// "//" is read as the start of a line comment - the original pattern must be restored.
renderedBody = renderedBody.replace(//g, (v, before, src, after) => {
if (!Resource.isResourceUrl(src)) return '';
return this.renderImageHtml_(before, src, after, options);
});
// To disable meta tags that would refresh the page - eg ""
// Also disable a few other tags that are likely not meant to be rendered.
// https://github.com/laurent22/joplin/issues/769
// NOTE(review): the regex has no "g" flag, so only the first match is replaced, and
// the replacement '<$1' is identical to the matched text - confirm the intended replacement.
renderedBody = renderedBody.replace(/<(meta|title|body|html|script)/, '<$1');
// https://necolas.github.io/normalize.css/
const normalizeCss = `
html{line-height:1.15;-ms-text-size-adjust:100%;-webkit-text-size-adjust:100%}body{margin:0}
article,aside,footer,header,nav,section{display:block}h1{font-size:2em;margin:.67em 0}hr{box-sizing:content-box;height:0;overflow:visible}
pre{font-family:monospace,monospace;font-size:1em}a{background-color:transparent;-webkit-text-decoration-skip:objects}
b,strong{font-weight:bolder}small{font-size:80%}img{border-style:none}
`;
const fontFamily = "'Avenir', 'Arial', sans-serif";
const css = `
body {
font-size: ` + style.htmlFontSize + `;
color: ` + style.htmlColor + `;
line-height: ` + style.htmlLineHeight + `;
background-color: ` + style.htmlBackgroundColor + `;
font-family: ` + fontFamily + `;
padding-bottom: ` + options.paddingBottom + `;
/* So that, for example, highlighted text or background images are printed too, otherwise browsers tend not to print these things */
-webkit-print-color-adjust: exact;
}
p, h1, h2, h3, h4, h5, h6, ul, table {
margin-top: .6em;
margin-bottom: .65em;
}
h1, h2, h3, h4, h5, h6 {
line-height: 1.5em;
}
h1 {
font-size: 1.5em;
font-weight: bold;
border-bottom: 1px solid ` + style.htmlDividerColor + `;
padding-bottom: .3em;
}
h2 {
font-size: 1.3em;
font-weight: bold;
/ *border-bottom: 1px solid ` + style.htmlDividerColor + `;
padding-bottom: .1em; */
}
h3 {
font-size: 1.1em;
}
h4, h5, h6 {
font-size: 1em;
font-weight: bold;
}
a {
color: ` + style.htmlLinkColor + `;
}
ul, ol {
padding-left: 0;
margin-left: 1.7em;
}
li {
margin-bottom: .4em;
}
li p {
margin-bottom: 0;
}
.resource-icon {
display: inline-block;
position: relative;
top: .5em;
text-decoration: none;
width: 1.15em;
height: 1.45em;
margin-right: 0.4em;
background-color: ` + style.htmlColor + `;
/* Awesome Font file */
-webkit-mask: url("data:image/svg+xml;utf8,");
}
a.checkbox {
display: inline-block;
position: relative;
top: .5em;
text-decoration: none;
width: 1.65em; /* Need to cut a bit the right border otherwise the SVG will display a black line */
height: 1.7em;
margin-right: .3em;
background-color: ` + style.htmlColor + `;
/* Awesome Font square-o */
-webkit-mask: url("data:image/svg+xml;utf8,");
}
a.checkbox.tick {
left: .1245em; /* square-o and check-square-o aren't exactly aligned so add this extra gap to align them */
/* Awesome Font check-square-o */
-webkit-mask: url("data:image/svg+xml;utf8,");
}
blockquote {
border-left: 4px solid ` + style.htmlCodeBorderColor + `;
padding-left: 1.2em;
margin-left: 0;
opacity: .7;
}
table {
text-align: left-align;
border-collapse: collapse;
border: 1px solid ` + style.htmlCodeBorderColor + `;
background-color: ` + style.htmlBackgroundColor + `;
}
td, th {
padding: .5em 1em .5em 1em;
font-size: ` + style.htmlFontSize + `;
color: ` + style.htmlColor + `;
font-family: ` + fontFamily + `;
}
td {
border: 1px solid ` + style.htmlCodeBorderColor + `;
}
th {
border: 1px solid ` + style.htmlCodeBorderColor + `;
border-bottom: 2px solid ` + style.htmlCodeBorderColor + `;
background-color: ` + style.htmlTableBackgroundColor + `;
}
tr:nth-child(even) {
background-color: ` + style.htmlTableBackgroundColor + `;
}
hr {
border: none;
border-bottom: 2px solid ` + style.htmlDividerColor + `;
}
img {
max-width: 100%;
height: auto;
}
.inline-code {
border: 1px solid ` + style.htmlCodeBorderColor + `;
background-color: ` + style.htmlCodeBackgroundColor + `;
padding-right: .2em;
padding-left: .2em;
border-radius: .25em;
color: ` + style.htmlCodeColor + `;
font-size: ` + style.htmlCodeFontSize + `;
}
.highlighted-keyword {
background-color: #F3B717;
color: black;
}
/*
This is to fix https://github.com/laurent22/joplin/issues/764
Without this, the tag attached to an equation float at an absoluate position of the page,
instead of a position relative to the container.
*/
.katex-display>.katex>.katex-html {
position: relative;
}
@media print {
body {
height: auto !important;
}
a.checkbox.tick {
/* Checkbox ticks are displayed upside down when printed for some reason */
transform: scaleY(-1);
}
pre {
white-space: pre-wrap;
}
.code, .inline-code {
border: 1px solid #CBCBCB;
}
}
`;
// To style the checkboxes in print when webkit-print-color-adjust is not enabled.
// Keep it there for now in case that CSS parameter needs to be removed.
// a.checkbox {
// border: 1pt solid ` + style.htmlColor + `;
// border-radius: 2pt;
// width: 1em;
// height: 1em;
// line-height: 1em;
// text-align: center;
// top: .4em;
// }
// a.checkbox.tick:after {
// content: "X";
// }
// a.checkbox.tick {
// top: 0;
// left: -0.02em;
// color: ` + style.htmlColor + `;
// }
// NOTE(review): styleHtml is an empty string here, so normalizeCss and css built
// above are never used - presumably the style markup was lost; confirm.
const styleHtml = '';
const output = styleHtml + renderedBody;
// console.info('');
}
}
if (t.type === 'math_inline' || t.type === 'math_block') {
rendererPlugin = this.rendererPlugin_('katex');
rendererPluginOptions = { tagType: t.type === 'math_block' ? 'block' : 'inline' };
}
if (rendererPlugin) {
rendererPlugin.loadAssets().catch((error) => {
console.warn('MdToHtml: Error loading assets for ' + rendererPlugin.name() + ': ', error.message);
});
}
if (t.type === 'image') {
if (tokenContent) attrs.push(['title', tokenContent]);
output.push(this.renderImage_(attrs, options));
} else if (t.type === 'html_inline' || t.type === 'html_block') {
output.push(t.content);
} else if (t.type === 'softbreak') {
output.push('
');
}
} else if (isInlineCode) {
if (!rendererPlugin) {
output.push('
');
} else if (t.type === 'hardbreak') {
output.push('
');
} else if (t.type === 'hr') {
output.push('
');
} else {
if (t.children) {
const parsedChildren = this.renderTokens_(markdownIt, t.children, options);
output = output.concat(parsedChildren);
} else {
if (tokenContent) {
if ((isCodeBlock || isInlineCode) && rendererPlugin) {
output = rendererPlugin.processContent(output, tokenContent, isCodeBlock ? 'block' : 'inline');
} else if (rendererPlugin) {
output = rendererPlugin.processContent(output, tokenContent, rendererPluginOptions.tagType);
} else {
output.push(htmlentities(tokenContent));
}
}
}
}
if (nextToken && nextToken.tag === 'li' && t.tag === 'p') {
closeTag = null;
} else if (t.type === 'link_close') {
closeTag = 'a';
} else if (tag && t.type.indexOf('inline') >= 0) {
closeTag = openTag;
} else if (isCodeBlock) {
if (!rendererPlugin) closeTag = openTag;
}
if (isCodeBlock) {
if (!rendererPlugin) {
output.push('