2021-01-02 18:53:59 +02:00
|
|
|
import { validateLinks } from '@joplin/renderer';
|
2018-09-29 14:15:36 +02:00
|
|
|
const stringPadding = require('string-padding');
|
2020-11-05 18:58:23 +02:00
|
|
|
const urlUtils = require('./urlUtils');
|
2018-09-24 21:15:23 +02:00
|
|
|
const MarkdownIt = require('markdown-it');
|
2018-05-23 13:14:38 +02:00
|
|
|
|
2020-06-06 17:00:20 +02:00
|
|
|
// Taken from codemirror/addon/edit/continuelist.js
|
2020-06-16 14:00:17 +02:00
|
|
|
// Matches the beginning of a Markdown list item line.
// Capture groups:
//   1 = leading whitespace
//   2 = the list token: a checkbox ("- [ ] " / "- [x] "), a bullet ("* ", "+ ", "- ")
//       or an ordered marker ("1. " / "1) ")
//   3 = the digits of an ordered marker (undefined for bullets and checkboxes)
//   4 = the "." or ")" delimiter plus the whitespace character that follows it
//   5 = any additional whitespace after the token
const listRegex = /^(\s*)([*+-] \[[x ]\]\s|[*+-]\s|(\d+)([.)]\s))(\s*)/;
|
2021-04-02 05:39:42 +02:00
|
|
|
// Matches a line that consists solely of a list token (checkbox, bullet or
// ordered marker) followed by at least one whitespace character and nothing
// else, i.e. a list item that has no content.
const emptyListRegex = /^(\s*)([*+-] \[[x ]\]|[*+-]|(\d+)[.)])(\s+)$/;
|
2020-06-06 17:00:20 +02:00
|
|
|
|
2021-05-27 15:24:56 +02:00
|
|
|
/**
 * Horizontal alignment of a column in a table generated by
 * `markdownUtils.createMarkdownTable`.
 */
export enum MarkdownTableJustify {
	Left = 'left',
	Center = 'center',
	// The value previously contained a stray trailing comma ('right,').
	Right = 'right',
}
|
|
|
|
|
2020-10-09 19:35:46 +02:00
|
|
|
/**
 * Describes a single column of a table built by
 * `markdownUtils.createMarkdownTable`.
 */
export interface MarkdownTableHeader {
	/** Key used to read this column's value from each row object. */
	name: string;

	/** Text rendered in the header row for this column. */
	label: string;

	/** Optional transform called with the raw row value; its result is what gets rendered. */
	filter?: Function;

	/** When truthy, the cell value is emitted without being passed through `escapeTableCell`. */
	disableEscape?: boolean;

	/** Column alignment. Treated as left-justified when omitted. */
	justify?: MarkdownTableJustify;
}
|
|
|
|
|
|
|
|
/**
 * One row of data for `markdownUtils.createMarkdownTable`: a plain object
 * mapping a column name (see `MarkdownTableHeader.name`) to its cell text.
 */
export interface MarkdownTableRow {
	[columnName: string]: string;
}
|
|
|
|
|
2017-08-02 19:47:25 +02:00
|
|
|
/**
 * Collection of helpers for escaping, inspecting and generating Markdown.
 */
const markdownUtils = {

	/**
	 * Escapes text for use as the title part of a Markdown link.
	 * Titles for markdown links only need escaping for [ and ]
	 */
	escapeTitleText(text: string) {
		return text.replace(/(\[|\])/g, '\\$1');
	},

	/**
	 * Percent-encodes the characters that would otherwise terminate or break a
	 * Markdown link destination: "(", ")" and the space character.
	 */
	escapeLinkUrl(url: string) {
		url = url.replace(/\(/g, '%28');
		url = url.replace(/\)/g, '%29');
		url = url.replace(/ /g, '%20');
		return url;
	},

	/**
	 * Makes arbitrary text safe to place inside a Markdown table cell.
	 *
	 * @param text Raw cell content.
	 * @returns The text with HTML angle brackets turned into entities, new lines
	 * turned into `<br/>` and pipes backslash-escaped.
	 */
	escapeTableCell(text: string) {
		// Disable HTML code. This must run before the new line replacement below
		// so that the inserted <br/> tags are not themselves escaped.
		text = text.replace(/</g, '&lt;');
		text = text.replace(/>/g, '&gt;');
		// Table cells can't contain new lines so replace with <br/>
		text = text.replace(/\n/g, '<br/>');
		// "|" is a reserved character that should be escaped
		text = text.replace(/\|/g, '\\|');
		return text;
	},

	/**
	 * Doubles every backtick in the text so it can be placed in inline code.
	 */
	escapeInlineCode(text: string): string {
		// https://github.com/github/markup/issues/363#issuecomment-55499909
		return text.replace(/`/g, '``');
	},

	/**
	 * Reverses `escapeLinkUrl`: decodes "%28", "%29" and "%20" back to "(", ")"
	 * and the space character. Other percent-encoded sequences are left as-is.
	 */
	unescapeLinkUrl(url: string) {
		url = url.replace(/%28/g, '(');
		url = url.replace(/%29/g, ')');
		url = url.replace(/%20/g, ' ');
		return url;
	},

	/**
	 * Rewrites the destination of every `](url ...)` occurrence in the Markdown
	 * by passing the URL through `urlUtils.prependBaseUrl`.
	 *
	 * @param md Markdown source.
	 * @param baseUrl Base URL forwarded to `urlUtils.prependBaseUrl`.
	 */
	prependBaseUrl(md: string, baseUrl: string) {
		// eslint-disable-next-line no-useless-escape
		return md.replace(/(\]\()([^\s\)]+)(.*?\))/g, (_match: string, before: string, url: string, after: string) => {
			return before + urlUtils.prependBaseUrl(url, baseUrl) + after;
		});
	},

	/**
	 * Parses the Markdown with markdown-it and collects the `src` / `href`
	 * attribute values of the matching tokens.
	 *
	 * Returns the **encoded** URLs, so to be useful they should be decoded again before use.
	 *
	 * @param md Markdown source.
	 * @param onlyType When falsy, both `link_open` and `image` tokens are
	 * considered. Otherwise only tokens of that type are considered, with the
	 * special value `'pdf'` selecting embedded PDF links (see below).
	 */
	extractFileUrls(md: string, onlyType: string = null): Array<string> {
		const markdownIt = new MarkdownIt();
		markdownIt.validateLink = validateLinks; // Necessary to support file:/// links

		const env = {};
		const tokens = markdownIt.parse(md, env);
		const output: string[] = [];

		const searchUrls = (tokens: any[]) => {
			for (let i = 0; i < tokens.length; i++) {
				const token = tokens[i];

				const matchesDefault = !onlyType && (token.type === 'link_open' || token.type === 'image');
				const matchesRequestedType = !!onlyType && token.type === onlyType;
				const matchesPdfLink = onlyType === 'pdf' && token.type === 'link_open';

				if (matchesDefault || matchesRequestedType || matchesPdfLink) {
					// Pdf embeds are a special case, they are represented as 'link_open' tokens but are marked with 'embedded_pdf' as link name by the parser
					// We are making sure if its in the proper pdf link format, only then we add it to the list
					const next = tokens.length > i + 1 ? tokens[i + 1] : null;
					const isEmbeddedPdf = !!next && next.type === 'text' && next.content === 'embedded_pdf';
					if (onlyType === 'pdf' && !isEmbeddedPdf) continue;

					// Guard against tokens that carry no attributes at all, which can
					// happen when onlyType names a token type other than link/image.
					const attrs: any[] = token.attrs || [];
					for (let j = 0; j < attrs.length; j++) {
						const a = attrs[j];
						if ((a[0] === 'src' || a[0] === 'href') && a.length >= 2 && a[1]) {
							output.push(a[1]);
						}
					}
				}

				if (token.children && token.children.length) {
					searchUrls(token.children);
				}
			}
		};

		searchUrls(tokens);

		return output;
	},

	/**
	 * Replaces link destinations that match `urlToReplace` with `:/<id>`.
	 *
	 * @param md Markdown source.
	 * @param urlToReplace Inserted verbatim into a regular expression, so any
	 * regex metacharacters it contains are interpreted as such. Callers that want
	 * a literal match must pass an already regex-escaped string.
	 * @param id Identifier placed after `:/` in the replacement.
	 */
	replaceResourceUrl(md: string, urlToReplace: string, id: string) {
		// Only matches right after "](", optionally wrapped in <...>, and only
		// when a closing ")" follows later on the same line.
		const linkRegex = `(?<=\\]\\()\\<?${urlToReplace}\\>?(?=.*\\))`;
		const reg = new RegExp(linkRegex, 'g');
		return md.replace(reg, `:/${id}`);
	},

	/** Returns the encoded URLs of all `image` tokens found in the Markdown. */
	extractImageUrls(md: string) {
		return markdownUtils.extractFileUrls(md, 'image');
	},

	/** Returns the encoded URLs of all embedded PDF links found in the Markdown. */
	extractPdfUrls(md: string) {
		return markdownUtils.extractFileUrls(md, 'pdf');
	},

	/**
	 * Returns the number of an ordered list item line.
	 *
	 * The match array of `listRegex` is
	 * [full match, leading whitespace, list token, ol line number,
	 * delimiter after the number, whitespace following token].
	 *
	 * Returns 0 when the line is not a list item. For a bullet or checkbox item
	 * the number group does not participate in the match, so the result is NaN.
	 */
	olLineNumber(line: string) {
		const match = line.match(listRegex);
		return match ? Number(match[3]) : 0;
	},

	/**
	 * Returns the list token (e.g. "- ", "- [ ] ", "1. ") at the start of the
	 * line, or an empty string when the line is not a list item.
	 */
	extractListToken(line: string) {
		const match = line.match(listRegex);
		return match ? match[2] : '';
	},

	/** Tells whether the line starts with a list token. */
	isListItem(line: string) {
		return listRegex.test(line);
	},

	/** Tells whether the line is a list token followed only by whitespace. */
	isEmptyListItem(line: string) {
		return emptyListRegex.test(line);
	},

	/**
	 * Builds a Markdown table.
	 *
	 * @param headers Column definitions, rendered in the given order.
	 * @param rows Row objects; each cell is read using the column's `name`.
	 * @returns The table as a string: header line, divider line, then one line per row.
	 */
	createMarkdownTable(headers: MarkdownTableHeader[], rows: MarkdownTableRow[]): string {
		const output: string[] = [];

		// Cells shorter than this are right-padded with spaces.
		const minCellWidth = 5;

		const headersMd: string[] = [];
		const lineMd: string[] = [];
		for (let i = 0; i < headers.length; i++) {
			const h = headers[i];
			headersMd.push(stringPadding(h.label, minCellWidth, ' ', stringPadding.RIGHT));

			const justify = h.justify ? h.justify : MarkdownTableJustify.Left;

			if (justify === MarkdownTableJustify.Left) {
				lineMd.push('-----');
			} else if (justify === MarkdownTableJustify.Center) {
				lineMd.push(':---:');
			} else {
				lineMd.push('----:');
			}
		}

		output.push(`| ${headersMd.join(' | ')} |`);
		output.push(`| ${lineMd.join(' | ')} |`);

		for (let i = 0; i < rows.length; i++) {
			const row = rows[i];
			const rowMd: string[] = [];
			for (let j = 0; j < headers.length; j++) {
				const h = headers[j];
				// A missing or falsy value (filtered or not) is rendered as an empty cell.
				const value = (h.filter ? h.filter(row[h.name]) : row[h.name]) || '';
				const valueMd = h.disableEscape ? value : markdownUtils.escapeTableCell(value);
				rowMd.push(stringPadding(valueMd, minCellWidth, ' ', stringPadding.RIGHT));
			}
			output.push(`| ${rowMd.join(' | ')} |`);
		}

		return output.join('\n');
	},

	/**
	 * Counts the columns of a table line by counting its "|" separators,
	 * ignoring one leading and one trailing pipe. Returns 0 for an empty line.
	 */
	countTableColumns(line: string) {
		if (!line) return 0;

		const trimmed = line.trim();
		let pipes = (trimmed.match(/\|/g) || []).length;

		if (trimmed[0] === '|') { pipes -= 1; }
		if (trimmed[trimmed.length - 1] === '|') { pipes -= 1; }

		return pipes + 1;
	},

	/**
	 * Tells whether `divider` can serve as the divider line for the table
	 * header line `header`: it may only contain whitespace, "-", ":" and "|",
	 * and must have at least as many columns as the header.
	 */
	matchingTableDivider(header: string, divider: string) {
		if (!header || !divider) return false;

		const invalidChars = divider.match(/[^\s\-:|]/g);

		if (invalidChars) { return false; }

		const columns = markdownUtils.countTableColumns(header);
		const cols = markdownUtils.countTableColumns(divider);
		return cols > 0 && (cols >= columns);
	},

	/**
	 * Derives a title from the first line of a note body.
	 *
	 * Leading Markdown markup characters (#, spaces, tabs, *, `, -) are
	 * stripped, links and images are reduced to their text (or to their URL
	 * when the text is empty), and the result is cut to 80 characters.
	 * Returns an empty string for an empty body.
	 */
	titleFromBody(body: string) {
		if (!body) return '';
		const mdLinkRegex = /!?\[([^\]]+?)\]\(.+?\)/g;
		const emptyMdLinkRegex = /!?\[\]\((.+?)\)/g;
		const filterRegex = /^[# \n\t*`-]*/;
		const lines = body.trim().split('\n');
		const title = lines[0].trim();
		return title.replace(filterRegex, '').replace(mdLinkRegex, '$1').replace(emptyMdLinkRegex, '$1').substring(0, 80);
	},

};
|
|
|
|
|
2020-10-09 19:35:46 +02:00
|
|
|
// The helper object is the module's default export.
export default markdownUtils;
|