joplin/packages/utils/markdown.ts

/* eslint-disable import/prefer-default-export */

import * as MarkdownIt from 'markdown-it';
import { Link } from './types';

// Link validator meant to be assigned to MarkdownIt so that file:// URLs are
// accepted, e.g. [link name](file://...). The vbscript:, javascript: and data:
// schemes are rejected, except for data: URLs that carry a gif, png, jpeg or
// webp image, or that start with "data:image/svg+xml,".
const validateLinks = (url: string) => {
	const blockedSchemePattern = /^(vbscript|javascript|data):/;
	const allowedImageDataPattern = /^data:image\/(gif|png|jpeg|webp);/;

	// The URL is expected to be normalized already, with entities decoded, so
	// trimming and lower-casing is all that is needed before matching.
	const normalizedUrl = url.trim().toLowerCase();

	// Inline SVG images are explicitly allowed.
	if (normalizedUrl.startsWith('data:image/svg+xml,')) return true;

	// Anything that does not use one of the blocked schemes is accepted.
	if (!blockedSchemePattern.test(normalizedUrl)) return true;

	// A blocked scheme is only acceptable when it is a whitelisted image.
	return allowedImageDataPattern.test(normalizedUrl);
};

/**
 * Extracts the links contained in a Markdown string.
 *
 * Links are collected from two sources and returned in this order:
 *
 * 1. Markdown links, found by parsing `md` with MarkdownIt. The title is built
 *    only from the `text` tokens located between the opening and closing link
 *    tokens, so it is stripped of any Markdown code: `title with **bold**`
 *    becomes `title with bold`.
 * 2. HTML anchors (`<a ... href="...">title</a>`), found by running a regular
 *    expression over the raw `md` string. The title is the raw text between
 *    the tags and must be on a single line.
 *
 * @param md - The Markdown source to scan.
 * @returns All the links found, Markdown links first, then HTML anchors.
 * @throws Error if the token stream contains a `link_close` token that is not
 * preceded by a `link_open` token.
 */
export const extractUrls = (md: string): Link[] => {
	const markdownIt = new MarkdownIt();
	markdownIt.validateLink = validateLinks; // Necessary to support file:/// links

	const env = {};
	const tokens = markdownIt.parse(md, env);
	const output: Link[] = [];

	// Walks the token tree. `currentLink` is the link being built, i.e. the one
	// for which a link_open has been seen but not yet its link_close.
	const searchUrls = (tokens: MarkdownIt.Token[], currentLink: Link|null) => {
		for (const token of tokens) {
			if (token.type === 'link_open') {
				currentLink = {
					title: '',
					url: token.attrGet('href') || '',
				};
			} else if (token.type === 'link_close') {
				if (!currentLink) throw new Error('Found a link_close without a link_open');
				output.push(currentLink);
				currentLink = null;
			} else if (token.children && token.children.length) {
				// Pass the link being built so that text nested inside it still
				// contributes to its title.
				searchUrls(token.children, currentLink);
			} else if (token.type === 'text' && currentLink) {
				currentLink.title += token.content;
			}
		}
	};

	searchUrls(tokens, null);

	// Definitely won't work in all cases but for our particular use case,
	// processing Markdown generated from ENEX documents, that should be enough.
	//
	// - `<a\s` requires whitespace after the tag name so that other tags
	//   starting with "a" (such as <abbr>) are not taken for anchors.
	// - The closing quote of the href must be the same character as the opening
	//   one (`\1`), so that a URL such as href="it's.html" is not cut at the
	//   apostrophe.
	const htmlAnchorRegex = /<a\s[\s\S]*?href=(["'])(.*?)\1[\s\S]*?>(.*?)<\/a>/ig;
	let result: RegExpExecArray|null;
	while ((result = htmlAnchorRegex.exec(md)) !== null) {
		output.push({
			url: result[2],
			title: result[3],
		});
	}

	return output;
};
Desktop, Cli: Resolves #9596: Restore note links after importing an ENEX file 2023-12-26 13:19:29 +02:00			`/* eslint-disable import/prefer-default-export */`

			`import * as MarkdownIt from 'markdown-it';`
			`import { Link } from './types';`

			`// enable file link URLs in MarkdownIt. Keeps other URL restrictions of`
			`// MarkdownIt untouched. Format [link name](file://...)`
			`const validateLinks = (url: string) => {`
			`const BAD_PROTO_RE = /^(vbscript\|javascript\|data):/;`
			`const GOOD_DATA_RE = /^data:image\/(gif\|png\|jpeg\|webp);/;`

			`// url should be normalized at this point, and existing entities are decoded`
			`const str = url.trim().toLowerCase();`

			`if (str.indexOf('data:image/svg+xml,') === 0) {`
			`return true;`
			`}`

			`return BAD_PROTO_RE.test(str) ? (!!GOOD_DATA_RE.test(str)) : true;`
			`};`

			// Note that the title is stripped of any Markdown code. So `title with
			// **bold**` will become `title with bold`. Links are extracted both from
			`// Markdown and from HTML links.`
			`export const extractUrls = (md: string): Link[] => {`
			`const markdownIt = new MarkdownIt();`
			`markdownIt.validateLink = validateLinks; // Necessary to support file:/// links`

			`const env = {};`
			`const tokens = markdownIt.parse(md, env);`
			`const output: Link[] = [];`

			`const searchUrls = (tokens: MarkdownIt.Token[], currentLink: Link\|null) => {`
			`for (let i = 0; i < tokens.length; i++) {`
			`const token = tokens[i];`
			`if (token.type === 'link_open') {`
			`currentLink = {`
			`title: '',`
			`url: token.attrGet('href') \|\| '',`
			`};`
			`} else if (token.type === 'link_close') {`
			`if (!currentLink) throw new Error('Found a link_close without a link_open');`
			`output.push(currentLink);`
			`currentLink = null;`
			`} else if (token.children && token.children.length) {`
			`searchUrls(token.children, currentLink);`
			`} else if (token.type === 'text' && currentLink) {`
			`currentLink.title += token.content;`
			`}`
			`}`
			`};`

			`searchUrls(tokens, null);`

			`// Definitely won't work in all cases but for our particular use case,`
			`// processing Markdown generated from ENEX documents, that should be enough.`
			`const htmlAnchorRegex = /<a[\s\S]*?href=["'](.*?)["'][\s\S]*?>(.*?)<\/a>/ig;`
			`let result;`
			`while ((result = htmlAnchorRegex.exec(md)) !== null) {`
			`output.push({`
			`url: result[1],`
			`title: result[2],`
			`});`
			`}`

			`return output;`
			`};`