// joplin/packages/editor/CodeMirror/markdown/markdownMathParser.ts

// Search for $s and $$s in markdown and mark the regions between them as math.
//
// Text between single $s is marked as InlineMath and text between $$s is marked
// as BlockMath.

import { tags, Tag } from '@lezer/highlight';
import { parseMixed, SyntaxNodeRef, Input, NestedParse, ParseWrapper } from '@lezer/common';

// Extend the existing markdown parser
import {
	MarkdownConfig, InlineContext,
	BlockContext, Line, LeafBlock,
} from '@lezer/markdown';

// The existing stexMath parser is used to parse the text between the $s
import { stexMath } from '@codemirror/legacy-modes/mode/stex';
import { StreamLanguage } from '@codemirror/language';

// Character codes of '$' and '\', for comparison with the character codes
// read from the inline parsing context.
const dollarSignCharcode = '$'.charCodeAt(0);
const backslashCharcode = '\\'.charCodeAt(0);

// Matches the $$ that opens a math block at the start of a line.
// (?:\s*[>]\s*)?: Optionally allow the $$ to be preceded by a '>' marker
// (with any whitespace around it), e.g. '> $$'.
const mathBlockStartRegex = /^(?:\s*[>]\s*)?\$\$/;

// Matches a $$ that is followed only by optional whitespace up to the end of
// the text.
const mathBlockEndRegex = /\$\$\s*$/;

// Language built from the legacy sTeX math mode. Its parser is used for the
// text inside math regions (see wrappedTeXParser).
const texLanguage = StreamLanguage.define(stexMath);

// Names of the syntax tree nodes added by this extension. Each math region is
// added as a container node (BlockMath/InlineMath) with a single child content
// node (BlockMathContent/InlineMathContent) that excludes the delimiters.
export const blockMathTagName = 'BlockMath';
export const blockMathContentTagName = 'BlockMathContent';
export const inlineMathTagName = 'InlineMath';
export const inlineMathContentTagName = 'InlineMathContent';

// Highlighting tag applied to block math nodes. Defined from tags.monospace.
export const mathTag = Tag.define(tags.monospace);
// Highlighting tag applied to inline math nodes. Defined from mathTag.
export const inlineMathTag = Tag.define(mathTag);

// Creates a parse wrapper that delegates nodes named [nodeTag] to the sTeX
// math parser. Every other node is left alone (the callback returns null for
// it).
//
// @param nodeTag Name of the nodes whose region should be parsed by the sTeX parser.
// @returns a wrapped sTeX parser.
const wrappedTeXParser = (nodeTag: string): ParseWrapper => {
	return parseMixed((node: SyntaxNodeRef, _input: Input): NestedParse => {
		// Only nodes with a matching name are handed to the TeX parser.
		return node.name === nodeTag ? { parser: texLanguage.parser } : null;
	});
};

// Markdown extension for recognizing inline math (text between single '$'s).
//
// Each match is added as an inlineMathTagName node that wraps an
// inlineMathContentTagName node. The content node covers the text between the
// '$'s and is the region handed to the TeX parser by `wrap`.
const InlineMathConfig: MarkdownConfig = {
	defineNodes: [
		{
			name: inlineMathTagName,
			style: inlineMathTag,
		},
		{
			name: inlineMathContentTagName,
		},
	],
	parseInline: [{
		name: inlineMathTagName,
		after: 'InlineCode',

		// Tries to parse an inline math region that starts at [pos].
		//
		// @param cx Inline parsing context, used to read characters and add nodes.
		// @param current Character code of the character at [pos].
		// @param pos Position of the character being considered.
		// @returns the position just after the closing '$' if a math region was
		//   added, or -1 if no math region starts at [pos].
		parse(cx: InlineContext, current: number, pos: number): number {
			const prevCharCode = pos - 1 >= 0 ? cx.char(pos - 1) : -1;
			const nextCharCode = cx.char(pos + 1);

			// Only a lone '$' can open inline math: reject anything that isn't a '$'
			// and any '$' that is directly next to another '$'.
			if (current !== dollarSignCharcode
					|| prevCharCode === dollarSignCharcode
					|| nextCharCode === dollarSignCharcode) {
				return -1;
			}

			// Don't match if there's a space directly after the '$'
			if (/\s/.test(String.fromCharCode(nextCharCode))) {
				return -1;
			}

			const start = pos;
			const end = cx.end;
			let escaped = false;

			pos ++;

			// Scan ahead for the next unescaped '$'. A backslash escapes the
			// character that follows it (including another backslash).
			for (; pos < end && (escaped || cx.char(pos) !== dollarSignCharcode); pos++) {
				escaped = !escaped && cx.char(pos) === backslashCharcode;
			}

			// It isn't a math region if there is no ending '$'
			if (pos >= end) {
				return -1;
			}

			// Don't match if the ending '$' is preceded by a space.
			const prevChar = String.fromCharCode(cx.char(pos - 1));
			if (/\s/.test(prevChar)) {
				return -1;
			}

			// Advance to just after the ending '$'
			pos ++;

			// Add a wrapping inlineMathTagName node that contains an inlineMathContentTagName.
			// The inlineMathContentTagName node can thus be safely removed and the region
			// will still be marked as a math region.
			const contentElem = cx.elt(inlineMathContentTagName, start + 1, pos - 1);
			cx.addElement(cx.elt(inlineMathTagName, start, pos, [contentElem]));

			// Resume parsing directly after the closing '$'. Returning a later
			// position would hide the next character (e.g. the '*' in '$x$*b*')
			// from all other inline parsers.
			return pos;
		},
	}],
	wrap: wrappedTeXParser(inlineMathContentTagName),
};

// Markdown extension for recognizing block math (regions delimited by '$$').
//
// Each region is added as a blockMathTagName node that wraps a
// blockMathContentTagName node. The content node excludes the delimiters and
// is the region handed to the TeX parser by `wrap`.
const BlockMathConfig: MarkdownConfig = {
	defineNodes: [
		{
			name: blockMathTagName,
			style: mathTag,
		},
		{
			name: blockMathContentTagName,
		},
	],
	parseBlock: [{
		name: blockMathTagName,
		before: 'Blockquote',

		// Tries to parse a math block that opens on the current line.
		//
		// @param cx Block parsing context, used to advance lines and add nodes.
		// @param line The current line. Its text is re-read after each call to
		//   cx.nextLine().
		// @returns true if a math block was added, false if the line doesn't
		//   start a math block.
		parse(cx: BlockContext, line: Line): boolean {
			const openingMatch = mathBlockStartRegex.exec(line.text);
			if (!openingMatch) {
				// No opening $$ on this line.
				return false;
			}

			const delimLen = 2;
			const openingLen = openingMatch[0].length;

			// The content begins just after the opening $$.
			const contentStart = cx.lineStart + openingLen;
			let contentEnd: number;

			// Look for a closing $$ in the rest of the opening line.
			let closingMatch = mathBlockEndRegex.exec(line.text.substring(openingLen));

			if (closingMatch) {
				// Single-line block: the content stops where the closing
				// delimiter (and any trailing whitespace) begins.
				contentEnd = cx.lineStart + line.text.length - closingMatch[0].length;
			} else {
				// Multi-line block: consume lines until one ends with $$ or
				// there are no more lines.
				let hasLine = cx.nextLine();
				while (hasLine) {
					closingMatch = mathBlockEndRegex.exec(line.text);
					if (closingMatch) {
						break;
					}
					hasLine = cx.nextLine();
				}

				if (closingMatch) {
					// Exclude the ending delimiter from the content.
					contentEnd = cx.lineStart + line.text.length - closingMatch[0].length;
				} else {
					// No closing $$ was found: the content runs up to the
					// current line start.
					contentEnd = cx.lineStart;
				}
			}

			// A math block doesn't need an ending delimiter, so clamp the
			// container's end to the end of the current line rather than
			// including text that doesn't exist.
			const lineEnd = cx.lineStart + line.text.length;
			const containerEnd = Math.min(lineEnd, contentEnd + delimLen);

			// Two nested nodes are added so that the content node can be
			// replaced while the region remains labelled as math.
			const contentElem = cx.elt(blockMathContentTagName, contentStart, contentEnd);
			cx.addElement(cx.elt(
				blockMathTagName,
				contentStart - delimLen,
				containerEnd,
				[contentElem],
			));

			// Move past the line holding the ending delimiter so that it isn't
			// re-processed (it may look the same as a starting delimiter).
			cx.nextLine();

			return true;
		},

		// Ends the leaf block currently being parsed when [line] starts a math
		// block.
		//
		// @param line The line to check for an opening $$.
		// @returns true if [line] starts a math block.
		endLeaf(_cx: BlockContext, line: Line, _leaf: LeafBlock): boolean {
			return mathBlockStartRegex.test(line.text);
		},
	}],
	wrap: wrappedTeXParser(blockMathContentTagName),
};

/**
 * Markdown configuration for block and inline math support.
 *
 * Lists the inline math (`$...$`) configuration followed by the block math
 * (`$$...$$`) configuration.
 */
export const MarkdownMathExtension: MarkdownConfig[] = [
	InlineMathConfig,
	BlockMathConfig,
];
Tools: Apply eslint rule multiline-comment-style with checkJSDoc option 2023-06-30 10:55:56 +02:00			`// Search for $s and $$s in markdown and mark the regions between them as math.`
			`//`
			`// Text between single $s is marked as InlineMath and text between $$s is marked`
			`// as BlockMath.`
Chore: Set up repository for testing/preparation for mobile markdown toolbar PR (#6650) 2022-07-22 11:44:19 +02:00
			`import { tags, Tag } from '@lezer/highlight';`
			`import { parseMixed, SyntaxNodeRef, Input, NestedParse, ParseWrapper } from '@lezer/common';`

			`// Extend the existing markdown parser`
			`import {`
			`MarkdownConfig, InlineContext,`
			`BlockContext, Line, LeafBlock,`
			`} from '@lezer/markdown';`

			`// The existing stexMath parser is used to parse the text between the $s`
			`import { stexMath } from '@codemirror/legacy-modes/mode/stex';`
			`import { StreamLanguage } from '@codemirror/language';`

			`const dollarSignCharcode = 36;`
			`const backslashCharcode = 92;`

			`// (?:[>]\s*)?: Optionally allow block math lines to start with '> '`
			`const mathBlockStartRegex = /^(?:\s[>]\s)?\$\$/;`
			`const mathBlockEndRegex = /\$\$\s*$/;`

			`const texLanguage = StreamLanguage.define(stexMath);`
			`export const blockMathTagName = 'BlockMath';`
			`export const blockMathContentTagName = 'BlockMathContent';`
			`export const inlineMathTagName = 'InlineMath';`
			`export const inlineMathContentTagName = 'InlineMathContent';`

			`export const mathTag = Tag.define(tags.monospace);`
			`export const inlineMathTag = Tag.define(mathTag);`

Tools: Apply eslint rule multiline-comment-style with checkJSDoc option 2023-06-30 10:55:56 +02:00			`// Wraps a TeX math-mode parser. This removes [nodeTag] from the syntax tree`
			`// and replaces it with a region handled by the sTeXMath parser.`
			`//`
			`// @param nodeTag Name of the nodes to replace with regions parsed by the sTeX parser.`
			`// @returns a wrapped sTeX parser.`
Chore: Set up repository for testing/preparation for mobile markdown toolbar PR (#6650) 2022-07-22 11:44:19 +02:00			`const wrappedTeXParser = (nodeTag: string): ParseWrapper => {`
			`return parseMixed((node: SyntaxNodeRef, _input: Input): NestedParse => {`
			`if (node.name !== nodeTag) {`
			`return null;`
			`}`

			`return {`
			`parser: texLanguage.parser,`
			`};`
			`});`
			`};`

			`// Markdown extension for recognizing inline code`
			`const InlineMathConfig: MarkdownConfig = {`
			`defineNodes: [`
			`{`
			`name: inlineMathTagName,`
			`style: inlineMathTag,`
			`},`
			`{`
			`name: inlineMathContentTagName,`
			`},`
			`],`
			`parseInline: [{`
			`name: inlineMathTagName,`
			`after: 'InlineCode',`

			`parse(cx: InlineContext, current: number, pos: number): number {`
			`const prevCharCode = pos - 1 >= 0 ? cx.char(pos - 1) : -1;`
			`const nextCharCode = cx.char(pos + 1);`
			`if (current !== dollarSignCharcode`
			`\|\| prevCharCode === dollarSignCharcode`
			`\|\| nextCharCode === dollarSignCharcode) {`
			`return -1;`
			`}`

			`// Don't match if there's a space directly after the '$'`
			`if (/\s/.exec(String.fromCharCode(nextCharCode))) {`
			`return -1;`
			`}`

			`const start = pos;`
			`const end = cx.end;`
			`let escaped = false;`

			`pos ++;`

			`// Scan ahead for the next '$' symbol`
			`for (; pos < end && (escaped \|\| cx.char(pos) !== dollarSignCharcode); pos++) {`
			`if (!escaped && cx.char(pos) === backslashCharcode) {`
			`escaped = true;`
			`} else {`
			`escaped = false;`
			`}`
			`}`

			`// Don't match if the ending '$' is preceded by a space.`
			`const prevChar = String.fromCharCode(cx.char(pos - 1));`
			`if (/\s/.exec(prevChar)) {`
			`return -1;`
			`}`

			`// It isn't a math region if there is no ending '$'`
			`if (pos === end) {`
			`return -1;`
			`}`

			`// Advance to just after the ending '$'`
			`pos ++;`

			`// Add a wraping inlineMathTagName node that contains an inlineMathContentTagName.`
			`// The inlineMathContentTagName node can thus be safely removed and the region`
			`// will still be marked as a math region.`
			`const contentElem = cx.elt(inlineMathContentTagName, start + 1, pos - 1);`
			`cx.addElement(cx.elt(inlineMathTagName, start, pos, [contentElem]));`

			`return pos + 1;`
			`},`
			`}],`
			`wrap: wrappedTeXParser(inlineMathContentTagName),`
			`};`

			`// Extension for recognising block code`
			`const BlockMathConfig: MarkdownConfig = {`
			`defineNodes: [`
			`{`
			`name: blockMathTagName,`
			`style: mathTag,`
			`},`
			`{`
			`name: blockMathContentTagName,`
			`},`
			`],`
			`parseBlock: [{`
			`name: blockMathTagName,`
			`before: 'Blockquote',`
			`parse(cx: BlockContext, line: Line): boolean {`
			`const delimLen = 2;`

			`// $$ delimiter? Start math!`
			`const mathStartMatch = mathBlockStartRegex.exec(line.text);`
			`if (mathStartMatch) {`
			`const start = cx.lineStart + mathStartMatch[0].length;`
			`let stop;`

			`let endMatch = mathBlockEndRegex.exec(`
Tools: Enable eslint rule comma-dangle: always-multiline for functions 2023-08-22 12:58:53 +02:00			`line.text.substring(mathStartMatch[0].length),`
Chore: Set up repository for testing/preparation for mobile markdown toolbar PR (#6650) 2022-07-22 11:44:19 +02:00			`);`

			`// If the math region ends immediately (on the same line),`
			`if (endMatch) {`
			`const lineLength = line.text.length;`
			`stop = cx.lineStart + lineLength - endMatch[0].length;`
			`} else {`
			`let hadNextLine = false;`

			`// Otherwise, it's a multi-line block display.`
			`// Consume lines until we reach the end.`
			`do {`
			`hadNextLine = cx.nextLine();`
			`endMatch = hadNextLine ? mathBlockEndRegex.exec(line.text) : null;`
			`}`
			`while (hadNextLine && endMatch === null);`

			`if (hadNextLine && endMatch) {`
			`const lineLength = line.text.length;`

			`// Remove the ending delimiter`
			`stop = cx.lineStart + lineLength - endMatch[0].length;`
			`} else {`
			`stop = cx.lineStart;`
			`}`
			`}`
			`const lineEnd = cx.lineStart + line.text.length;`

			`// Label the region. Add two labels so that one can be removed.`
			`const contentElem = cx.elt(blockMathContentTagName, start, stop);`
			`const containerElement = cx.elt(`
			`blockMathTagName,`
			`start - delimLen,`

			`// Math blocks don't need ending delimiters, so ensure we don't`
			`// include text that doesn't exist.`
			`Math.min(lineEnd, stop + delimLen),`

			`// The child of the container element should be the content element`
Tools: Enable eslint rule comma-dangle: always-multiline for functions 2023-08-22 12:58:53 +02:00			`[contentElem],`
Chore: Set up repository for testing/preparation for mobile markdown toolbar PR (#6650) 2022-07-22 11:44:19 +02:00			`);`
			`cx.addElement(containerElement);`

			`// Don't re-process the ending delimiter (it may look the same`
			`// as the starting delimiter).`
			`cx.nextLine();`

			`return true;`
			`}`

			`return false;`
			`},`
			`// End paragraph-like blocks`
			`endLeaf(_cx: BlockContext, line: Line, _leaf: LeafBlock): boolean {`
			`// Leaf blocks (e.g. block quotes) end early if math starts.`
			`return mathBlockStartRegex.exec(line.text) !== null;`
			`},`
			`}],`
			`wrap: wrappedTeXParser(blockMathContentTagName),`
			`};`

			`/** Markdown configuration for block and inline math support. */`
			`export const MarkdownMathExtension: MarkdownConfig[] = [`
			`InlineMathConfig,`
			`BlockMathConfig,`
			`];`