2023-06-30 09:55:56 +01:00
|
|
|
// Search for $s and $$s in markdown and mark the regions between them as math.
|
|
|
|
//
|
|
|
|
// Text between single $s is marked as InlineMath and text between $$s is marked
|
|
|
|
// as BlockMath.
|
2022-07-22 02:44:19 -07:00
|
|
|
|
|
|
|
import { tags, Tag } from '@lezer/highlight';
|
|
|
|
import { parseMixed, SyntaxNodeRef, Input, NestedParse, ParseWrapper } from '@lezer/common';
|
|
|
|
|
|
|
|
// Extend the existing markdown parser
|
|
|
|
import {
|
|
|
|
MarkdownConfig, InlineContext,
|
|
|
|
BlockContext, Line, LeafBlock,
|
|
|
|
} from '@lezer/markdown';
|
|
|
|
|
|
|
|
// The existing stexMath parser is used to parse the text between the $s
|
|
|
|
import { stexMath } from '@codemirror/legacy-modes/mode/stex';
|
|
|
|
import { StreamLanguage } from '@codemirror/language';
|
|
|
|
|
|
|
|
// Character codes of the math delimiter ('$') and of the escape character ('\').
const dollarSignCharcode = '$'.charCodeAt(0);
const backslashCharcode = '\\'.charCodeAt(0);

// Matches the opening '$$' of a block math region at the start of a line.
// (?:\s*[>]\s*)?: Optionally allow the line to begin with a blockquote
// marker (e.g. '> '), possibly surrounded by whitespace.
const mathBlockStartRegex = /^(?:\s*[>]\s*)?\$\$/;

// Matches a closing '$$' that is followed only by optional trailing whitespace.
const mathBlockEndRegex = /\$\$\s*$/;
|
|
|
|
|
|
|
|
// Language built from the legacy sTeX math mode; its parser handles the text
// found between math delimiters.
const texLanguage = StreamLanguage.define(stexMath);

// Names of the syntax nodes created for math regions. Each region gets an
// outer node and an inner "content" node.
export const blockMathTagName = 'BlockMath';
export const blockMathContentTagName = 'BlockMathContent';
export const inlineMathTagName = 'InlineMath';
export const inlineMathContentTagName = 'InlineMathContent';

// Highlighting tags: mathTag is derived from the monospace tag and
// inlineMathTag is derived from mathTag.
export const mathTag = Tag.define(tags.monospace);
export const inlineMathTag = Tag.define(mathTag);
|
|
|
|
|
2023-06-30 09:55:56 +01:00
|
|
|
/**
 * Wraps a TeX math-mode parser. Nodes named `nodeTag` are handed to the
 * sTeX math parser; every other node is left untouched.
 *
 * @param nodeTag Name of the nodes to replace with regions parsed by the sTeX parser.
 * @returns a wrapped sTeX parser.
 */
const wrappedTeXParser = (nodeTag: string): ParseWrapper => {
	// The callback returns null for nodes that should not be re-parsed, so
	// its return type must include null.
	return parseMixed((node: SyntaxNodeRef, _input: Input): NestedParse | null => {
		if (node.name !== nodeTag) {
			return null;
		}

		return {
			parser: texLanguage.parser,
		};
	});
};
|
|
|
|
|
|
|
|
// Markdown extension for recognizing inline math ($...$).
//
// Defines an outer InlineMath node and an inner InlineMathContent node. The
// content node is re-parsed with the TeX parser via `wrap`.
const InlineMathConfig: MarkdownConfig = {
	defineNodes: [
		{
			name: inlineMathTagName,
			style: inlineMathTag,
		},
		{
			name: inlineMathContentTagName,
		},
	],
	parseInline: [{
		name: inlineMathTagName,
		after: 'InlineCode',

		// Tries to parse an inline math region starting at [pos].
		//
		// @param cx The inline parsing context.
		// @param current Character code of the character at [pos].
		// @param pos Position of the character being considered.
		// @returns -1 if no math region starts at [pos]; otherwise, the position
		//          just after the closing '$'.
		parse(cx: InlineContext, current: number, pos: number): number {
			const prevCharCode = pos - 1 >= 0 ? cx.char(pos - 1) : -1;
			const nextCharCode = cx.char(pos + 1);

			// Only a lone '$' can start inline math: '$$' (in either direction)
			// is reserved for block math.
			if (current !== dollarSignCharcode
					|| prevCharCode === dollarSignCharcode
					|| nextCharCode === dollarSignCharcode) {
				return -1;
			}

			// Don't match if there's a space directly after the '$'
			if (/\s/.test(String.fromCharCode(nextCharCode))) {
				return -1;
			}

			const start = pos;
			const end = cx.end;
			let escaped = false;

			pos ++;

			// Scan ahead for the next unescaped '$' symbol. A backslash escapes
			// the character that follows it.
			for (; pos < end && (escaped || cx.char(pos) !== dollarSignCharcode); pos++) {
				if (!escaped && cx.char(pos) === backslashCharcode) {
					escaped = true;
				} else {
					escaped = false;
				}
			}

			// It isn't a math region if there is no ending '$'
			if (pos === end) {
				return -1;
			}

			// Don't match if the ending '$' is preceded by a space.
			const prevChar = String.fromCharCode(cx.char(pos - 1));
			if (/\s/.test(prevChar)) {
				return -1;
			}

			// Advance to just after the ending '$'
			pos ++;

			// Add a wrapping inlineMathTagName node that contains an inlineMathContentTagName.
			// The inlineMathContentTagName node can thus be safely removed and the region
			// will still be marked as a math region.
			const contentElem = cx.elt(inlineMathContentTagName, start + 1, pos - 1);
			cx.addElement(cx.elt(inlineMathTagName, start, pos, [contentElem]));

			// Return the end of the parsed element. [pos] already points just
			// past the closing '$'; returning pos + 1 would cause the character
			// that directly follows the math region to be skipped by all
			// inline parsers.
			return pos;
		},
	}],
	wrap: wrappedTeXParser(inlineMathContentTagName),
};
|
|
|
|
|
|
|
|
// Markdown extension for recognising block math ($$...$$).
//
// Defines an outer BlockMath node and an inner BlockMathContent node. The
// content node is re-parsed with the TeX parser via `wrap`.
const BlockMathConfig: MarkdownConfig = {
	defineNodes: [
		{ name: blockMathTagName, style: mathTag },
		{ name: blockMathContentTagName },
	],
	parseBlock: [{
		name: blockMathTagName,
		before: 'Blockquote',

		// Tries to parse a block math region that opens on [line].
		// Returns true if a region was added, false if [line] doesn't open one.
		parse(cx: BlockContext, line: Line): boolean {
			// No $$ delimiter at the start of the line? Not block math.
			const openingMatch = mathBlockStartRegex.exec(line.text);
			if (!openingMatch) {
				return false;
			}

			const delimiterLength = 2;
			const openingLength = openingMatch[0].length;
			const contentStart = cx.lineStart + openingLength;

			// Look for a closing delimiter in the remainder of the opening line.
			let closingMatch = mathBlockEndRegex.exec(line.text.substring(openingLength));

			if (closingMatch === null) {
				// Multi-line block: consume lines until one ends with the closing
				// delimiter or there are no more lines.
				let outOfLines = false;
				while (closingMatch === null && !outOfLines) {
					if (cx.nextLine()) {
						closingMatch = mathBlockEndRegex.exec(line.text);
					} else {
						outOfLines = true;
					}
				}
			}

			// With a closing delimiter, the content stops just before it.
			// Without one, the content stops at the start of the current line.
			const contentEnd = closingMatch
				? cx.lineStart + line.text.length - closingMatch[0].length
				: cx.lineStart;

			const currentLineEnd = cx.lineStart + line.text.length;

			// Label the region twice (container + content) so that one label
			// can be removed while the region remains marked.
			const content = cx.elt(blockMathContentTagName, contentStart, contentEnd);

			// Math blocks don't need ending delimiters, so clamp the container's
			// end to avoid including text that doesn't exist.
			const containerEnd = Math.min(currentLineEnd, contentEnd + delimiterLength);
			const container = cx.elt(
				blockMathTagName,
				contentStart - delimiterLength,
				containerEnd,
				[content],
			);
			cx.addElement(container);

			// Move past the closing delimiter so that it isn't re-processed
			// (it may look the same as an opening delimiter).
			cx.nextLine();

			return true;
		},

		// Paragraph-like leaf blocks end early when a line starts block math.
		endLeaf(_cx: BlockContext, line: Line, _leaf: LeafBlock): boolean {
			return mathBlockStartRegex.exec(line.text) !== null;
		},
	}],
	wrap: wrappedTeXParser(blockMathContentTagName),
};
|
|
|
|
|
|
|
|
/**
 * Markdown configuration that adds both inline ($...$) and block ($$...$$)
 * math support. The inline configuration is listed first.
 */
export const MarkdownMathExtension: MarkdownConfig[] = [InlineMathConfig, BlockMathConfig];
|