// Mirror of https://github.com/laurent22/joplin.git (synced 2025-01-02 12:47:41 +02:00)
// File: joplin/packages/lib/HtmlToMd.ts (TypeScript, 86 lines, 3.2 KiB)

const TurndownService = require('@joplin/turndown');
const turndownPluginGfm = require('@joplin/turndown-plugin-gfm').gfm;
import markdownUtils from './markdownUtils';
// Matches strings that end with a ".pdf" extension (case-insensitive).
//
// There is deliberately no leading wildcard: the pattern is only used with
// `.test()`, where an unanchored `[\s\S]*?` prefix does not change the result
// but forces the engine to retry the lazy scan from every start offset when
// the input does not end in ".pdf", which can make failed matches quadratic in
// the input length.
const pdfUrlRegex = /\.pdf$/i;
/**
 * Options accepted by `HtmlToMd.parse`. Every member is optional.
 */
export interface ParseOptions {
	// ---- Options forwarded to the Turndown service ----

	/** Anchor names; each one is trimmed and lower-cased before being handed to Turndown. */
	anchorNames?: string[];

	/** Coerced to a boolean and passed to Turndown under the same name. */
	preserveImageTagsWithSize?: boolean;

	/** Coerced to a boolean and passed to Turndown under the same name. */
	preserveNestedTables?: boolean;

	/** Coerced to a boolean and passed to Turndown under the same name. */
	preserveColorStyles?: boolean;

	/** Passed to Turndown under the same name; treated as `false` when not provided. */
	disableEscapeContent?: boolean;

	// ---- Options handled by `HtmlToMd.parse` itself ----

	/**
	 * When truthy, `<embed>` and `<object>` elements whose `src` / `data`
	 * attribute ends in ".pdf" are converted to `[embedded_pdf](url)` links.
	 */
	convertEmbeddedPdfsToLinks?: boolean;

	/** When set, the generated Markdown is passed through `markdownUtils.prependBaseUrl` with this value. */
	baseUrl?: string;
}
/**
 * Converts HTML to Markdown using the `@joplin/turndown` service with the GFM
 * plugin enabled.
 */
export default class HtmlToMd {

	/**
	 * Converts an HTML string to Markdown.
	 *
	 * `script` and `style` elements are registered for removal via
	 * `turndown.remove`. When `options.convertEmbeddedPdfsToLinks` is set,
	 * `<embed>` / `<object>` elements pointing at a ".pdf" URL are emitted as
	 * `[embedded_pdf](url)` links.
	 *
	 * @param html - The HTML to convert.
	 * @param options - Conversion options; see `ParseOptions`.
	 * @returns The Markdown produced by Turndown, passed through
	 * `markdownUtils.prependBaseUrl` when `options.baseUrl` is set.
	 */
	public parse(html: string, options: ParseOptions = {}) {
		// Declared first because both the `blankReplacement` callback and the
		// `addRule` call below refer to it.
		const pdfRule = {
			filter: ['embed', 'object'],
			// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
			replacement: function(_content: string, node: any, _options: any) {
				// <embed> carries its URL in `src`, <object> in `data`.
				let url: string | null = null;
				if (node.matches('embed')) {
					url = node.getAttribute('src');
				} else if (node.matches('object')) {
					url = node.getAttribute('data');
				}

				if (!url || !pdfUrlRegex.test(url)) return '';

				// We are setting embedded_pdf as name so that we can later distinguish them from normal links and create resources for them.
				return `[embedded_pdf](${url})`;
			},
		};

		// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
		const turndownOpts: any = {
			headingStyle: 'atx',
			anchorNames: options.anchorNames ? options.anchorNames.map(n => n.trim().toLowerCase()) : [],
			codeBlockStyle: 'fenced',
			preserveImageTagsWithSize: !!options.preserveImageTagsWithSize,
			preserveNestedTables: !!options.preserveNestedTables,
			preserveColorStyles: !!options.preserveColorStyles,
			bulletListMarker: '-',
			emDelimiter: '*',
			strongDelimiter: '**',
			allowResourcePlaceholders: true,

			// If soft-breaks are enabled, lines need to end with two or more spaces for
			// trailing <br/>s to render. See
			// https://github.com/laurent22/joplin/issues/8430
			br: ' ',

			// Always a real boolean, even when the key is present but undefined.
			disableEscapeContent: !!options.disableEscapeContent,
		};

		if (options.convertEmbeddedPdfsToLinks) {
			// Turndown ignores empty <object> tags, so we need to handle this case separately
			// https://github.com/mixmark-io/turndown/issues/293#issuecomment-588984202
			// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied
			turndownOpts.blankReplacement = (content: string, node: any) => {
				if (node.matches('object')) {
					return pdfRule.replacement(content, node, {});
				}

				if (node.isCode) {
					// Fix: Web clipper has trouble with code blocks on Joplin's website.
					// See https://github.com/laurent22/joplin/pull/10126#issuecomment-2016523281 .
					// If isCode, keep the blank node empty.
					// test case: packages/app-cli/tests/html_to_md/code_multiline_3.html
					return '';
				}

				return '\n\n';
			};
		}

		const turndown = new TurndownService(turndownOpts);
		turndown.use(turndownPluginGfm);
		turndown.remove('script');
		turndown.remove('style');

		if (options.convertEmbeddedPdfsToLinks) {
			turndown.addRule('pdf', pdfRule);
		}

		let md = turndown.turndown(html);
		if (options.baseUrl) md = markdownUtils.prependBaseUrl(md, options.baseUrl);
		return md;
	}
}