diff --git a/packages/app-cli/tests/HtmlToMd.ts b/packages/app-cli/tests/HtmlToMd.ts index db043efbe..c7e6915bb 100644 --- a/packages/app-cli/tests/HtmlToMd.ts +++ b/packages/app-cli/tests/HtmlToMd.ts @@ -84,4 +84,10 @@ describe('HtmlToMd', function() { } })); + it('should allow disabling escape', async () => { + const htmlToMd = new HtmlToMd(); + expect(htmlToMd.parse('https://test.com/1_2_3.pdf', { disableEscapeContent: true })).toBe('https://test.com/1_2_3.pdf'); + expect(htmlToMd.parse('https://test.com/1_2_3.pdf', { disableEscapeContent: false })).toBe('https://test.com/1\\_2\\_3.pdf'); + }); + }); diff --git a/packages/app-desktop/gui/NoteEditor/utils/clipboardUtils.ts b/packages/app-desktop/gui/NoteEditor/utils/clipboardUtils.ts index 995353e37..99660398f 100644 --- a/packages/app-desktop/gui/NoteEditor/utils/clipboardUtils.ts +++ b/packages/app-desktop/gui/NoteEditor/utils/clipboardUtils.ts @@ -69,8 +69,17 @@ export function htmlToClipboardData(html: string): ClipboardData { // In that case we need to set both HTML and Text context, otherwise it // won't be possible to paste the text in, for example, a text editor. // https://github.com/laurent22/joplin/issues/4788 + // + // Also we don't escape the content produced in HTML to MD conversion + // because it's not what would be expected. For example, if the content is + // `* something`, strictly speaking it would be correct to escape to `\* + // something`, however this is not what the user would expect when copying + // text. Likewise for URLs that contain "_". So the resulting Markdown might + // not be perfectly valid but would be closer to what a user would expect. + // If they want accurate Markdown they can switch to the MD editor. 
+ // https://github.com/laurent22/joplin/issues/5440 return { - text: htmlToMd().parse(copyableContent), + text: htmlToMd().parse(copyableContent, { disableEscapeContent: true }), html: cleanUpCodeBlocks(copyableContent), }; } diff --git a/packages/lib/HtmlToMd.ts b/packages/lib/HtmlToMd.ts index 91f19e23f..e38f0d090 100644 --- a/packages/lib/HtmlToMd.ts +++ b/packages/lib/HtmlToMd.ts @@ -6,6 +6,7 @@ export interface ParseOptions { anchorNames?: string[]; preserveImageTagsWithSize?: boolean; baseUrl?: string; + disableEscapeContent?: boolean; } export default class HtmlToMd { @@ -20,6 +21,7 @@ export default class HtmlToMd { emDelimiter: '*', strongDelimiter: '**', br: '', + disableEscapeContent: 'disableEscapeContent' in options ? options.disableEscapeContent : false, }); turndown.use(turndownPluginGfm); turndown.remove('script'); diff --git a/packages/turndown/src/turndown.js b/packages/turndown/src/turndown.js index 299fc4379..f916226e9 100644 --- a/packages/turndown/src/turndown.js +++ b/packages/turndown/src/turndown.js @@ -23,6 +23,7 @@ export default function TurndownService (options) { linkReferenceStyle: 'full', anchorNames: [], br: ' ', + disableEscapeContent: false, blankReplacement: function (content, node) { return node.isBlock ? '\n\n' : '' }, @@ -181,6 +182,8 @@ TurndownService.prototype = { */ function process (parentNode, escapeContent = 'auto') { + if (this.options.disableEscapeContent) escapeContent = false; + var self = this return reduce.call(parentNode.childNodes, function (output, node) { node = new Node(node)