From 21897a3cd4ae7fb4277d2b7fead6f2a72d9b92ca Mon Sep 17 00:00:00 2001 From: Laurent Cozic Date: Sat, 22 Jun 2019 18:57:41 +0100 Subject: [PATCH] Clipper: Resolves #1669: Handle special case of code block used on Microsoft website --- CliClient/package-lock.json | 62 ++++++++++++++++++- CliClient/package.json | 2 +- CliClient/tests/html_to_md/code_1.md | 12 ++-- CliClient/tests/html_to_md/code_2.html | 2 + CliClient/tests/html_to_md/code_2.md | 5 ++ .../html_to_md/text_with_escaped_html.md | 4 +- .../content_scripts/index.js | 17 +++++ ElectronClient/app/package.json | 2 +- ReactNativeClient/lib/HtmlToMd.js | 1 + 9 files changed, 96 insertions(+), 11 deletions(-) create mode 100644 CliClient/tests/html_to_md/code_2.html create mode 100644 CliClient/tests/html_to_md/code_2.md diff --git a/CliClient/package-lock.json b/CliClient/package-lock.json index ff783047e..3dcc6a5f9 100644 --- a/CliClient/package-lock.json +++ b/CliClient/package-lock.json @@ -165,6 +165,11 @@ "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", "integrity": "sha1-x57Zf380y48robyXkLzDZkdLS3k=" }, + "atob": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/atob/-/atob-2.1.2.tgz", + "integrity": "sha512-Wm6ukoaOGJi/73p/cl2GvLjTI5JM1k/O14isD73YML8StrH/7/lRFgmg8nICZgD3bZZvjwCGxtMOD3wWNAu8cg==" + }, "aws-sign2": { "version": "0.7.0", "resolved": "https://registry.npmjs.org/aws-sign2/-/aws-sign2-0.7.0.tgz", @@ -453,6 +458,24 @@ } } }, + "css": { + "version": "2.2.4", + "resolved": "https://registry.npmjs.org/css/-/css-2.2.4.tgz", + "integrity": "sha512-oUnjmWpy0niI3x/mPL8dVEI1l7MnG3+HHyRPHf+YFSbK+svOhXpmSOcDURUh2aOCgl2grzrOPt1nHLuCVFULLw==", + "requires": { + "inherits": "^2.0.3", + "source-map": "^0.6.1", + "source-map-resolve": "^0.5.2", + "urix": "^0.1.0" + }, + "dependencies": { + "source-map": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==" + } + } + }, "cssom": { "version": "0.3.6", "resolved": "https://registry.npmjs.org/cssom/-/cssom-0.3.6.tgz", @@ -517,6 +540,11 @@ "ms": "2.0.0" } }, + "decode-uri-component": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/decode-uri-component/-/decode-uri-component-0.2.0.tgz", + "integrity": "sha1-6zkTMzRYd1y4TNGh+uBiEGu4dUU=" + }, "decompress-response": { "version": "3.3.0", "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-3.3.0.tgz", @@ -1437,10 +1465,11 @@ "dev": true }, "joplin-turndown": { - "version": "4.0.12", - "resolved": "https://registry.npmjs.org/joplin-turndown/-/joplin-turndown-4.0.12.tgz", - "integrity": "sha512-HlxkcIiNFSMLBvYktoXqLLHFGuwQYlcPclo0Peeatw3cPe6iFqSsEgEGY/0bYM/fubA/zpPULrJcjST99BO9wQ==", + "version": "4.0.15", + "resolved": "https://registry.npmjs.org/joplin-turndown/-/joplin-turndown-4.0.15.tgz", + "integrity": "sha512-68ukx19XFbKtJ5hfPfPX6IDLFZ1+NI+CpxJZyDEXAN5rPkyGXDw9xnEfo1IYRd+fq56upjo5Fn7J1hTCQTVTIA==", "requires": { + "css": "^2.2.4", "html-entities": "^1.2.1", "jsdom": "^11.9.0" } @@ -2441,6 +2470,11 @@ "resolved": "https://registry.npmjs.org/requires-port/-/requires-port-1.0.0.tgz", "integrity": "sha1-kl0mAdOaxIXgkc8NpcbmlNw9yv8=" }, + "resolve-url": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/resolve-url/-/resolve-url-0.2.1.tgz", + "integrity": "sha1-LGN/53yJOv0qZj/iGqkIAGjiBSo=" + }, "retry": { "version": "0.10.1", "resolved": "https://registry.npmjs.org/retry/-/retry-0.10.1.tgz", @@ -2598,6 +2632,23 @@ "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.5.7.tgz", "integrity": "sha1-igOdLRAh0i0eoUyA2OpGi6LvP8w=" }, + "source-map-resolve": { + "version": "0.5.2", + "resolved": "https://registry.npmjs.org/source-map-resolve/-/source-map-resolve-0.5.2.tgz", + "integrity": "sha512-MjqsvNwyz1s0k81Goz/9vRBe9SZdB09Bdw+/zYyO+3CuPk6fouTaxscHkgtE8jKvf01kVfl8riHzERQ/kefaSA==", + "requires": { + "atob": "^2.1.1", + "decode-uri-component": "^0.2.0", + "resolve-url": "^0.2.1", + "source-map-url": "^0.4.0", + "urix": "^0.1.0" + } + }, + "source-map-url": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/source-map-url/-/source-map-url-0.4.0.tgz", + "integrity": "sha1-PpNdfd1zYxuXZZlW1VEo6HtQhKM=" + }, "split-skip": { "version": "0.0.2", "resolved": "https://registry.npmjs.org/split-skip/-/split-skip-0.0.2.tgz", @@ -3114,6 +3165,11 @@ } } }, + "urix": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/urix/-/urix-0.1.0.tgz", + "integrity": "sha1-2pN/emLiH+wf0Y1Js1wpNQZ6bHI=" + }, "url-parse": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/url-parse/-/url-parse-1.2.0.tgz", diff --git a/CliClient/package.json b/CliClient/package.json index b44d9c070..951b040f5 100644 --- a/CliClient/package.json +++ b/CliClient/package.json @@ -43,7 +43,7 @@ "html-minifier": "^3.5.15", "image-data-uri": "^2.0.0", "image-type": "^3.0.0", - "joplin-turndown": "^4.0.12", + "joplin-turndown": "^4.0.15", "joplin-turndown-plugin-gfm": "^1.0.8", "jssha": "^2.3.0", "levenshtein": "^1.0.5", diff --git a/CliClient/tests/html_to_md/code_1.md b/CliClient/tests/html_to_md/code_1.md index 884a01512..9a9bf67bd 100644 --- a/CliClient/tests/html_to_md/code_1.md +++ b/CliClient/tests/html_to_md/code_1.md @@ -1,5 +1,7 @@ - def ma_fonction(): - """ - C'est une super fonction - """ - pass \ No newline at end of file +``` +def ma_fonction(): + """ + C'est une super fonction + """ + pass +``` \ No newline at end of file diff --git a/CliClient/tests/html_to_md/code_2.html b/CliClient/tests/html_to_md/code_2.html new file mode 100644 index 000000000..33185c93b --- /dev/null +++ b/CliClient/tests/html_to_md/code_2.html @@ -0,0 +1,2 @@ +
thatsCode();
+
thatsJustPre(); // In that case we do not have enough info to know if it is a codeblock or not, so we leave it as plain text
\ No newline at end of file diff --git a/CliClient/tests/html_to_md/code_2.md b/CliClient/tests/html_to_md/code_2.md new file mode 100644 index 000000000..fecab46b7 --- /dev/null +++ b/CliClient/tests/html_to_md/code_2.md @@ -0,0 +1,5 @@ +``` +thatsCode(); +``` + +thatsJustPre(); // In that case we do not have enough info to know if it is a codeblock or not, so we leave it as plain text \ No newline at end of file diff --git a/CliClient/tests/html_to_md/text_with_escaped_html.md b/CliClient/tests/html_to_md/text_with_escaped_html.md index 89d3719be..ee09050dc 100644 --- a/CliClient/tests/html_to_md/text_with_escaped_html.md +++ b/CliClient/tests/html_to_md/text_with_escaped_html.md @@ -6,4 +6,6 @@ Some text, not an image, so it should remain escaped: But this is code so it can be unescaped: - \ No newline at end of file +``` + +``` \ No newline at end of file diff --git a/Clipper/joplin-webclipper/content_scripts/index.js b/Clipper/joplin-webclipper/content_scripts/index.js index 3056d935b..5cf942210 100644 --- a/Clipper/joplin-webclipper/content_scripts/index.js +++ b/Clipper/joplin-webclipper/content_scripts/index.js @@ -114,6 +114,21 @@ } } + // This sets the PRE elements computed style to the style attribute, so that + // the info can be exported and later processed by the htmlToMd converter + // to detect code blocks. + function hardcodePreStyles(doc) { + const preElements = doc.getElementsByTagName('pre'); + + for (const preElement of preElements) { + const fontFamily = getComputedStyle(preElement).getPropertyValue('font-family'); + const fontFamilyArray = fontFamily.split(',').map(f => f.toLowerCase().trim()); + if (fontFamilyArray.indexOf('monospace') >= 0) { + preElement.style.fontFamily = fontFamily; + } + } + } + function documentForReadability() { // Readability directly change the passed document so clone it so as // to preserve the original web page. @@ -180,6 +195,7 @@ } else if (command.name === "completePageHtml") { + hardcodePreStyles(document); const cleanDocument = document.body.cloneNode(true); const imageSizes = getImageSizes(document, true); cleanUpElement(cleanDocument, imageSizes); @@ -187,6 +203,7 @@ } else if (command.name === "selectedHtml") { + hardcodePreStyles(document); const range = window.getSelection().getRangeAt(0); const container = document.createElement('div'); container.appendChild(range.cloneContents()); diff --git a/ElectronClient/app/package.json b/ElectronClient/app/package.json index 3ef24872f..dd061ff1c 100644 --- a/ElectronClient/app/package.json +++ b/ElectronClient/app/package.json @@ -101,7 +101,7 @@ "highlight.js": "^9.15.6", "html-entities": "^1.2.1", "image-type": "^3.0.0", - "joplin-turndown": "^4.0.12", + "joplin-turndown": "^4.0.15", "joplin-turndown-plugin-gfm": "^1.0.8", "jssha": "^2.3.1", "katex": "^0.10.0", diff --git a/ReactNativeClient/lib/HtmlToMd.js b/ReactNativeClient/lib/HtmlToMd.js index 37c2ba7e7..4d915ddc4 100644 --- a/ReactNativeClient/lib/HtmlToMd.js +++ b/ReactNativeClient/lib/HtmlToMd.js @@ -8,6 +8,7 @@ class HtmlToMd { const turndown = new TurndownService({ headingStyle: 'atx', anchorNames: options.anchorNames ? options.anchorNames.map(n => n.trim().toLowerCase()) : [], + codeBlockStyle: 'fenced', }) turndown.use(turndownPluginGfm) turndown.remove('script');