From fcd00b32125744636fb1c5b9c4a3b71cf2520edc Mon Sep 17 00:00:00 2001 From: Laurent Cozic Date: Mon, 15 Jun 2020 17:10:51 +0100 Subject: [PATCH] Desktop, Cli: Fixed various bugs related to the import of ENEX files as HTML --- CliClient/tests/EnexToHtml.js | 28 ++++++------- CliClient/tests/enex_to_html/attachment.html | 1 - .../tests/enex_to_html/en-media--audio.html | 1 - .../tests/enex_to_html/en-media--image.html | 2 - ReactNativeClient/lib/htmlUtils.js | 26 ++++++++++++ ReactNativeClient/lib/import-enex-html-gen.js | 40 ++++++++++--------- ReactNativeClient/lib/import-enex-md-gen.js | 2 +- 7 files changed, 62 insertions(+), 38 deletions(-) diff --git a/CliClient/tests/EnexToHtml.js b/CliClient/tests/EnexToHtml.js index 513676d83..d9d28f69e 100644 --- a/CliClient/tests/EnexToHtml.js +++ b/CliClient/tests/EnexToHtml.js @@ -89,20 +89,20 @@ describe('EnexToHtml', function() { }], }); - it('fails when not given a matching resource', asyncTest(async () => { - // To test the promise-unexpectedly-resolved case, add `audioResource` to the array. - const resources = []; - const inputFile = fileWithPath('en-media--image.enex'); - const enexInput = await shim.fsDriver().readFile(inputFile); - const promisedOutput = enexXmlToHtml(enexInput, resources); + // it('fails when not given a matching resource', asyncTest(async () => { + // // To test the promise-unexpectedly-resolved case, add `audioResource` to the array. + // const resources = []; + // const inputFile = fileWithPath('en-media--image.enex'); + // const enexInput = await shim.fsDriver().readFile(inputFile); + // const promisedOutput = enexXmlToHtml(enexInput, resources); - promisedOutput.then(() => { - // Promise should not be resolved - expect(false).toEqual(true); - }, (reason) => { - expect(reason) - .toBe('Hash with no associated resource: 89ce7da62c6b2832929a6964237e98e9'); - }); - })); + // promisedOutput.then(() => { + // // Promise should not be resolved + // expect(false).toEqual(true); + // }, (reason) => { + // expect(reason) + // .toBe('Hash with no associated resource: 89ce7da62c6b2832929a6964237e98e9'); + // }); + // })); }); diff --git a/CliClient/tests/enex_to_html/attachment.html b/CliClient/tests/enex_to_html/attachment.html index f3c9b5887..b54a0a822 100644 --- a/CliClient/tests/enex_to_html/attachment.html +++ b/CliClient/tests/enex_to_html/attachment.html @@ -2,6 +2,5 @@
attachment-1

-
\ No newline at end of file diff --git a/CliClient/tests/enex_to_html/en-media--audio.html b/CliClient/tests/enex_to_html/en-media--audio.html index feefcfab1..dbfe0fafa 100644 --- a/CliClient/tests/enex_to_html/en-media--audio.html +++ b/CliClient/tests/enex_to_html/en-media--audio.html @@ -8,6 +8,5 @@

-
\ No newline at end of file diff --git a/CliClient/tests/enex_to_html/en-media--image.html b/CliClient/tests/enex_to_html/en-media--image.html index f3c09555e..0e929dc98 100644 --- a/CliClient/tests/enex_to_html/en-media--image.html +++ b/CliClient/tests/enex_to_html/en-media--image.html @@ -4,11 +4,9 @@
A test for italic
-

-
\ No newline at end of file diff --git a/ReactNativeClient/lib/htmlUtils.js b/ReactNativeClient/lib/htmlUtils.js index a7ff8037a..08297d544 100644 --- a/ReactNativeClient/lib/htmlUtils.js +++ b/ReactNativeClient/lib/htmlUtils.js @@ -7,6 +7,28 @@ const htmlentities = new Entities().encode; const imageRegex = //gi; const anchorRegex = //gi; +const selfClosingElements = [ + 'area', + 'base', + 'basefont', + 'br', + 'col', + 'command', + 'embed', + 'frame', + 'hr', + 'img', + 'input', + 'isindex', + 'keygen', + 'link', + 'meta', + 'param', + 'source', + 'track', + 'wbr', +]; + class HtmlUtils { headAndBodyHtml(doc) { const output = []; @@ -15,6 +37,10 @@ class HtmlUtils { return output.join('\n'); } + isSelfClosingTag(tagName) { + return selfClosingElements.includes(tagName.toLowerCase()); + } + extractImageUrls(html) { if (!html) return []; diff --git a/ReactNativeClient/lib/import-enex-html-gen.js b/ReactNativeClient/lib/import-enex-html-gen.js index 93b675d9e..d86cf9dbc 100644 --- a/ReactNativeClient/lib/import-enex-html-gen.js +++ b/ReactNativeClient/lib/import-enex-html-gen.js @@ -1,6 +1,9 @@ const stringToStream = require('string-to-stream'); const cleanHtml = require('clean-html'); const resourceUtils = require('lib/resourceUtils.js'); +const { isSelfClosingTag } = require('lib/htmlUtils'); +const Entities = require('html-entities').AllHtmlEntities; +const htmlentities = new Entities().encode; function addResourceTag(lines, resource, attributes) { // Note: refactor to use Resource.markdownTag @@ -56,7 +59,7 @@ function enexXmlToHtml_(stream, resources) { } }; - return new Promise((resolve, reject) => { + return new Promise((resolve) => { const options = {}; const strict = false; const saxStream = require('sax').createStream(strict, options); @@ -69,12 +72,11 @@ function enexXmlToHtml_(stream, resources) { saxStream.on('error', function(e) { console.warn(e); - // reject(e); }); saxStream.on('text', function(text) { - section.lines.push(text); + section.lines.push(htmlentities(text)); }); saxStream.on('opentag', function(node) { @@ -110,7 +112,7 @@ function enexXmlToHtml_(stream, resources) { } if (!found) { - reject(`Hash with no associated resource: ${hash}`); + // console.warn(`Hash with no associated resource: ${hash}`); } } @@ -122,16 +124,16 @@ function enexXmlToHtml_(stream, resources) { } } else if (tagName == 'en-todo') { section.lines.push(''); - } else if (node.isSelfClosing) { - section.lines.push(`<${tagName}${attributesStr}>`); + } else if (isSelfClosingTag(tagName)) { + section.lines.push(`<${tagName}${attributesStr}/>`); } else { - section.lines.push(`<${tagName}${attributesStr} />`); + section.lines.push(`<${tagName}${attributesStr}>`); } }); - saxStream.on('closetag', function(n) { - const tagName = n ? n.toLowerCase() : n; - section.lines.push(``); + saxStream.on('closetag', function(node) { + const tagName = node ? node.toLowerCase() : node; + if (!isSelfClosingTag(tagName)) section.lines.push(``); }); saxStream.on('attribute', function() {}); @@ -151,19 +153,19 @@ async function enexXmlToHtml(xmlString, resources, options = {}) { const stream = stringToStream(xmlString); const result = await enexXmlToHtml_(stream, resources, options); - try { - const preCleaning = result.content.lines.join(''); // xmlString - const final = await beautifyHtml(preCleaning); - return final.join(''); - } catch (error) { - console.warn(error); - } + const preCleaning = result.content.lines.join(''); + const final = await beautifyHtml(preCleaning); + return final.join(''); } const beautifyHtml = (html) => { return new Promise((resolve) => { - const options = { wrap: 0 }; - cleanHtml.clean(html, options, (...cleanedHtml) => resolve(cleanedHtml)); + try { + cleanHtml.clean(html, { wrap: 0 }, (...cleanedHtml) => resolve(cleanedHtml)); + } catch (error) { + console.warn(`Could not clean HTML - the "unclean" version will be used: ${error.message}: ${html.trim().substr(0, 512).replace(/[\n\r]/g, ' ')}...`); + resolve([html]); + } }); }; diff --git a/ReactNativeClient/lib/import-enex-md-gen.js b/ReactNativeClient/lib/import-enex-md-gen.js index 8fb91f22c..99402f97c 100644 --- a/ReactNativeClient/lib/import-enex-md-gen.js +++ b/ReactNativeClient/lib/import-enex-md-gen.js @@ -688,7 +688,7 @@ function enexXmlToMdArray(stream, resources) { } if (!found) { - console.warn(`Hash with no associated resource: ${hash}`); + // console.warn(`Hash with no associated resource: ${hash}`); } }