1
0
mirror of https://github.com/laurent22/joplin.git synced 2024-12-24 10:27:10 +02:00

Desktop, Cli: Fixed various bugs related to the import of ENEX files as HTML

This commit is contained in:
Laurent Cozic 2020-06-15 17:10:51 +01:00
parent 6a41d6e85a
commit fcd00b3212
7 changed files with 62 additions and 38 deletions

View File

@ -89,20 +89,20 @@ describe('EnexToHtml', function() {
}], }],
}); });
it('fails when not given a matching resource', asyncTest(async () => { // it('fails when not given a matching resource', asyncTest(async () => {
// To test the promise-unexpectedly-resolved case, add `audioResource` to the array. // // To test the promise-unexpectedly-resolved case, add `audioResource` to the array.
const resources = []; // const resources = [];
const inputFile = fileWithPath('en-media--image.enex'); // const inputFile = fileWithPath('en-media--image.enex');
const enexInput = await shim.fsDriver().readFile(inputFile); // const enexInput = await shim.fsDriver().readFile(inputFile);
const promisedOutput = enexXmlToHtml(enexInput, resources); // const promisedOutput = enexXmlToHtml(enexInput, resources);
promisedOutput.then(() => { // promisedOutput.then(() => {
// Promise should not be resolved // // Promise should not be resolved
expect(false).toEqual(true); // expect(false).toEqual(true);
}, (reason) => { // }, (reason) => {
expect(reason) // expect(reason)
.toBe('Hash with no associated resource: 89ce7da62c6b2832929a6964237e98e9'); // .toBe('Hash with no associated resource: 89ce7da62c6b2832929a6964237e98e9');
}); // });
})); // }));
}); });

View File

@ -2,6 +2,5 @@
<div><a href="joplin://21ca2b948f222a38802940ec7e2e5de3" hash="21ca2b948f222a38802940ec7e2e5de3" type="application/pdf" style="cursor:pointer;" alt="attachment-1">attachment-1</a></div> <div><a href="joplin://21ca2b948f222a38802940ec7e2e5de3" hash="21ca2b948f222a38802940ec7e2e5de3" type="application/pdf" style="cursor:pointer;" alt="attachment-1">attachment-1</a></div>
<div> <div>
<br> <br>
<br>
</div> </div>
</en-note> </en-note>

View File

@ -8,6 +8,5 @@
</div> </div>
<div> <div>
<br> <br>
<br>
</div> </div>
</en-note> </en-note>

View File

@ -4,11 +4,9 @@
<div> <div>
<input type="checkbox" onclick="return false;">A test for <i>italic</i> <input type="checkbox" onclick="return false;">A test for <i>italic</i>
<br> <br>
<br>
</div> </div>
<div> <div>
<br> <br>
<br>
</div> </div>
<div><i><img src=":/89ce7da62c6b2832929a6964237e98e9" hash="89ce7da62c6b2832929a6964237e98e9" type="image/jpeg" alt=""></i></div> <div><i><img src=":/89ce7da62c6b2832929a6964237e98e9" hash="89ce7da62c6b2832929a6964237e98e9" type="image/jpeg" alt=""></i></div>
</en-note> </en-note>

View File

@ -7,6 +7,28 @@ const htmlentities = new Entities().encode;
const imageRegex = /<img([\s\S]*?)src=["']([\s\S]*?)["']([\s\S]*?)>/gi; const imageRegex = /<img([\s\S]*?)src=["']([\s\S]*?)["']([\s\S]*?)>/gi;
const anchorRegex = /<a([\s\S]*?)href=["']([\s\S]*?)["']([\s\S]*?)>/gi; const anchorRegex = /<a([\s\S]*?)href=["']([\s\S]*?)["']([\s\S]*?)>/gi;
// Lower-case names of the HTML elements regarded as self-closing.
// Looked up (case-insensitively) by HtmlUtils.isSelfClosingTag().
const selfClosingElements = (
	'area base basefont br col command embed frame hr img ' +
	'input isindex keygen link meta param source track wbr'
).split(' ');
class HtmlUtils { class HtmlUtils {
headAndBodyHtml(doc) { headAndBodyHtml(doc) {
const output = []; const output = [];
@ -15,6 +37,10 @@ class HtmlUtils {
return output.join('\n'); return output.join('\n');
} }
isSelfClosingTag(tagName) {
return selfClosingElements.includes(tagName.toLowerCase());
}
extractImageUrls(html) { extractImageUrls(html) {
if (!html) return []; if (!html) return [];

View File

@ -1,6 +1,9 @@
const stringToStream = require('string-to-stream'); const stringToStream = require('string-to-stream');
const cleanHtml = require('clean-html'); const cleanHtml = require('clean-html');
const resourceUtils = require('lib/resourceUtils.js'); const resourceUtils = require('lib/resourceUtils.js');
const { isSelfClosingTag } = require('lib/htmlUtils');
const Entities = require('html-entities').AllHtmlEntities;
const htmlentities = new Entities().encode;
function addResourceTag(lines, resource, attributes) { function addResourceTag(lines, resource, attributes) {
// Note: refactor to use Resource.markdownTag // Note: refactor to use Resource.markdownTag
@ -56,7 +59,7 @@ function enexXmlToHtml_(stream, resources) {
} }
}; };
return new Promise((resolve, reject) => { return new Promise((resolve) => {
const options = {}; const options = {};
const strict = false; const strict = false;
const saxStream = require('sax').createStream(strict, options); const saxStream = require('sax').createStream(strict, options);
@ -69,12 +72,11 @@ function enexXmlToHtml_(stream, resources) {
saxStream.on('error', function(e) { saxStream.on('error', function(e) {
console.warn(e); console.warn(e);
// reject(e);
}); });
saxStream.on('text', function(text) { saxStream.on('text', function(text) {
section.lines.push(text); section.lines.push(htmlentities(text));
}); });
saxStream.on('opentag', function(node) { saxStream.on('opentag', function(node) {
@ -110,7 +112,7 @@ function enexXmlToHtml_(stream, resources) {
} }
if (!found) { if (!found) {
reject(`Hash with no associated resource: ${hash}`); // console.warn(`Hash with no associated resource: ${hash}`);
} }
} }
@ -122,16 +124,16 @@ function enexXmlToHtml_(stream, resources) {
} }
} else if (tagName == 'en-todo') { } else if (tagName == 'en-todo') {
section.lines.push('<input type="checkbox" onclick="return false;" />'); section.lines.push('<input type="checkbox" onclick="return false;" />');
} else if (node.isSelfClosing) { } else if (isSelfClosingTag(tagName)) {
section.lines.push(`<${tagName}${attributesStr}>`); section.lines.push(`<${tagName}${attributesStr}/>`);
} else { } else {
section.lines.push(`<${tagName}${attributesStr} />`); section.lines.push(`<${tagName}${attributesStr}>`);
} }
}); });
saxStream.on('closetag', function(n) { saxStream.on('closetag', function(node) {
const tagName = n ? n.toLowerCase() : n; const tagName = node ? node.toLowerCase() : node;
section.lines.push(`</${tagName}>`); if (!isSelfClosingTag(tagName)) section.lines.push(`</${tagName}>`);
}); });
saxStream.on('attribute', function() {}); saxStream.on('attribute', function() {});
@ -151,19 +153,19 @@ async function enexXmlToHtml(xmlString, resources, options = {}) {
const stream = stringToStream(xmlString); const stream = stringToStream(xmlString);
const result = await enexXmlToHtml_(stream, resources, options); const result = await enexXmlToHtml_(stream, resources, options);
try { const preCleaning = result.content.lines.join('');
const preCleaning = result.content.lines.join(''); // xmlString const final = await beautifyHtml(preCleaning);
const final = await beautifyHtml(preCleaning); return final.join('');
return final.join('');
} catch (error) {
console.warn(error);
}
} }
const beautifyHtml = (html) => { const beautifyHtml = (html) => {
return new Promise((resolve) => { return new Promise((resolve) => {
const options = { wrap: 0 }; try {
cleanHtml.clean(html, options, (...cleanedHtml) => resolve(cleanedHtml)); cleanHtml.clean(html, { wrap: 0 }, (...cleanedHtml) => resolve(cleanedHtml));
} catch (error) {
console.warn(`Could not clean HTML - the "unclean" version will be used: ${error.message}: ${html.trim().substr(0, 512).replace(/[\n\r]/g, ' ')}...`);
resolve([html]);
}
}); });
}; };

View File

@ -688,7 +688,7 @@ function enexXmlToMdArray(stream, resources) {
} }
if (!found) { if (!found) {
console.warn(`Hash with no associated resource: ${hash}`); // console.warn(`Hash with no associated resource: ${hash}`);
} }
} }