1
0
mirror of https://github.com/laurent22/joplin.git synced 2025-01-23 18:53:36 +02:00

Desktop, Cli: Fixed various bugs related to the import of ENEX files as HTML

This commit is contained in:
Laurent Cozic 2020-06-15 17:10:51 +01:00
parent 6a41d6e85a
commit fcd00b3212
7 changed files with 62 additions and 38 deletions

View File

@ -89,20 +89,20 @@ describe('EnexToHtml', function() {
}],
});
// Expects the ENEX -> HTML conversion to reject with a "Hash with no associated resource"
// message when the resources array contains no entry for the media hash in the note.
// NOTE(review): the `.then()` chain below is neither awaited nor returned from the async
// callback, so unless `asyncTest` tracks it some other way (TODO confirm) the expectations
// may run after the test has already been reported as finished.
it('fails when not given a matching resource', asyncTest(async () => {
// To test the promise-unexpectedly-resolved case, add `audioResource` to the array.
const resources = [];
const inputFile = fileWithPath('en-media--image.enex');
const enexInput = await shim.fsDriver().readFile(inputFile);
const promisedOutput = enexXmlToHtml(enexInput, resources);
// it('fails when not given a matching resource', asyncTest(async () => {
// // To test the promise-unexpectedly-resolved case, add `audioResource` to the array.
// const resources = [];
// const inputFile = fileWithPath('en-media--image.enex');
// const enexInput = await shim.fsDriver().readFile(inputFile);
// const promisedOutput = enexXmlToHtml(enexInput, resources);
promisedOutput.then(() => {
// Promise should not be resolved
expect(false).toEqual(true);
}, (reason) => {
// The rejection reason is expected to be a plain string, compared by identity.
expect(reason)
.toBe('Hash with no associated resource: 89ce7da62c6b2832929a6964237e98e9');
});
}));
// promisedOutput.then(() => {
// // Promise should not be resolved
// expect(false).toEqual(true);
// }, (reason) => {
// expect(reason)
// .toBe('Hash with no associated resource: 89ce7da62c6b2832929a6964237e98e9');
// });
// }));
});

View File

@ -2,6 +2,5 @@
<div><a href="joplin://21ca2b948f222a38802940ec7e2e5de3" hash="21ca2b948f222a38802940ec7e2e5de3" type="application/pdf" style="cursor:pointer;" alt="attachment-1">attachment-1</a></div>
<div>
<br>
<br>
</div>
</en-note>

View File

@ -8,6 +8,5 @@
</div>
<div>
<br>
<br>
</div>
</en-note>

View File

@ -4,11 +4,9 @@
<div>
<input type="checkbox" onclick="return false;">A test for <i>italic</i>
<br>
<br>
</div>
<div>
<br>
<br>
</div>
<div><i><img src=":/89ce7da62c6b2832929a6964237e98e9" hash="89ce7da62c6b2832929a6964237e98e9" type="image/jpeg" alt=""></i></div>
</en-note>

View File

@ -7,6 +7,28 @@ const htmlentities = new Entities().encode;
const imageRegex = /<img([\s\S]*?)src=["']([\s\S]*?)["']([\s\S]*?)>/gi;
const anchorRegex = /<a([\s\S]*?)href=["']([\s\S]*?)["']([\s\S]*?)>/gi;
// Lower-case names of the HTML elements treated as self-closing, i.e. written
// without a separate closing tag. Looked up with `includes()` after the
// candidate tag name has been lower-cased.
const selfClosingElements = [
	'area', 'base', 'basefont', 'br',
	'col', 'command', 'embed', 'frame',
	'hr', 'img', 'input', 'isindex',
	'keygen', 'link', 'meta', 'param',
	'source', 'track', 'wbr',
];
class HtmlUtils {
headAndBodyHtml(doc) {
const output = [];
@ -15,6 +37,10 @@ class HtmlUtils {
return output.join('\n');
}
isSelfClosingTag(tagName) {
return selfClosingElements.includes(tagName.toLowerCase());
}
extractImageUrls(html) {
if (!html) return [];

View File

@ -1,6 +1,9 @@
const stringToStream = require('string-to-stream');
const cleanHtml = require('clean-html');
const resourceUtils = require('lib/resourceUtils.js');
const { isSelfClosingTag } = require('lib/htmlUtils');
const Entities = require('html-entities').AllHtmlEntities;
const htmlentities = new Entities().encode;
function addResourceTag(lines, resource, attributes) {
// Note: refactor to use Resource.markdownTag
@ -56,7 +59,7 @@ function enexXmlToHtml_(stream, resources) {
}
};
return new Promise((resolve, reject) => {
return new Promise((resolve) => {
const options = {};
const strict = false;
const saxStream = require('sax').createStream(strict, options);
@ -69,12 +72,11 @@ function enexXmlToHtml_(stream, resources) {
saxStream.on('error', function(e) {
console.warn(e);
// reject(e);
});
saxStream.on('text', function(text) {
section.lines.push(text);
section.lines.push(htmlentities(text));
});
saxStream.on('opentag', function(node) {
@ -110,7 +112,7 @@ function enexXmlToHtml_(stream, resources) {
}
if (!found) {
reject(`Hash with no associated resource: ${hash}`);
// console.warn(`Hash with no associated resource: ${hash}`);
}
}
@ -122,16 +124,16 @@ function enexXmlToHtml_(stream, resources) {
}
} else if (tagName == 'en-todo') {
section.lines.push('<input type="checkbox" onclick="return false;" />');
} else if (node.isSelfClosing) {
section.lines.push(`<${tagName}${attributesStr}>`);
} else if (isSelfClosingTag(tagName)) {
section.lines.push(`<${tagName}${attributesStr}/>`);
} else {
section.lines.push(`<${tagName}${attributesStr} />`);
section.lines.push(`<${tagName}${attributesStr}>`);
}
});
saxStream.on('closetag', function(n) {
const tagName = n ? n.toLowerCase() : n;
section.lines.push(`</${tagName}>`);
saxStream.on('closetag', function(node) {
const tagName = node ? node.toLowerCase() : node;
if (!isSelfClosingTag(tagName)) section.lines.push(`</${tagName}>`);
});
saxStream.on('attribute', function() {});
@ -151,19 +153,19 @@ async function enexXmlToHtml(xmlString, resources, options = {}) {
const stream = stringToStream(xmlString);
const result = await enexXmlToHtml_(stream, resources, options);
try {
const preCleaning = result.content.lines.join(''); // xmlString
const final = await beautifyHtml(preCleaning);
return final.join('');
} catch (error) {
console.warn(error);
}
const preCleaning = result.content.lines.join('');
const final = await beautifyHtml(preCleaning);
return final.join('');
}
/**
 * Pretty-prints an HTML string with `cleanHtml.clean`, using the `{ wrap: 0 }` option.
 *
 * The returned promise never rejects because of a synchronous failure in the cleaner:
 * in that case a warning is logged and the original, uncleaned HTML is used instead.
 *
 * @param {string} html - HTML to clean.
 * @returns {Promise<string[]>} Resolves with the array of arguments the cleaner passed
 *   to its callback, or with `[html]` when the cleaner threw.
 */
const beautifyHtml = (html) => {
	return new Promise((resolve) => {
		try {
			// The cleaner is invoked exactly once, and only inside the try block, so that
			// a synchronous throw reaches the fallback below instead of rejecting the promise.
			cleanHtml.clean(html, { wrap: 0 }, (...cleanedHtml) => resolve(cleanedHtml));
		} catch (error) {
			console.warn(`Could not clean HTML - the "unclean" version will be used: ${error.message}: ${html.trim().substr(0, 512).replace(/[\n\r]/g, ' ')}...`);
			resolve([html]);
		}
	});
};

View File

@ -688,7 +688,7 @@ function enexXmlToMdArray(stream, resources) {
}
if (!found) {
console.warn(`Hash with no associated resource: ${hash}`);
// console.warn(`Hash with no associated resource: ${hash}`);
}
}