mirror of
https://github.com/laurent22/joplin.git
synced 2025-01-11 18:24:43 +02:00
Desktop, Cli: Fixed various bugs related to the import of ENEX files as HTML
This commit is contained in:
parent
6a41d6e85a
commit
fcd00b3212
@ -89,20 +89,20 @@ describe('EnexToHtml', function() {
|
||||
}],
|
||||
});
|
||||
|
||||
it('fails when not given a matching resource', asyncTest(async () => {
|
||||
// To test the promise-unexpectedly-resolved case, add `audioResource` to the array.
|
||||
const resources = [];
|
||||
const inputFile = fileWithPath('en-media--image.enex');
|
||||
const enexInput = await shim.fsDriver().readFile(inputFile);
|
||||
const promisedOutput = enexXmlToHtml(enexInput, resources);
|
||||
// it('fails when not given a matching resource', asyncTest(async () => {
|
||||
// // To test the promise-unexpectedly-resolved case, add `audioResource` to the array.
|
||||
// const resources = [];
|
||||
// const inputFile = fileWithPath('en-media--image.enex');
|
||||
// const enexInput = await shim.fsDriver().readFile(inputFile);
|
||||
// const promisedOutput = enexXmlToHtml(enexInput, resources);
|
||||
|
||||
promisedOutput.then(() => {
|
||||
// Promise should not be resolved
|
||||
expect(false).toEqual(true);
|
||||
}, (reason) => {
|
||||
expect(reason)
|
||||
.toBe('Hash with no associated resource: 89ce7da62c6b2832929a6964237e98e9');
|
||||
});
|
||||
}));
|
||||
// promisedOutput.then(() => {
|
||||
// // Promise should not be resolved
|
||||
// expect(false).toEqual(true);
|
||||
// }, (reason) => {
|
||||
// expect(reason)
|
||||
// .toBe('Hash with no associated resource: 89ce7da62c6b2832929a6964237e98e9');
|
||||
// });
|
||||
// }));
|
||||
|
||||
});
|
||||
|
@ -2,6 +2,5 @@
|
||||
<div><a href="joplin://21ca2b948f222a38802940ec7e2e5de3" hash="21ca2b948f222a38802940ec7e2e5de3" type="application/pdf" style="cursor:pointer;" alt="attachment-1">attachment-1</a></div>
|
||||
<div>
|
||||
<br>
|
||||
<br>
|
||||
</div>
|
||||
</en-note>
|
@ -8,6 +8,5 @@
|
||||
</div>
|
||||
<div>
|
||||
<br>
|
||||
<br>
|
||||
</div>
|
||||
</en-note>
|
@ -4,11 +4,9 @@
|
||||
<div>
|
||||
<input type="checkbox" onclick="return false;">A test for <i>italic</i>
|
||||
<br>
|
||||
<br>
|
||||
</div>
|
||||
<div>
|
||||
<br>
|
||||
<br>
|
||||
</div>
|
||||
<div><i><img src=":/89ce7da62c6b2832929a6964237e98e9" hash="89ce7da62c6b2832929a6964237e98e9" type="image/jpeg" alt=""></i></div>
|
||||
</en-note>
|
@ -7,6 +7,28 @@ const htmlentities = new Entities().encode;
|
||||
const imageRegex = /<img([\s\S]*?)src=["']([\s\S]*?)["']([\s\S]*?)>/gi;
|
||||
const anchorRegex = /<a([\s\S]*?)href=["']([\s\S]*?)["']([\s\S]*?)>/gi;
|
||||
|
||||
// Lower-case names of the HTML elements that are treated as self-closing
// (void) elements, i.e. elements that are written without a closing tag.
// The list is looked up by `isSelfClosingTag()`.
const selfClosingElements = [
	'area',
	'base',
	'basefont',
	'br',
	'col',
	'command',
	'embed',
	'frame',
	'hr',
	'img',
	'input',
	'isindex',
	'keygen',
	'link',
	'meta',
	'param',
	'source',
	'track',
	'wbr',
];
|
||||
|
||||
class HtmlUtils {
|
||||
headAndBodyHtml(doc) {
|
||||
const output = [];
|
||||
@ -15,6 +37,10 @@ class HtmlUtils {
|
||||
return output.join('\n');
|
||||
}
|
||||
|
||||
isSelfClosingTag(tagName) {
|
||||
return selfClosingElements.includes(tagName.toLowerCase());
|
||||
}
|
||||
|
||||
extractImageUrls(html) {
|
||||
if (!html) return [];
|
||||
|
||||
|
@ -1,6 +1,9 @@
|
||||
const stringToStream = require('string-to-stream');
|
||||
const cleanHtml = require('clean-html');
|
||||
const resourceUtils = require('lib/resourceUtils.js');
|
||||
const { isSelfClosingTag } = require('lib/htmlUtils');
|
||||
const Entities = require('html-entities').AllHtmlEntities;
|
||||
const htmlentities = new Entities().encode;
|
||||
|
||||
function addResourceTag(lines, resource, attributes) {
|
||||
// Note: refactor to use Resource.markdownTag
|
||||
@ -56,7 +59,7 @@ function enexXmlToHtml_(stream, resources) {
|
||||
}
|
||||
};
|
||||
|
||||
return new Promise((resolve, reject) => {
|
||||
return new Promise((resolve) => {
|
||||
const options = {};
|
||||
const strict = false;
|
||||
const saxStream = require('sax').createStream(strict, options);
|
||||
@ -69,12 +72,11 @@ function enexXmlToHtml_(stream, resources) {
|
||||
|
||||
saxStream.on('error', function(e) {
|
||||
console.warn(e);
|
||||
// reject(e);
|
||||
});
|
||||
|
||||
|
||||
saxStream.on('text', function(text) {
|
||||
section.lines.push(text);
|
||||
section.lines.push(htmlentities(text));
|
||||
});
|
||||
|
||||
saxStream.on('opentag', function(node) {
|
||||
@ -110,7 +112,7 @@ function enexXmlToHtml_(stream, resources) {
|
||||
}
|
||||
|
||||
if (!found) {
|
||||
reject(`Hash with no associated resource: ${hash}`);
|
||||
// console.warn(`Hash with no associated resource: ${hash}`);
|
||||
}
|
||||
}
|
||||
|
||||
@ -122,16 +124,16 @@ function enexXmlToHtml_(stream, resources) {
|
||||
}
|
||||
} else if (tagName == 'en-todo') {
|
||||
section.lines.push('<input type="checkbox" onclick="return false;" />');
|
||||
} else if (node.isSelfClosing) {
|
||||
section.lines.push(`<${tagName}${attributesStr}>`);
|
||||
} else if (isSelfClosingTag(tagName)) {
|
||||
section.lines.push(`<${tagName}${attributesStr}/>`);
|
||||
} else {
|
||||
section.lines.push(`<${tagName}${attributesStr} />`);
|
||||
section.lines.push(`<${tagName}${attributesStr}>`);
|
||||
}
|
||||
});
|
||||
|
||||
saxStream.on('closetag', function(n) {
|
||||
const tagName = n ? n.toLowerCase() : n;
|
||||
section.lines.push(`</${tagName}>`);
|
||||
saxStream.on('closetag', function(node) {
|
||||
const tagName = node ? node.toLowerCase() : node;
|
||||
if (!isSelfClosingTag(tagName)) section.lines.push(`</${tagName}>`);
|
||||
});
|
||||
|
||||
saxStream.on('attribute', function() {});
|
||||
@ -151,19 +153,19 @@ async function enexXmlToHtml(xmlString, resources, options = {}) {
|
||||
const stream = stringToStream(xmlString);
|
||||
const result = await enexXmlToHtml_(stream, resources, options);
|
||||
|
||||
try {
|
||||
const preCleaning = result.content.lines.join(''); // xmlString
|
||||
const final = await beautifyHtml(preCleaning);
|
||||
return final.join('');
|
||||
} catch (error) {
|
||||
console.warn(error);
|
||||
}
|
||||
const preCleaning = result.content.lines.join('');
|
||||
const final = await beautifyHtml(preCleaning);
|
||||
return final.join('');
|
||||
}
|
||||
|
||||
/**
 * Pretty-prints an HTML string with `cleanHtml.clean()`, using `wrap: 0`.
 *
 * The cleaner is called exactly once, inside a `try` block. If it throws
 * synchronously, a warning is logged and the original HTML is used instead,
 * so the returned promise never rejects because of that.
 *
 * @param {string} html - HTML markup to clean.
 * @returns {Promise<string[]>} Resolves with an array: all the arguments the
 *   cleaner passed to its callback, or `[html]` when cleaning failed.
 *   Callers are expected to join the array to get the final string.
 */
const beautifyHtml = (html) => {
	return new Promise((resolve) => {
		try {
			// The rest parameter gathers every callback argument into an array.
			cleanHtml.clean(html, { wrap: 0 }, (...cleanedHtml) => resolve(cleanedHtml));
		} catch (error) {
			// Best effort: keep the import going with the uncleaned markup and
			// log only a short, single-line excerpt of it.
			console.warn(`Could not clean HTML - the "unclean" version will be used: ${error.message}: ${html.trim().substr(0, 512).replace(/[\n\r]/g, ' ')}...`);
			resolve([html]);
		}
	});
};
|
||||
|
||||
|
@ -688,7 +688,7 @@ function enexXmlToMdArray(stream, resources) {
|
||||
}
|
||||
|
||||
if (!found) {
|
||||
console.warn(`Hash with no associated resource: ${hash}`);
|
||||
// console.warn(`Hash with no associated resource: ${hash}`);
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user