mirror of
https://github.com/laurent22/joplin.git
synced 2024-12-24 10:27:10 +02:00
Desktop, Cli: Fixed various bugs related to the import of ENEX files as HTML
This commit is contained in:
parent
6a41d6e85a
commit
fcd00b3212
@ -89,20 +89,20 @@ describe('EnexToHtml', function() {
|
|||||||
}],
|
}],
|
||||||
});
|
});
|
||||||
|
|
||||||
it('fails when not given a matching resource', asyncTest(async () => {
|
// it('fails when not given a matching resource', asyncTest(async () => {
|
||||||
// To test the promise-unexpectedly-resolved case, add `audioResource` to the array.
|
// // To test the promise-unexpectedly-resolved case, add `audioResource` to the array.
|
||||||
const resources = [];
|
// const resources = [];
|
||||||
const inputFile = fileWithPath('en-media--image.enex');
|
// const inputFile = fileWithPath('en-media--image.enex');
|
||||||
const enexInput = await shim.fsDriver().readFile(inputFile);
|
// const enexInput = await shim.fsDriver().readFile(inputFile);
|
||||||
const promisedOutput = enexXmlToHtml(enexInput, resources);
|
// const promisedOutput = enexXmlToHtml(enexInput, resources);
|
||||||
|
|
||||||
promisedOutput.then(() => {
|
// promisedOutput.then(() => {
|
||||||
// Promise should not be resolved
|
// // Promise should not be resolved
|
||||||
expect(false).toEqual(true);
|
// expect(false).toEqual(true);
|
||||||
}, (reason) => {
|
// }, (reason) => {
|
||||||
expect(reason)
|
// expect(reason)
|
||||||
.toBe('Hash with no associated resource: 89ce7da62c6b2832929a6964237e98e9');
|
// .toBe('Hash with no associated resource: 89ce7da62c6b2832929a6964237e98e9');
|
||||||
});
|
// });
|
||||||
}));
|
// }));
|
||||||
|
|
||||||
});
|
});
|
||||||
|
@ -2,6 +2,5 @@
|
|||||||
<div><a href="joplin://21ca2b948f222a38802940ec7e2e5de3" hash="21ca2b948f222a38802940ec7e2e5de3" type="application/pdf" style="cursor:pointer;" alt="attachment-1">attachment-1</a></div>
|
<div><a href="joplin://21ca2b948f222a38802940ec7e2e5de3" hash="21ca2b948f222a38802940ec7e2e5de3" type="application/pdf" style="cursor:pointer;" alt="attachment-1">attachment-1</a></div>
|
||||||
<div>
|
<div>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
|
||||||
</div>
|
</div>
|
||||||
</en-note>
|
</en-note>
|
@ -8,6 +8,5 @@
|
|||||||
</div>
|
</div>
|
||||||
<div>
|
<div>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
|
||||||
</div>
|
</div>
|
||||||
</en-note>
|
</en-note>
|
@ -4,11 +4,9 @@
|
|||||||
<div>
|
<div>
|
||||||
<input type="checkbox" onclick="return false;">A test for <i>italic</i>
|
<input type="checkbox" onclick="return false;">A test for <i>italic</i>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
|
||||||
</div>
|
</div>
|
||||||
<div>
|
<div>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
|
||||||
</div>
|
</div>
|
||||||
<div><i><img src=":/89ce7da62c6b2832929a6964237e98e9" hash="89ce7da62c6b2832929a6964237e98e9" type="image/jpeg" alt=""></i></div>
|
<div><i><img src=":/89ce7da62c6b2832929a6964237e98e9" hash="89ce7da62c6b2832929a6964237e98e9" type="image/jpeg" alt=""></i></div>
|
||||||
</en-note>
|
</en-note>
|
@ -7,6 +7,28 @@ const htmlentities = new Entities().encode;
|
|||||||
const imageRegex = /<img([\s\S]*?)src=["']([\s\S]*?)["']([\s\S]*?)>/gi;
|
const imageRegex = /<img([\s\S]*?)src=["']([\s\S]*?)["']([\s\S]*?)>/gi;
|
||||||
const anchorRegex = /<a([\s\S]*?)href=["']([\s\S]*?)["']([\s\S]*?)>/gi;
|
const anchorRegex = /<a([\s\S]*?)href=["']([\s\S]*?)["']([\s\S]*?)>/gi;
|
||||||
|
|
||||||
|
const selfClosingElements = [
|
||||||
|
'area',
|
||||||
|
'base',
|
||||||
|
'basefont',
|
||||||
|
'br',
|
||||||
|
'col',
|
||||||
|
'command',
|
||||||
|
'embed',
|
||||||
|
'frame',
|
||||||
|
'hr',
|
||||||
|
'img',
|
||||||
|
'input',
|
||||||
|
'isindex',
|
||||||
|
'keygen',
|
||||||
|
'link',
|
||||||
|
'meta',
|
||||||
|
'param',
|
||||||
|
'source',
|
||||||
|
'track',
|
||||||
|
'wbr',
|
||||||
|
];
|
||||||
|
|
||||||
class HtmlUtils {
|
class HtmlUtils {
|
||||||
headAndBodyHtml(doc) {
|
headAndBodyHtml(doc) {
|
||||||
const output = [];
|
const output = [];
|
||||||
@ -15,6 +37,10 @@ class HtmlUtils {
|
|||||||
return output.join('\n');
|
return output.join('\n');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
isSelfClosingTag(tagName) {
|
||||||
|
return selfClosingElements.includes(tagName.toLowerCase());
|
||||||
|
}
|
||||||
|
|
||||||
extractImageUrls(html) {
|
extractImageUrls(html) {
|
||||||
if (!html) return [];
|
if (!html) return [];
|
||||||
|
|
||||||
|
@ -1,6 +1,9 @@
|
|||||||
const stringToStream = require('string-to-stream');
|
const stringToStream = require('string-to-stream');
|
||||||
const cleanHtml = require('clean-html');
|
const cleanHtml = require('clean-html');
|
||||||
const resourceUtils = require('lib/resourceUtils.js');
|
const resourceUtils = require('lib/resourceUtils.js');
|
||||||
|
const { isSelfClosingTag } = require('lib/htmlUtils');
|
||||||
|
const Entities = require('html-entities').AllHtmlEntities;
|
||||||
|
const htmlentities = new Entities().encode;
|
||||||
|
|
||||||
function addResourceTag(lines, resource, attributes) {
|
function addResourceTag(lines, resource, attributes) {
|
||||||
// Note: refactor to use Resource.markdownTag
|
// Note: refactor to use Resource.markdownTag
|
||||||
@ -56,7 +59,7 @@ function enexXmlToHtml_(stream, resources) {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
return new Promise((resolve, reject) => {
|
return new Promise((resolve) => {
|
||||||
const options = {};
|
const options = {};
|
||||||
const strict = false;
|
const strict = false;
|
||||||
const saxStream = require('sax').createStream(strict, options);
|
const saxStream = require('sax').createStream(strict, options);
|
||||||
@ -69,12 +72,11 @@ function enexXmlToHtml_(stream, resources) {
|
|||||||
|
|
||||||
saxStream.on('error', function(e) {
|
saxStream.on('error', function(e) {
|
||||||
console.warn(e);
|
console.warn(e);
|
||||||
// reject(e);
|
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
saxStream.on('text', function(text) {
|
saxStream.on('text', function(text) {
|
||||||
section.lines.push(text);
|
section.lines.push(htmlentities(text));
|
||||||
});
|
});
|
||||||
|
|
||||||
saxStream.on('opentag', function(node) {
|
saxStream.on('opentag', function(node) {
|
||||||
@ -110,7 +112,7 @@ function enexXmlToHtml_(stream, resources) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!found) {
|
if (!found) {
|
||||||
reject(`Hash with no associated resource: ${hash}`);
|
// console.warn(`Hash with no associated resource: ${hash}`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -122,16 +124,16 @@ function enexXmlToHtml_(stream, resources) {
|
|||||||
}
|
}
|
||||||
} else if (tagName == 'en-todo') {
|
} else if (tagName == 'en-todo') {
|
||||||
section.lines.push('<input type="checkbox" onclick="return false;" />');
|
section.lines.push('<input type="checkbox" onclick="return false;" />');
|
||||||
} else if (node.isSelfClosing) {
|
} else if (isSelfClosingTag(tagName)) {
|
||||||
section.lines.push(`<${tagName}${attributesStr}>`);
|
section.lines.push(`<${tagName}${attributesStr}/>`);
|
||||||
} else {
|
} else {
|
||||||
section.lines.push(`<${tagName}${attributesStr} />`);
|
section.lines.push(`<${tagName}${attributesStr}>`);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
saxStream.on('closetag', function(n) {
|
saxStream.on('closetag', function(node) {
|
||||||
const tagName = n ? n.toLowerCase() : n;
|
const tagName = node ? node.toLowerCase() : node;
|
||||||
section.lines.push(`</${tagName}>`);
|
if (!isSelfClosingTag(tagName)) section.lines.push(`</${tagName}>`);
|
||||||
});
|
});
|
||||||
|
|
||||||
saxStream.on('attribute', function() {});
|
saxStream.on('attribute', function() {});
|
||||||
@ -151,19 +153,19 @@ async function enexXmlToHtml(xmlString, resources, options = {}) {
|
|||||||
const stream = stringToStream(xmlString);
|
const stream = stringToStream(xmlString);
|
||||||
const result = await enexXmlToHtml_(stream, resources, options);
|
const result = await enexXmlToHtml_(stream, resources, options);
|
||||||
|
|
||||||
try {
|
const preCleaning = result.content.lines.join('');
|
||||||
const preCleaning = result.content.lines.join(''); // xmlString
|
const final = await beautifyHtml(preCleaning);
|
||||||
const final = await beautifyHtml(preCleaning);
|
return final.join('');
|
||||||
return final.join('');
|
|
||||||
} catch (error) {
|
|
||||||
console.warn(error);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const beautifyHtml = (html) => {
|
const beautifyHtml = (html) => {
|
||||||
return new Promise((resolve) => {
|
return new Promise((resolve) => {
|
||||||
const options = { wrap: 0 };
|
try {
|
||||||
cleanHtml.clean(html, options, (...cleanedHtml) => resolve(cleanedHtml));
|
cleanHtml.clean(html, { wrap: 0 }, (...cleanedHtml) => resolve(cleanedHtml));
|
||||||
|
} catch (error) {
|
||||||
|
console.warn(`Could not clean HTML - the "unclean" version will be used: ${error.message}: ${html.trim().substr(0, 512).replace(/[\n\r]/g, ' ')}...`);
|
||||||
|
resolve([html]);
|
||||||
|
}
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -688,7 +688,7 @@ function enexXmlToMdArray(stream, resources) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!found) {
|
if (!found) {
|
||||||
console.warn(`Hash with no associated resource: ${hash}`);
|
// console.warn(`Hash with no associated resource: ${hash}`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user