1
0
mirror of https://github.com/laurent22/joplin.git synced 2025-10-31 00:07:48 +02:00

Desktop, Cli: Fixed importing certain Evernote images that have invalid dimension attributes (#13472)

This commit is contained in:
Laurent Cozic
2025-10-18 09:17:22 +01:00
committed by GitHub
parent 0d457d1bde
commit 561716efea
17 changed files with 119 additions and 147 deletions

View File

@@ -0,0 +1,8 @@
<en-note>
<div>
<en-media style="--en-viewerProps:{};" type="image/jpeg" hash="e2d4887c5a32ab1686276c7c5ae733ef" width="1.125in" />
</div>
<div>
<br />
</div>
</en-note>

View File

@@ -0,0 +1,8 @@
<en-note>
<div>
<img src=":/e2d4887c5a32ab1686276c7c5ae733ef" style="--en-viewerProps:{};" type="image/jpeg" hash="e2d4887c5a32ab1686276c7c5ae733ef" width="108" alt="attachment-image" />
</div>
<div>
<br/>
</div>
</en-note>

View File

@@ -1,6 +1,8 @@
<en-note>
<div><a href=":/21ca2b948f222a38802940ec7e2e5de3" hash="21ca2b948f222a38802940ec7e2e5de3" type="application/pdf" style="cursor:pointer;" alt="attachment-1">attachment-1</a></div>
<div>
<br>
<a href=':/21ca2b948f222a38802940ec7e2e5de3' hash="21ca2b948f222a38802940ec7e2e5de3" type="application/pdf" style="cursor:pointer;" alt="attachment-1"> attachment-1</a>
</div>
<div>
<br/>
</div>
</en-note>

View File

@@ -1,16 +1,11 @@
<en-note>
<div>
<p>For example, consider an exported Evernote list with todo checkboxes like this:</p>
<ul>
<li>
<div><input checked="checked" type="checkbox" onclick="return false;">Foo</div>
</li>
<li>
<div><input type="checkbox" onclick="return false;"><b>Bar</b></div>
</li>
<li>
<div><input type="checkbox" onclick="return false;"><i>Baz</i></div>
</li>
<li><div><input checked="checked" type="checkbox" onclick="return false;" />Foo</div></li>
<li><div><input type="checkbox" onclick="return false;" /><b>Bar</b></div></li>
<li><div><input type="checkbox" onclick="return false;" /><i>Baz</i></div></li>
</ul>
</div>
</en-note>

View File

@@ -1,19 +1,11 @@
<en-note>
<div>
<p>In Evernote a checklist is not the same as a list with checkboxes.</p>
<ul style="--en-todo:true;">
<li style="--en-checked:false;">
<input type="checkbox" onclick="return false;">
<div>One</div>
</li>
<li style="--en-checked:true;">
<input checked="checked" type="checkbox" onclick="return false;">
<div>Two</div>
</li>
<li style="--en-checked:false;">
<input type="checkbox" onclick="return false;">
<div>Three</div>
</li>
<ul STYLE="--en-todo:true;">
<li STYLE="--en-checked:false;"> <input type="checkbox" onclick="return false;" /><div>One</div></li>
<li STYLE="--en-checked:true;"> <input checked="checked" type="checkbox" onclick="return false;" /><div>Two</div>
</li><li STYLE="--en-checked:false;"> <input type="checkbox" onclick="return false;" /><div>Three</div></li>
</ul>
</div>
</en-note>

View File

@@ -1,12 +1 @@
<en-note>
<div>
<audio controls="" preload="none" style="width:480px;">
<source src=":/9168ee833d03c5ea7c730ac6673978c1" type="audio/mp4">
<p>Your browser does not support HTML5 audio.</p>
</audio>
<p><a href=":/9168ee833d03c5ea7c730ac6673978c1">audio test</a></p>
</div>
<div>
<br>
</div>
</en-note>
<en-note><div><audio controls preload="none" style="width:480px;"><source src=":/9168ee833d03c5ea7c730ac6673978c1" type="audio/mp4" /><p>Your browser does not support HTML5 audio.</p></audio><p><a href=":/9168ee833d03c5ea7c730ac6673978c1">audio test</a></p></div><div><br/></div></en-note>

View File

@@ -1,12 +1 @@
<en-note>
<div><input type="checkbox" onclick="return false;">This is a test</div>
<div><input type="checkbox" onclick="return false;">A test for <span style="font-weight: bold;">bold</span></div>
<div>
<input type="checkbox" onclick="return false;">A test for <i>italic</i>
<br>
</div>
<div>
<br>
</div>
<div><i><img src=":/89ce7da62c6b2832929a6964237e98e9" hash="89ce7da62c6b2832929a6964237e98e9" type="image/jpeg" alt=""></i></div>
</en-note>
<en-note><div><input type="checkbox" onclick="return false;" />This is a test</div><div><input type="checkbox" onclick="return false;" />A test for <span STYLE="font-weight: bold;">bold</span></div><div><input type="checkbox" onclick="return false;" />A test for <i>italic</i><br/></div><div><br/></div><div><i><img src=":/89ce7da62c6b2832929a6964237e98e9" hash="89ce7da62c6b2832929a6964237e98e9" type="image/jpeg" alt="" /></i></div></en-note>

View File

@@ -1,3 +1,3 @@
<en-note>
<h1 style="box-sizing:inherit;font-family:&quot;Guardian TextSans Web&quot;, &quot;Helvetica Neue&quot;, Helvetica, Arial, sans-serif;margin-top:0.2em;margin-bottom:0.35em;font-size:2.125em;font-weight:600;line-height:1.3;">Association Between mRNA Vaccination and COVID-19 Hospitalization and Disease Severity</h1>
<h1 STYLE="box-sizing:inherit;font-family:&quot;Guardian TextSans Web&quot;, &quot;Helvetica Neue&quot;, Helvetica, Arial, sans-serif;margin-top:0.2em;margin-bottom:0.35em;font-size:2.125em;font-weight:600;line-height:1.3;">Association Between mRNA Vaccination and COVID-19 Hospitalization and Disease Severity</h1>
</en-note>

View File

@@ -1,3 +1,5 @@
<en-note>
<div><img style="margin:0px;padding:0px;outline:0px;width:74px;height:36px;position:absolute;bottom:-5px;left:0px;transform:translate(0px, 100%);stroke-dasharray:90;transition:stroke-dashoffset 0.5s cubic-bezier(0.97, 0.16, 0.62, 0.76) 0s;stroke-dashoffset:0;" src="data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' data-evernote-id='97' class='js-evernote-checked'%3e%3cuse xlink:href='https://wordminds.com/wp-content/themes/wordminds/assets/img/hint_left.svg%23hint_left' data-evernote-id='98' class='js-evernote-checked'%3e%3c/use%3e%3c/svg%3e"></div>
<div>
<img STYLE="margin:0px;padding:0px;outline:0px;width:74px;height:36px;position:absolute;bottom:-5px;left:0px;transform:translate(0px, 100%);stroke-dasharray:90;transition:stroke-dashoffset 0.5s cubic-bezier(0.97, 0.16, 0.62, 0.76) 0s;stroke-dashoffset:0;" SRC="data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' data-evernote-id='97' class='js-evernote-checked'%3e%3cuse xlink:href='https://wordminds.com/wp-content/themes/wordminds/assets/img/hint_left.svg%23hint_left' data-evernote-id='98' class='js-evernote-checked'%3e%3c/use%3e%3c/svg%3e"/>
</div>
</en-note>

View File

@@ -38,4 +38,4 @@
"repository": "https://github.com/laurent22/generator-joplin",
"license": "AGPL-3.0-or-later",
"private": true
}
}

View File

@@ -1,12 +1,14 @@
const stringToStream = require('string-to-stream');
// const cleanHtml = require('clean-html');
const resourceUtils = require('./resourceUtils.js');
const { cssValue } = require('./import-enex-md-gen');
const htmlUtils = require('./htmlUtils').default;
const Entities = require('html-entities').AllHtmlEntities;
const { fixAttributes } = require('@joplin/utils/html');
const htmlentities = new Entities().encode;
function addResourceTag(lines, resource, attributes) {
attributes = fixAttributes(attributes);
// Note: refactor to use Resource.markdownTag
if (!attributes.alt) attributes.alt = resource.title;
if (!attributes.alt) attributes.alt = resource.filename;
@@ -137,7 +139,7 @@ function enexXmlToHtml_(stream, resources) {
saxStream.on('closetag', (node) => {
const tagName = node ? node.toLowerCase() : node;
if (!htmlUtils.isSelfClosingTag(tagName)) section.lines.push(`</${tagName}>`);
if (!htmlUtils.isSelfClosingTag(tagName) && tagName !== 'en-media' && tagName !== 'en-todo') section.lines.push(`</${tagName}>`);
});
saxStream.on('attribute', () => {});

View File

@@ -1,7 +1,6 @@
const { setupDatabaseAndSynchronizer, switchClient, supportDir } = require('./testing/test-utils.js');
const shim = require('./shim').default;
const { enexXmlToHtml } = require('./import-enex-html-gen.js');
const cleanHtml = require('clean-html');
const fileWithPath = (filename) =>
`${supportDir}/../enex_to_html/${filename}`;
@@ -14,20 +13,6 @@ const audioResource = {
title: 'audio test',
};
// All the test HTML files are beautified ones, so we need to run
// this before the comparison. Before, beautifying was done by `enexXmlToHtml`
// but that was removed due to problems with the clean-html package.
//
// Takes an HTML string and returns a Promise that resolves to a string. The
// Promise is only ever resolved, never rejected: if the call to
// `cleanHtml.clean` throws, a warning is logged and the input HTML is used
// as the result instead.
const beautifyHtml = (html) => {
return new Promise((resolve) => {
try {
// The callback may be invoked with several arguments - they are all
// concatenated into a single string.
// NOTE(review): `wrap: 0` presumably disables line wrapping - confirm
// against the clean-html documentation.
cleanHtml.clean(html, { wrap: 0 }, (...cleanedHtml) => resolve(cleanedHtml.join('')));
} catch (error) {
// Only errors thrown synchronously by `clean` end up here. The warning
// includes at most the first 512 characters of the trimmed input, with
// line breaks replaced by spaces so that it stays on one line.
console.warn(`Could not clean HTML - the "unclean" version will be used: ${error.message}: ${html.trim().substr(0, 512).replace(/[\n\r]/g, ' ')}...`);
// Fall back to the HTML as it was given.
resolve([html].join(''));
}
});
};
// Tests the importer for a single note, checking that the result of
// processing the given `.enex` input file matches the contents of the given
// `.html` file.
@@ -51,7 +36,7 @@ const compareOutputToExpected = (options) => {
it(testTitle, (async () => {
const enexInput = await shim.fsDriver().readFile(inputFile);
const expectedOutput = await shim.fsDriver().readFile(outputFile);
const actualOutput = await beautifyHtml(await enexXmlToHtml(enexInput, options.resources));
const actualOutput = (await enexXmlToHtml(enexInput, options.resources)).trim();
expect(actualOutput).toEqual(expectedOutput);
}));
};
@@ -100,6 +85,16 @@ describe('EnexToHtml', () => {
}],
});
compareOutputToExpected({
testName: 'attachment-image',
resources: [{
filename: 'attachment-image',
id: 'e2d4887c5a32ab1686276c7c5ae733ef',
mime: 'image/jpeg', // Any non-image/non-audio mime type will do
width: '1.125in',
}],
});
compareOutputToExpected({
testName: 'quoted-attributes',
});

View File

@@ -29,7 +29,6 @@
"@types/node-rsa": "1.1.4",
"@types/react": "18.3.23",
"@types/uuid": "10.0.0",
"clean-html": "1.5.0",
"jest": "29.7.0",
"jest-expect-message": "1.1.3",
"jsdom": "26.1.0",

View File

@@ -69,7 +69,7 @@ const audioElement = ({ src, alt, id }) =>
` ${alt || src || id || 'Download audio'}`,
' </a>',
'</p>',
].join('');
].map(s => s.trim()).join('');
const resourceUtils = {
imgElement,

View File

@@ -82,3 +82,69 @@ export const extractUrls = (html: string) => {
return output;
};
// Splits a dimension attribute value such as "1.125in" or "108" into its
// numeric part and its unit. The unit is matched case-insensitively and
// returned in lower case; a value without a unit is treated as pixels.
// Surrounding whitespace is ignored. Returns null when the value doesn't
// have the expected "<number><optional unit>" format (for example "50%").
const parseDimensionAttribute = (value: string) => {
	const match = /^([0-9]*\.?[0-9]+)\s*(in|cm|mm|pt|pc|px)?$/i.exec(value.trim());
	if (match === null) return null;

	const [, numberPart, unitPart] = match;

	return {
		num: parseFloat(numberPart),
		unit: unitPart?.toLowerCase() || 'px',
	};
};
// Converts a dimension attribute value (e.g. "1.125in") to a number of
// pixels, using 96 pixels per inch as the reference for the absolute units
// (in, cm, mm, pt, pc). The result is not rounded. Returns null if the value
// can't be parsed or if its unit isn't one of the supported ones.
const dimensionAttributeInPixels = (value: string) => {
	const parsed = parseDimensionAttribute(value);
	if (!parsed) return null;

	const { num, unit } = parsed;

	if (unit === 'px') return num;
	if (unit === 'in') return num * 96;
	if (unit === 'cm') return num * 96 / 2.54;
	if (unit === 'mm') return num * 96 / 25.4;
	if (unit === 'pt') return num * 96 / 72;
	if (unit === 'pc') return num * 16;

	return null;
};
// Returns a copy of the given attributes in which invalid values have been
// fixed. Attribute names are matched case-insensitively but are written to
// the output with their original casing.
//
// Currently this function only fixes the `width` and `height` attributes:
// those should be specified as a whole number of pixels, however certain
// applications (such as Evernote) occasionally specify them in other units,
// such as inches. When that happens, and we import the note, Electron is going
// to ignore the unit and assume pixels, so "1in" becomes 1 pixel. To prevent
// this, the value is converted to pixels and rounded to the nearest integer,
// since a fractional result such as "105.60000000000001" wouldn't be a valid
// dimension attribute either.
//
// A `width` or `height` that can't be parsed is removed from the output. All
// the other attributes are copied unchanged.
//
// Currently only used in import-enex-html-gen.js and tested there.
//
// Ref: https://html.spec.whatwg.org/multipage/embedded-content-other.html#dimension-attributes
export const fixAttributes = (attributes: Record<string, string>) => {
	const output: Record<string, string> = {};

	for (const [keyRaw, value] of Object.entries(attributes)) {
		const key = keyRaw.toLowerCase();

		if (key !== 'width' && key !== 'height') {
			output[keyRaw] = value;
			continue;
		}

		const pixelValue = dimensionAttributeInPixels(value);

		// Skip if the value can't be parsed, which means the image will display at its real
		// size. Better than letting bad values go through as it may cause rendering issues.
		if (pixelValue === null) continue;

		// Unit conversions rarely give a whole number of pixels, so round the
		// result to get a valid integer value.
		output[keyRaw] = Math.round(pixelValue).toString();
	}

	return output;
};

View File

@@ -206,11 +206,6 @@
"@aws-sdk/client-s3",
"@aws-sdk/s3-request-presigner",
// 2.x appears to have breaking changes since the generated HTML is
// different. Not important enough to fix since the package is only used
// for tests.
"clean-html",
// We are too many versions behind so it needs to be manually upgraded.
"immer",

View File

@@ -9526,7 +9526,6 @@ __metadata:
base64-stream: "npm:1.0.0"
builtin-modules: "npm:3.3.0"
chokidar: "npm:3.6.0"
clean-html: "npm:1.5.0"
color: "npm:3.2.1"
compare-versions: "npm:6.1.1"
diff-match-patch: "npm:1.0.5"
@@ -20130,18 +20129,6 @@ __metadata:
languageName: node
linkType: hard
"clean-html@npm:1.5.0":
version: 1.5.0
resolution: "clean-html@npm:1.5.0"
dependencies:
htmlparser2: "npm:^3.8.2"
minimist: "npm:^1.1.1"
bin:
clean-html: cmd.js
checksum: 10/d49cd30cb5aa4b637756c652117f81039cf05f2ae13a9bd5bff03b1f68533fa53a85691a7632b0541bd55a46c53780c5dcb3c09bb02041afdea92b7577651164
languageName: node
linkType: hard
"clean-stack@npm:^2.0.0":
version: 2.2.0
resolution: "clean-stack@npm:2.2.0"
@@ -23745,16 +23732,6 @@ __metadata:
languageName: node
linkType: hard
"dom-serializer@npm:0":
version: 0.2.2
resolution: "dom-serializer@npm:0.2.2"
dependencies:
domelementtype: "npm:^2.0.1"
entities: "npm:^2.0.0"
checksum: 10/376344893e4feccab649a14ca1a46473e9961f40fe62479ea692d4fee4d9df1c00ca8654811a79c1ca7b020096987e1ca4fb4d7f8bae32c1db800a680a0e5d5e
languageName: node
linkType: hard
"dom-serializer@npm:^1.0.1":
version: 1.4.1
resolution: "dom-serializer@npm:1.4.1"
@@ -23784,13 +23761,6 @@ __metadata:
languageName: node
linkType: hard
"domelementtype@npm:1, domelementtype@npm:^1.3.1":
version: 1.3.1
resolution: "domelementtype@npm:1.3.1"
checksum: 10/7893da40218ae2106ec6ffc146b17f203487a52f5228b032ea7aa470e41dfe03e1bd762d0ee0139e792195efda765434b04b43cddcf63207b098f6ae44b36ad6
languageName: node
linkType: hard
"domelementtype@npm:2.3.0, domelementtype@npm:^2.2.0, domelementtype@npm:^2.3.0":
version: 2.3.0
resolution: "domelementtype@npm:2.3.0"
@@ -23832,15 +23802,6 @@ __metadata:
languageName: node
linkType: hard
"domhandler@npm:^2.3.0":
version: 2.4.2
resolution: "domhandler@npm:2.4.2"
dependencies:
domelementtype: "npm:1"
checksum: 10/d8b0303c53c0eda912e45820ef8f6023f8462a724e8b824324f27923970222a250c7569e067de398c4d9ca3ce0f2b2d2818bc632d6fa72956721d6729479a9b9
languageName: node
linkType: hard
"domhandler@npm:^4.0.0, domhandler@npm:^4.2.0, domhandler@npm:^4.3.1":
version: 4.3.1
resolution: "domhandler@npm:4.3.1"
@@ -23880,16 +23841,6 @@ __metadata:
languageName: node
linkType: hard
"domutils@npm:^1.5.1":
version: 1.7.0
resolution: "domutils@npm:1.7.0"
dependencies:
dom-serializer: "npm:0"
domelementtype: "npm:1"
checksum: 10/8c1d879fd3bbfc0156c970d12ebdf530f541cbda895d7f631b2444d22bbb9d0e5a3a4c3210cffb17708ad67531d7d40e1bef95e915c53a218d268607b66b63c8
languageName: node
linkType: hard
"domutils@npm:^2.5.2, domutils@npm:^2.8.0":
version: 2.8.0
resolution: "domutils@npm:2.8.0"
@@ -24412,13 +24363,6 @@ __metadata:
languageName: node
linkType: hard
"entities@npm:^1.1.1":
version: 1.1.2
resolution: "entities@npm:1.1.2"
checksum: 10/4a707022f4e932060f03df2526be55d085a2576fe534421e5b22bc62abb0d1f04241c171f9981e3d7baa4f4160606cad72a2f7eb01b6a25e279e3f31a2be4bf2
languageName: node
linkType: hard
"entities@npm:^4.2.0, entities@npm:^4.4.0":
version: 4.4.0
resolution: "entities@npm:4.4.0"
@@ -29652,20 +29596,6 @@ __metadata:
languageName: node
linkType: hard
"htmlparser2@npm:^3.8.2":
version: 3.10.1
resolution: "htmlparser2@npm:3.10.1"
dependencies:
domelementtype: "npm:^1.3.1"
domhandler: "npm:^2.3.0"
domutils: "npm:^1.5.1"
entities: "npm:^1.1.1"
inherits: "npm:^2.0.1"
readable-stream: "npm:^3.1.1"
checksum: 10/d5297fe76c0d6b0f35f39781417eb560ef12fa121953578083f3f2b240c74d5c35a38185689d181b6a82b66a3025436f14aa3413b94f3cd50ba15733f2f72389
languageName: node
linkType: hard
"htmlparser2@npm:^6.1.0":
version: 6.1.0
resolution: "htmlparser2@npm:6.1.0"
@@ -36295,7 +36225,7 @@ __metadata:
languageName: node
linkType: hard
"minimist@npm:^1.1.0, minimist@npm:^1.1.1, minimist@npm:^1.1.3, minimist@npm:^1.2.0, minimist@npm:^1.2.3, minimist@npm:^1.2.5, minimist@npm:~1.2.5":
"minimist@npm:^1.1.0, minimist@npm:^1.1.3, minimist@npm:^1.2.0, minimist@npm:^1.2.3, minimist@npm:^1.2.5, minimist@npm:~1.2.5":
version: 1.2.5
resolution: "minimist@npm:1.2.5"
checksum: 10/9dff2c7d845b4ac8aeedc7fd31e7fb394e7a2bf46d5a4c0bf818f124b35fab1ed260e6e95df3c0504a63bc93ac318f86a234cff1694d67af7f7da260a0342257