1
0
mirror of https://github.com/laurent22/joplin.git synced 2025-08-24 20:19:10 +02:00

Compare commits

...

2 Commits

Author SHA1 Message Date
Laurent Cozic
ba509dced1 Desktop release v2.5.2 2021-10-13 11:07:49 +01:00
Laurent Cozic
d92b617bbf Desktop, Cli: Fixes #5543: Fixed running out of memory when importing large ENEX files 2021-10-13 11:07:39 +01:00
3 changed files with 34 additions and 24 deletions

View File

@@ -1,12 +1,12 @@
{
"name": "@joplin/app-desktop",
"version": "2.5.1",
"version": "2.5.2",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "@joplin/app-desktop",
"version": "2.5.1",
"version": "2.5.2",
"license": "MIT",
"dependencies": {
"@electron/remote": "^2.0.1",

View File

@@ -1,6 +1,6 @@
{
"name": "@joplin/app-desktop",
"version": "2.5.1",
"version": "2.5.2",
"description": "Joplin for Desktop",
"main": "main.js",
"private": true,

View File

@@ -301,28 +301,38 @@ interface NoteResourceRecognition {
}
/**
 * Pre-processing hook for an ENEX file, intended to run before the file is
 * parsed.
 *
 * Pre-processing is currently disabled, so this is a no-op that resolves to
 * the path it was given without touching the file system. The previous
 * implementation is kept below, commented out, as a reference for a future
 * fix that does not load the whole file in memory.
 *
 * @param filePath Path of the ENEX file being imported.
 * @returns The path of the file to parse. While pre-processing is disabled
 * this is always `filePath` itself.
 */
const preProcessFile = async (filePath: string): Promise<string> => {
	// Disabled pre-processing for now because it runs out of memory:
	// https://github.com/laurent22/joplin/issues/5543
	//
	// It could be fixed by not loading the whole file in memory, but there are
	// other issues because people import 1GB+ files so pre-processing
	// everything means creating a new copy of that file, and that has its own
	// problems.
	return filePath;

	// const content: string = await shim.fsDriver().readFile(filePath, 'utf8');

	// // The note content in an ENEX file is wrapped in a CDATA block so it means
	// // that any "]]>" inside the note must be somehow escaped, or else the CDATA
	// // block would be closed at the wrong point.
	// //
	// // The problem is that Evernote appears to encode "]]>" as "]]<![CDATA[>]]>"
	// // instead of the more sensible "]]&gt;", or perhaps they have nothing in
	// // place to properly escape data imported from their web clipper. In any
	// // case it results in invalid XML that Evernote cannot even import back.
	// //
	// // Handling that invalid XML with SAX would also be very tricky, so instead
	// // we add a pre-processing step that converts these tags to just "&gt;". It
	// // should be safe to do so because such content can only be within the body
	// // of a note - and ">" or "&gt;" is equivalent.
	// //
	// // Ref: https://discourse.joplinapp.org/t/20470/4
	// const newContent = content.replace(/<!\[CDATA\[>\]\]>/g, '&gt;');
	// if (content === newContent) return filePath;
	// const newFilePath = `${Setting.value('tempDir')}/${md5(Date.now() + Math.random())}.enex`;
	// await shim.fsDriver().writeFile(newFilePath, newContent, 'utf8');
	// return newFilePath;
};
export default async function importEnex(parentFolderId: string, filePath: string, importOptions: ImportOptions = null) {