mirror of
https://github.com/laurent22/joplin.git
synced 2024-12-24 10:27:10 +02:00
Desktop, Cli: Fix ENEX import issue
Ref: https://discourse.joplinapp.org/t/error-importing-notes-from-format-enex/35001
This commit is contained in:
parent
4e8863d81f
commit
20b1c2e7cb
@ -6,16 +6,16 @@ const os = require('os');
|
|||||||
const { filename } = require('./path-utils');
|
const { filename } = require('./path-utils');
|
||||||
import { setupDatabaseAndSynchronizer, switchClient, expectNotThrow, supportDir, expectThrow } from './testing/test-utils';
|
import { setupDatabaseAndSynchronizer, switchClient, expectNotThrow, supportDir, expectThrow } from './testing/test-utils';
|
||||||
const { enexXmlToMd } = require('./import-enex-md-gen.js');
|
const { enexXmlToMd } = require('./import-enex-md-gen.js');
|
||||||
import importEnex from './import-enex';
|
import importEnex, { ImportOptions } from './import-enex';
|
||||||
import Note from './models/Note';
|
import Note from './models/Note';
|
||||||
import Tag from './models/Tag';
|
import Tag from './models/Tag';
|
||||||
import Resource from './models/Resource';
|
import Resource from './models/Resource';
|
||||||
|
|
||||||
const enexSampleBaseDir = `${supportDir}/../enex_to_md`;
|
const enexSampleBaseDir = `${supportDir}/../enex_to_md`;
|
||||||
|
|
||||||
const importEnexFile = async (filename: string) => {
|
const importEnexFile = async (filename: string, options: ImportOptions = null) => {
|
||||||
const filePath = `${enexSampleBaseDir}/${filename}`;
|
const filePath = `${enexSampleBaseDir}/${filename}`;
|
||||||
await importEnex('', filePath);
|
await importEnex('', filePath, options);
|
||||||
};
|
};
|
||||||
|
|
||||||
const readExpectedFile = async (filename: string) => {
|
const readExpectedFile = async (filename: string) => {
|
||||||
@ -221,7 +221,7 @@ describe('import-enex-md-gen', () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
it('should resolve note links', async () => {
|
it('should resolve note links', async () => {
|
||||||
await importEnexFile('linked_notes.enex');
|
await importEnexFile('linked_notes.enex', { batchSize: 1 });
|
||||||
const notes: NoteEntity[] = await Note.all();
|
const notes: NoteEntity[] = await Note.all();
|
||||||
|
|
||||||
const note1 = notes.find(n => n.title === 'Note 1');
|
const note1 = notes.find(n => n.title === 'Note 1');
|
||||||
|
@ -58,7 +58,6 @@ interface ParserState {
|
|||||||
spanAttributes: string[];
|
spanAttributes: string[];
|
||||||
tags: ParserStateTag[];
|
tags: ParserStateTag[];
|
||||||
currentCode?: string;
|
currentCode?: string;
|
||||||
evernoteLinkTitles: Record<string, string>;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -608,7 +607,6 @@ function enexXmlToMdArray(stream: any, resources: ResourceEntity[], tasks: Extra
|
|||||||
anchorAttributes: [],
|
anchorAttributes: [],
|
||||||
spanAttributes: [],
|
spanAttributes: [],
|
||||||
tags: [],
|
tags: [],
|
||||||
evernoteLinkTitles: {},
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const options = {};
|
const options = {};
|
||||||
|
@ -1,10 +1,8 @@
|
|||||||
import uuid from './uuid';
|
import uuid from './uuid';
|
||||||
import BaseModel from './BaseModel';
|
|
||||||
import Note from './models/Note';
|
import Note from './models/Note';
|
||||||
import Tag from './models/Tag';
|
import Tag from './models/Tag';
|
||||||
import Resource from './models/Resource';
|
import Resource from './models/Resource';
|
||||||
import Setting from './models/Setting';
|
import Setting from './models/Setting';
|
||||||
import time from './time';
|
|
||||||
import shim from './shim';
|
import shim from './shim';
|
||||||
import { NoteEntity, ResourceEntity } from './services/database/types';
|
import { NoteEntity, ResourceEntity } from './services/database/types';
|
||||||
import { enexXmlToMd } from './import-enex-md-gen';
|
import { enexXmlToMd } from './import-enex-md-gen';
|
||||||
@ -15,7 +13,6 @@ import { extractUrls as extractUrlsFromMarkdown } from '@joplin/utils/markdown';
|
|||||||
const moment = require('moment');
|
const moment = require('moment');
|
||||||
const { wrapError } = require('./errorUtils');
|
const { wrapError } = require('./errorUtils');
|
||||||
const { enexXmlToHtml } = require('./import-enex-html-gen.js');
|
const { enexXmlToHtml } = require('./import-enex-html-gen.js');
|
||||||
const Levenshtein = require('levenshtein');
|
|
||||||
const md5 = require('md5');
|
const md5 = require('md5');
|
||||||
const { Base64Decode } = require('base64-stream');
|
const { Base64Decode } = require('base64-stream');
|
||||||
const md5File = require('md5-file');
|
const md5File = require('md5-file');
|
||||||
@ -96,38 +93,6 @@ function removeUndefinedProperties(note: NoteEntity) {
|
|||||||
return output;
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the Levenshtein distance between `s1` and `s2`, divided by the
// length of `s1`. 0 means that the strings are identical; the higher the
// value, the more they differ. Note that the result is relative to `s1` only,
// so it is not symmetric and can be greater than 1 when `s2` is much longer.
//
// If either string is empty the function returns 1 without computing any
// distance.
function levenshteinPercent(s1: string, s2: string) {
	// Check for empty strings first so that we don't build the Levenshtein
	// object when its result would be discarded anyway.
	if (!s1.length || !s2.length) return 1;

	const l = new Levenshtein(s1, s2);
	return Math.abs(l.distance / s1.length);
}
|
|
||||||
|
|
||||||
// Looks for an existing, non-conflict note that corresponds to `note`.
//
// - If the note was created more than 360 days ago, only a note with exactly
//   the same creation time and title is accepted, and only if there is
//   exactly one such note.
// - Otherwise the candidates are the notes with the same creation time. With
//   no candidate the result is null, with a single candidate it is that
//   candidate, and with several the one whose title has the lowest
//   `levenshteinPercent` score is returned, provided this score is below 0.2.
//
// Returns the matching note, or null if none was found.
async function fuzzyMatch(note: ExtractedNote) {
	const oldNoteLimit = time.unixMs() - 1000 * 60 * 60 * 24 * 360;

	if (note.created_time < oldNoteLimit) {
		const exactMatches = await Note.modelSelectAll('SELECT * FROM notes WHERE is_conflict = 0 AND created_time = ? AND title = ?', [note.created_time, note.title]);
		return exactMatches.length === 1 ? exactMatches[0] : null;
	}

	const candidates = await Note.modelSelectAll('SELECT * FROM notes WHERE is_conflict = 0 AND created_time = ?', [note.created_time]);
	if (candidates.length < 2) return candidates.length === 1 ? candidates[0] : null;

	// Several notes share the same creation time, so pick the one with the
	// closest title. The comparison is strict, so on a tie the first
	// candidate wins.
	let bestScore = 1;
	let bestCandidate = null;
	for (const candidate of candidates) {
		const score = levenshteinPercent(note.title, candidate.title);
		if (score < bestScore) {
			bestScore = score;
			bestCandidate = candidate;
		}
	}

	return bestCandidate && bestScore < 0.2 ? bestCandidate : null;
}
|
|
||||||
|
|
||||||
interface ExtractedResource {
|
interface ExtractedResource {
|
||||||
hasData?: boolean;
|
hasData?: boolean;
|
||||||
id?: string;
|
id?: string;
|
||||||
@ -155,6 +120,14 @@ interface ExtractedNote extends NoteEntity {
|
|||||||
bodyXml?: string;
|
bodyXml?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// These are the notes that have been parsed and saved to Joplin. We don't keep
|
||||||
|
// the whole `ExtractedNote` in memory because it contains resource data, etc.
|
||||||
|
// We only keep what is needed to restore the note links: the note ID and body.
|
||||||
|
interface SavedNote {
|
||||||
|
id: string;
|
||||||
|
body: string;
|
||||||
|
}
|
||||||
|
|
||||||
// At this point we have the resource as it's been parsed from the XML, but
|
// At this point we have the resource as it's been parsed from the XML, but
|
||||||
// additional processing needs to be done to get the final resource file, its
|
// additional processing needs to be done to get the final resource file, its
|
||||||
// size, MD5, etc.
|
// size, MD5, etc.
|
||||||
@ -245,26 +218,19 @@ async function saveNoteTags(note: ExtractedNote) {
|
|||||||
return notesTagged;
|
return notesTagged;
|
||||||
}
|
}
|
||||||
|
|
||||||
interface ImportOptions {
|
export interface ImportOptions {
|
||||||
fuzzyMatching?: boolean;
|
|
||||||
// eslint-disable-next-line @typescript-eslint/ban-types -- Old code before rule was applied
|
// eslint-disable-next-line @typescript-eslint/ban-types -- Old code before rule was applied
|
||||||
onProgress?: Function;
|
onProgress?: Function;
|
||||||
// eslint-disable-next-line @typescript-eslint/ban-types -- Old code before rule was applied
|
// eslint-disable-next-line @typescript-eslint/ban-types -- Old code before rule was applied
|
||||||
onError?: Function;
|
onError?: Function;
|
||||||
outputFormat?: string;
|
outputFormat?: string;
|
||||||
|
batchSize?: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function saveNoteToStorage(note: ExtractedNote, importOptions: ImportOptions) {
|
async function saveNoteToStorage(note: ExtractedNote) {
|
||||||
importOptions = { fuzzyMatching: false, ...importOptions };
|
|
||||||
|
|
||||||
note = Note.filter(note as any);
|
note = Note.filter(note as any);
|
||||||
|
|
||||||
const existingNote = importOptions.fuzzyMatching ? await fuzzyMatch(note) : null;
|
|
||||||
|
|
||||||
const result = {
|
const result = {
|
||||||
noteCreated: false,
|
|
||||||
noteUpdated: false,
|
|
||||||
noteSkipped: false,
|
|
||||||
resourcesCreated: 0,
|
resourcesCreated: 0,
|
||||||
notesTagged: 0,
|
notesTagged: 0,
|
||||||
};
|
};
|
||||||
@ -275,28 +241,10 @@ async function saveNoteToStorage(note: ExtractedNote, importOptions: ImportOptio
|
|||||||
const notesTagged = await saveNoteTags(note);
|
const notesTagged = await saveNoteTags(note);
|
||||||
result.notesTagged += notesTagged;
|
result.notesTagged += notesTagged;
|
||||||
|
|
||||||
if (existingNote) {
|
await Note.save(note, {
|
||||||
const diff = BaseModel.diffObjects(existingNote, note);
|
isNew: true,
|
||||||
delete diff.tags;
|
autoTimestamp: false,
|
||||||
delete diff.resources;
|
});
|
||||||
delete diff.id;
|
|
||||||
|
|
||||||
if (!Object.getOwnPropertyNames(diff).length) {
|
|
||||||
result.noteSkipped = true;
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
diff.id = existingNote.id;
|
|
||||||
diff.type_ = existingNote.type_;
|
|
||||||
await Note.save(diff, { autoTimestamp: false });
|
|
||||||
result.noteUpdated = true;
|
|
||||||
} else {
|
|
||||||
await Note.save(note, {
|
|
||||||
isNew: true,
|
|
||||||
autoTimestamp: false,
|
|
||||||
});
|
|
||||||
result.noteCreated = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@ -345,12 +293,47 @@ const preProcessFile = async (filePath: string): Promise<string> => {
|
|||||||
// return newFilePath;
|
// return newFilePath;
|
||||||
};
|
};
|
||||||
|
|
||||||
export default async function importEnex(parentFolderId: string, filePath: string, importOptions: ImportOptions = null) {
|
|
||||||
if (!importOptions) importOptions = {};
|
|
||||||
if (!('fuzzyMatching' in importOptions)) importOptions.fuzzyMatching = false;
|
|
||||||
if (!('onProgress' in importOptions)) importOptions.onProgress = function() {};
|
|
||||||
if (!('onError' in importOptions)) importOptions.onError = function() {};
|
|
||||||
|
|
||||||
|
// --------------------------------------------------------
// Convert the Evernote note links to Joplin note links
// (`:/<note id>`). A link is matched to a note using its
// title. If we don't find a matching note, or if there are
// multiple matching notes, we leave the Evernote link as is.
//
// - `notes`: the saved notes to process. The `body` of a
//   note is updated in place when a link is converted.
// - `noteTitlesToIds`: maps a note title to the IDs of the
//   notes that have this title.
// - `importOptions`: only `outputFormat` is used, to know
//   if the links must be extracted from HTML or Markdown.
//
// Each note with at least one converted link is saved again
// with its new body and `updated_time` set to now.
// --------------------------------------------------------
const restoreNoteLinks = async (notes: SavedNote[], noteTitlesToIds: Record<string, string[]>, importOptions: ImportOptions) => {
	// The title-to-IDs map is a plain object, so a direct lookup would also
	// return the properties inherited from Object.prototype. For example a
	// link titled "constructor" would resolve to the `Object` function, whose
	// `length` is 1, and the link would be replaced by `:/undefined`. So we
	// only consider own properties that are arrays.
	const findNoteIds = (title: string): string[] => {
		if (!Object.prototype.hasOwnProperty.call(noteTitlesToIds, title)) return [];
		const ids = noteTitlesToIds[title];
		return Array.isArray(ids) ? ids : [];
	};

	for (const note of notes) {
		const links = importOptions.outputFormat === 'html' ?
			extractUrlsFromHtml(note.body) :
			extractUrlsFromMarkdown(note.body);

		let noteChanged = false;

		for (const link of links) {
			const matchingNoteIds = findNoteIds(link.title);
			if (matchingNoteIds.length !== 1) continue;

			note.body = note.body.replace(link.url, `:/${matchingNoteIds[0]}`);
			noteChanged = true;
		}

		if (noteChanged) {
			await Note.save({
				id: note.id,
				body: note.body,
				updated_time: Date.now(),
			}, {
				autoTimestamp: false,
			});
		}
	}
};
|
||||||
|
|
||||||
|
// What `parseNotes` resolves with: the notes that have been saved, and a map
// from note title to the IDs of the saved notes that have this title. Both
// are passed to `restoreNoteLinks` once the parsing is done.
interface ParseNotesResult {
|
||||||
|
savedNotes: SavedNote[];
|
||||||
|
noteTitlesToIds: Record<string, string[]>;
|
||||||
|
}
|
||||||
|
|
||||||
|
const parseNotes = async (parentFolderId: string, filePath: string, importOptions: ImportOptions = null): Promise<ParseNotesResult> => {
|
||||||
// eslint-disable-next-line @typescript-eslint/ban-types -- Old code before rule was applied
|
// eslint-disable-next-line @typescript-eslint/ban-types -- Old code before rule was applied
|
||||||
function handleSaxStreamEvent(fn: Function) {
|
function handleSaxStreamEvent(fn: Function) {
|
||||||
return function(...args: any[]) {
|
return function(...args: any[]) {
|
||||||
@ -397,6 +380,9 @@ export default async function importEnex(parentFolderId: string, filePath: strin
|
|||||||
let noteResourceRecognition: NoteResourceRecognition = null;
|
let noteResourceRecognition: NoteResourceRecognition = null;
|
||||||
const notes: ExtractedNote[] = [];
|
const notes: ExtractedNote[] = [];
|
||||||
let processingNotes = false;
|
let processingNotes = false;
|
||||||
|
const savedNotes: SavedNote[] = [];
|
||||||
|
const createdNoteIds: string[] = [];
|
||||||
|
const noteTitlesToIds: Record<string, string[]> = {};
|
||||||
|
|
||||||
const createErrorWithNoteTitle = (fnThis: any, error: any) => {
|
const createErrorWithNoteTitle = (fnThis: any, error: any) => {
|
||||||
const line = [];
|
const line = [];
|
||||||
@ -437,15 +423,6 @@ export default async function importEnex(parentFolderId: string, filePath: strin
|
|||||||
processingNotes = true;
|
processingNotes = true;
|
||||||
stream.pause();
|
stream.pause();
|
||||||
|
|
||||||
// Set the note ID so that we can create a title-to-id map, which
|
|
||||||
// will be needed to recreate the note links below.
|
|
||||||
const noteTitleToId: Record<string, string[]> = {};
|
|
||||||
for (const note of notes) {
|
|
||||||
if (!noteTitleToId[note.title]) noteTitleToId[note.title] = [];
|
|
||||||
note.id = uuid.create();
|
|
||||||
noteTitleToId[note.title].push(note.id);
|
|
||||||
}
|
|
||||||
|
|
||||||
while (notes.length) {
|
while (notes.length) {
|
||||||
const note = notes.shift();
|
const note = notes.shift();
|
||||||
|
|
||||||
@ -467,32 +444,16 @@ export default async function importEnex(parentFolderId: string, filePath: strin
|
|||||||
// Convert the ENEX body to either Markdown or HTML
|
// Convert the ENEX body to either Markdown or HTML
|
||||||
// --------------------------------------------------------
|
// --------------------------------------------------------
|
||||||
|
|
||||||
let body: string = importOptions.outputFormat === 'html' ?
|
const body: string = importOptions.outputFormat === 'html' ?
|
||||||
await enexXmlToHtml(note.bodyXml, note.resources) :
|
await enexXmlToHtml(note.bodyXml, note.resources) :
|
||||||
await enexXmlToMd(note.bodyXml, note.resources, note.tasks);
|
await enexXmlToMd(note.bodyXml, note.resources, note.tasks);
|
||||||
delete note.bodyXml;
|
delete note.bodyXml;
|
||||||
|
|
||||||
// --------------------------------------------------------
|
|
||||||
// Convert the Evernote note links to Joplin note links. If
|
|
||||||
// we don't find a matching note, or if there are multiple
|
|
||||||
// matching notes, we leave the Evernote links as is.
|
|
||||||
// --------------------------------------------------------
|
|
||||||
|
|
||||||
const links = importOptions.outputFormat === 'html' ?
|
|
||||||
extractUrlsFromHtml(body) :
|
|
||||||
extractUrlsFromMarkdown(body);
|
|
||||||
|
|
||||||
for (const link of links) {
|
|
||||||
const matchingNoteIds = noteTitleToId[link.title];
|
|
||||||
if (matchingNoteIds && matchingNoteIds.length === 1) {
|
|
||||||
body = body.replace(link.url, `:/${matchingNoteIds[0]}`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// --------------------------------------------------------
|
// --------------------------------------------------------
|
||||||
// Finish setting up the note
|
// Finish setting up the note
|
||||||
// --------------------------------------------------------
|
// --------------------------------------------------------
|
||||||
|
|
||||||
|
note.id = uuid.create();
|
||||||
note.markup_language = importOptions.outputFormat === 'html' ?
|
note.markup_language = importOptions.outputFormat === 'html' ?
|
||||||
MarkupToHtml.MARKUP_LANGUAGE_HTML :
|
MarkupToHtml.MARKUP_LANGUAGE_HTML :
|
||||||
MarkupToHtml.MARKUP_LANGUAGE_MARKDOWN;
|
MarkupToHtml.MARKUP_LANGUAGE_MARKDOWN;
|
||||||
@ -511,15 +472,17 @@ export default async function importEnex(parentFolderId: string, filePath: strin
|
|||||||
// that case
|
// that case
|
||||||
if (!note.updated_time) note.updated_time = note.created_time;
|
if (!note.updated_time) note.updated_time = note.created_time;
|
||||||
|
|
||||||
const result = await saveNoteToStorage(note, importOptions);
|
const result = await saveNoteToStorage(note);
|
||||||
|
|
||||||
if (result.noteUpdated) {
|
createdNoteIds.push(note.id);
|
||||||
progressState.updated++;
|
if (!noteTitlesToIds[note.title]) noteTitlesToIds[note.title] = [];
|
||||||
} else if (result.noteCreated) {
|
noteTitlesToIds[note.title].push(note.id);
|
||||||
progressState.created++;
|
savedNotes.push({
|
||||||
} else if (result.noteSkipped) {
|
id: note.id,
|
||||||
progressState.skipped++;
|
body: note.body,
|
||||||
}
|
});
|
||||||
|
|
||||||
|
progressState.created++;
|
||||||
progressState.resourcesCreated += result.resourcesCreated;
|
progressState.resourcesCreated += result.resourcesCreated;
|
||||||
progressState.notesTagged += result.notesTagged;
|
progressState.notesTagged += result.notesTagged;
|
||||||
importOptions.onProgress(progressState);
|
importOptions.onProgress(progressState);
|
||||||
@ -648,7 +611,7 @@ export default async function importEnex(parentFolderId: string, filePath: strin
|
|||||||
|
|
||||||
notes.push(note);
|
notes.push(note);
|
||||||
|
|
||||||
if (notes.length >= 10) {
|
if (notes.length >= importOptions.batchSize) {
|
||||||
// eslint-disable-next-line promise/prefer-await-to-then -- Old code before rule was applied
|
// eslint-disable-next-line promise/prefer-await-to-then -- Old code before rule was applied
|
||||||
processNotes().catch(error => {
|
processNotes().catch(error => {
|
||||||
importOptions.onError(createErrorWithNoteTitle(this, error));
|
importOptions.onError(createErrorWithNoteTitle(this, error));
|
||||||
@ -718,12 +681,25 @@ export default async function importEnex(parentFolderId: string, filePath: strin
|
|||||||
if (allDone) {
|
if (allDone) {
|
||||||
shim.clearTimeout(iid);
|
shim.clearTimeout(iid);
|
||||||
if (needToDeleteFileToProcess) void shim.fsDriver().remove(fileToProcess);
|
if (needToDeleteFileToProcess) void shim.fsDriver().remove(fileToProcess);
|
||||||
resolve(null);
|
resolve({
|
||||||
|
savedNotes,
|
||||||
|
noteTitlesToIds,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}, 500);
|
}, 1000);
|
||||||
}));
|
}));
|
||||||
|
|
||||||
stream.pipe(saxStream);
|
stream.pipe(saxStream);
|
||||||
});
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
// Imports the ENEX file at `filePath` into the folder `parentFolderId`.
//
// The import is done in two passes: `parseNotes` processes the file and
// returns the notes it has saved, then `restoreNoteLinks` converts the
// Evernote note links of these notes to Joplin note links.
//
// `importOptions` is optional. Any option that is missing, or that is set to
// `null` or `undefined`, gets a default value: `onProgress` and `onError` do
// nothing, and `batchSize` is 10. The object passed by the caller is not
// modified.
export default async function importEnex(parentFolderId: string, filePath: string, importOptions: ImportOptions = null) {
	// Work on a copy so that the defaults below don't leak into the caller's
	// object. Spreading `null` simply gives an empty object.
	const options: ImportOptions = { ...importOptions };

	// We don't use `'key' in options` here because it would consider an
	// option explicitly set to `undefined` as provided, and the import would
	// later fail when calling the handler or comparing with the batch size.
	if (!options.onProgress) options.onProgress = function() {};
	if (!options.onError) options.onError = function() {};
	if (typeof options.batchSize !== 'number' || Number.isNaN(options.batchSize)) options.batchSize = 10;

	const result = await parseNotes(parentFolderId, filePath, options);
	await restoreNoteLinks(result.savedNotes, result.noteTitlesToIds, options);
}
|
||||||
|
@ -66,7 +66,6 @@
|
|||||||
"image-type": "3.1.0",
|
"image-type": "3.1.0",
|
||||||
"immer": "7.0.15",
|
"immer": "7.0.15",
|
||||||
"js-yaml": "4.1.0",
|
"js-yaml": "4.1.0",
|
||||||
"levenshtein": "1.0.5",
|
|
||||||
"markdown-it": "13.0.2",
|
"markdown-it": "13.0.2",
|
||||||
"md5": "2.3.0",
|
"md5": "2.3.0",
|
||||||
"md5-file": "5.0.0",
|
"md5-file": "5.0.0",
|
||||||
|
@ -6871,7 +6871,6 @@ __metadata:
|
|||||||
immer: 7.0.15
|
immer: 7.0.15
|
||||||
jest: 29.7.0
|
jest: 29.7.0
|
||||||
js-yaml: 4.1.0
|
js-yaml: 4.1.0
|
||||||
levenshtein: 1.0.5
|
|
||||||
markdown-it: 13.0.2
|
markdown-it: 13.0.2
|
||||||
md5: 2.3.0
|
md5: 2.3.0
|
||||||
md5-file: 5.0.0
|
md5-file: 5.0.0
|
||||||
@ -27688,13 +27687,6 @@ __metadata:
|
|||||||
languageName: node
|
languageName: node
|
||||||
linkType: hard
|
linkType: hard
|
||||||
|
|
||||||
"levenshtein@npm:1.0.5":
|
|
||||||
version: 1.0.5
|
|
||||||
resolution: "levenshtein@npm:1.0.5"
|
|
||||||
checksum: d5ceca3bfc4804ad50515291841d968eea5f1f740310c21b5ae6cb6d5514ee68b9c00405059f36934611d3258967bad6d306dcf299f446c7cdd25bdda2c4720c
|
|
||||||
languageName: node
|
|
||||||
linkType: hard
|
|
||||||
|
|
||||||
"levn@npm:^0.4.1":
|
"levn@npm:^0.4.1":
|
||||||
version: 0.4.1
|
version: 0.4.1
|
||||||
resolution: "levn@npm:0.4.1"
|
resolution: "levn@npm:0.4.1"
|
||||||
|
Loading…
Reference in New Issue
Block a user