You've already forked joplin
mirror of
https://github.com/laurent22/joplin.git
synced 2025-11-06 09:19:22 +02:00
Added support for enex import
This commit is contained in:
@@ -1,623 +0,0 @@
|
||||
const stringPadding = require('string-padding');
|
||||
|
||||
// Structural markers that are inserted in the markdown token array while the
// note is being parsed. processMdArrayNewLines() turns them into real new
// lines and spaces once the whole note is known.
const BLOCK_OPEN = "[[BLOCK_OPEN]]";
const BLOCK_CLOSE = "[[BLOCK_CLOSE]]";
const NEWLINE = "[[NEWLINE]]";
const NEWLINE_MERGED = "[[MERGED]]";
const SPACE = "[[SPACE]]";

/**
 * Renders an array of markdown tokens (text fragments mixed with the
 * BLOCK_OPEN / BLOCK_CLOSE / NEWLINE / SPACE markers) to the final string.
 *
 * The markers are first normalised so that nested or adjacent blocks do not
 * generate more line breaks than needed, then every remaining marker is
 * replaced by "\n" or " ".
 *
 * The input array is not modified.
 *
 * @param {Array} md - Markdown tokens.
 * @returns {string} The markdown text, or '' when it only contains white space.
 */
function processMdArrayNewLines(md) {
	// Ignore the BLOCK_OPEN markers at the start and the BLOCK_CLOSE markers at
	// the end. A slice is used so that the caller's array is left untouched.
	let first = 0;
	let end = md.length;
	while (first < end && md[first] === BLOCK_OPEN) first++;
	while (end > first && md[end - 1] === BLOCK_CLOSE) end--;
	let tokens = md.slice(first, end);

	// Keeps every token except those for which shouldDrop(previous, current) is
	// true. "previous" is the token that precedes "current" in the input list,
	// whether or not that previous token has itself been dropped.
	const dropWhere = (list, shouldDrop) => list.filter((token, i) => i === 0 || !shouldDrop(list[i - 1], token));

	const isBlockMarker = (token) => token === BLOCK_OPEN || token === BLOCK_CLOSE;

	// 1. Collapse runs of identical block markers into a single marker.
	tokens = dropWhere(tokens, (previous, token) => isBlockMarker(token) && previous === token);

	// 2. A block that closes right before another one opens only needs one
	//    line break: replace the pair by a single NEWLINE_MERGED marker.
	const merged = [];
	tokens.forEach((token, i) => {
		if (i > 0 && tokens[i - 1] === BLOCK_CLOSE && token === BLOCK_OPEN) {
			merged.pop();
			merged.push(NEWLINE_MERGED);
		} else {
			merged.push(token);
		}
	});
	tokens = merged;

	// 3. and 4. An explicit NEWLINE already provides the line break, so the
	//    block marker that directly follows it is redundant.
	tokens = dropWhere(tokens, (previous, token) => previous === NEWLINE && (token === NEWLINE_MERGED || token === BLOCK_CLOSE));
	tokens = dropWhere(tokens, (previous, token) => previous === NEWLINE && (token === NEWLINE_MERGED || token === BLOCK_OPEN));

	// Drop a final NEWLINE that directly follows a merged block break.
	if (tokens.length > 2 && tokens[tokens.length - 2] === NEWLINE_MERGED && tokens[tokens.length - 1] === NEWLINE) {
		tokens.pop();
	}

	let output = '';
	let previous = ''; // Text that was appended for the previous token
	let start = true; // True until something has been appended
	for (const token of tokens) {
		let add = '';
		if (token === BLOCK_CLOSE || token === BLOCK_OPEN || token === NEWLINE || token === NEWLINE_MERGED) {
			add = "\n";
		} else if (token === SPACE) {
			// "previous" holds the rendered text, so a preceding SPACE marker
			// shows up here as " ". Spaces are not repeated and are not added
			// at the start of the text or of a line.
			if (previous === " " || previous === "\n" || start) continue;
			add = " ";
		} else {
			add = token;
		}
		start = false;
		output += add;
		previous = add;
	}

	if (!output.trim().length) return '';

	return output;
}
|
||||
|
||||
/**
 * Tells whether the given character is a new line, carriage return, vertical
 * tab, form feed, tab or regular space. Non-breaking spaces are not matched.
 *
 * @param {string} c - A single character.
 * @returns {boolean}
 */
function isWhiteSpace(c) {
	const whiteSpaceChars = ['\n', '\r', '\v', '\f', '\t', ' '];
	return whiteSpaceChars.includes(c);
}
|
||||
|
||||
// Like QString::simplified(), except that it preserves non-breaking spaces
// (which Evernote uses for indentation, etc.).
//
// Each run of white space characters (\n, \r, \v, \f, \t and the regular
// space) is reduced to its first character, then the white space at both ends
// of the string is removed.
function simplifyString(s) {
	const collapsed = s.replace(/[\n\r\v\f\t ]+/g, (run) => run[0]);
	return collapsed.replace(/^[\n\r\v\f\t ]+/, '').replace(/[\n\r\v\f\t ]+$/, '');
}
|
||||
|
||||
/**
 * Appends a text node to the markdown token array.
 *
 * Inside a code block the text is pushed as is, prefixed with a tab.
 * Elsewhere the surrounding \n and \r are removed and the white space is
 * collapsed; a space at the start or end of the text is kept as a SPACE
 * marker on the corresponding side.
 *
 * @param {Array} lines - Token array to append to (modified in place).
 * @param {Object} state - Parser state; only `inCode` is read.
 * @param {string} text - Content of the text node.
 * @returns {Array} The `lines` array.
 */
function collapseWhiteSpaceAndAppend(lines, state, text) {
	if (state.inCode) {
		lines.push("\t" + text);
		return lines;
	}

	// Remove all \n and \r from the left and right of the text
	const unwrapped = text.replace(/^[\n\r]+/, '').replace(/[\n\r]+$/, '');

	// Remember if there was a space on either side before everything is
	// collapsed, so that it can be restored as a single SPACE marker.
	const startsWithSpace = unwrapped.startsWith(' ');
	const endsWithSpace = unwrapped.endsWith(' ');
	const simplified = simplifyString(unwrapped);

	// Nothing left to append
	if (simplified === "" && !startsWithSpace && !endsWithSpace) return lines;

	if (startsWithSpace) lines.push(SPACE);
	lines.push(simplified);
	if (endsWithSpace) lines.push(SPACE);

	return lines;
}
|
||||
|
||||
// MIME types of the resources that are rendered as images rather than as links.
const imageMimeTypes = ["image/cgm", "image/fits", "image/g3fax", "image/gif", "image/ief", "image/jp2", "image/jpeg", "image/jpm", "image/jpx", "image/naplps", "image/png", "image/prs.btif", "image/prs.pti", "image/t38", "image/tiff", "image/tiff-fx", "image/vnd.adobe.photoshop", "image/vnd.cns.inf2", "image/vnd.djvu", "image/vnd.dwg", "image/vnd.dxf", "image/vnd.fastbidsheet", "image/vnd.fpx", "image/vnd.fst", "image/vnd.fujixerox.edmics-mmr", "image/vnd.fujixerox.edmics-rlc", "image/vnd.globalgraphics.pgb", "image/vnd.microsoft.icon", "image/vnd.mix", "image/vnd.ms-modi", "image/vnd.net-fpx", "image/vnd.sealed.png", "image/vnd.sealedmedia.softseal.gif", "image/vnd.sealedmedia.softseal.jpg", "image/vnd.svf", "image/vnd.wap.wbmp", "image/vnd.xiff"];

/**
 * Tells whether the MIME type is one of the known image types.
 *
 * @param {string} m - MIME type.
 * @returns {boolean}
 */
function isImageMimeType(m) {
	return imageMimeTypes.includes(m);
}

/**
 * Appends the markdown tag of a resource to the token array: an image tag
 * (`![alt](:/id)`) for image resources, a link (`[alt](:/id)`) otherwise.
 *
 * @param {Array} lines - Token array to append to (modified in place).
 * @param {Object} resource - Resource; `id`, `mime` and `alt` are read.
 * @param {string} [alt=""] - Alternative text. When empty, `resource.alt` is used.
 * @returns {Array} The `lines` array.
 */
function addResourceTag(lines, resource, alt = "") {
	// TODO: refactor to use Resource.markdownTag

	let tagAlt = alt == "" ? resource.alt : alt;
	if (!tagAlt) tagAlt = '';

	// The two tags only differ by the leading "!". The tag is pushed in three
	// parts, the alternative text being a token of its own.
	lines.push(isImageMimeType(resource.mime) ? "![" : "[");
	lines.push(tagAlt);
	lines.push("](:/" + resource.id + ")");

	return lines;
}
|
||||
|
||||
|
||||
/**
 * Tells whether the tag opens a block (div, p, dl, dd or center).
 *
 * @param {string} n - Lowercase tag name.
 * @returns {boolean}
 */
function isBlockTag(n) {
	return ["div", "p", "dl", "dd", "center"].includes(n);
}
|
||||
|
||||
/**
 * Tells whether the tag is rendered as bold text (strong or b).
 *
 * @param {string} n - Lowercase tag name.
 * @returns {boolean}
 */
function isStrongTag(n) {
	return ["strong", "b"].includes(n);
}
|
||||
|
||||
/**
 * Tells whether the tag is rendered as emphasised text (em, i or u).
 *
 * @param {string} n - Lowercase tag name.
 * @returns {boolean}
 */
function isEmTag(n) {
	return ["em", "i", "u"].includes(n);
}
|
||||
|
||||
/**
 * Tells whether the tag is an anchor (a).
 *
 * @param {string} n - Lowercase tag name.
 * @returns {boolean}
 */
function isAnchor(n) {
	const anchorTag = "a";
	return n == anchorTag;
}
|
||||
|
||||
/**
 * Tells whether nothing needs to be done when the tag is closed.
 *
 * @param {string} n - Lowercase tag name.
 * @returns {boolean}
 */
function isIgnoredEndTag(n) {
	const ignoredEndTags = ["en-note", "en-todo", "span", "body", "html", "font", "br", 'hr', 's', 'tbody'];
	return ignoredEndTags.includes(n);
}
|
||||
|
||||
/**
 * Tells whether the tag is a list container (ol or ul).
 *
 * @param {string} n - Lowercase tag name.
 * @returns {boolean}
 */
function isListTag(n) {
	return ["ol", "ul"].includes(n);
}
|
||||
|
||||
// Elements that don't require any special treatment beside adding a newline
// character when they are closed. All the heading levels that open a block
// (h1 to h6) are included so that each of them is closed too.
function isNewLineOnlyEndTag(n) {
	const newLineOnlyEndTags = ["div", "p", "li", "h1", "h2", "h3", "h4", "h5", "h6", "dl", "dd", "center"];
	return newLineOnlyEndTags.includes(n);
}
|
||||
|
||||
/**
 * Tells whether the tag delimits a code block (pre or code).
 *
 * @param {string} n - Lowercase tag name.
 * @returns {boolean}
 */
function isCodeTag(n) {
	return ["pre", "code"].includes(n);
}
|
||||
|
||||
/**
 * Tells whether the token is one of the two block markers (BLOCK_OPEN or
 * BLOCK_CLOSE).
 *
 * @param {string} s - Markdown token.
 * @returns {boolean}
 */
function isNewLineBlock(s) {
	return [BLOCK_OPEN, BLOCK_CLOSE].includes(s);
}
|
||||
|
||||
/**
 * Returns the first element of `xmlNode`, or an empty string when `xmlNode`
 * is missing or empty.
 *
 * @param {Array} xmlNode
 * @returns {*} The first element, or ''.
 */
function xmlNodeText(xmlNode) {
	const hasContent = xmlNode && xmlNode.length;
	return hasContent ? xmlNode[0] : '';
}
|
||||
|
||||
/**
 * Parses the XML content of a note, read from `stream`, and converts it to a
 * tree of sections. The root section has the type "text" and its `lines`
 * array contains markdown tokens (text fragments and the BLOCK_OPEN,
 * BLOCK_CLOSE, NEWLINE and SPACE markers). Tables are represented as nested
 * sections (table > tr > td), each one with its own `lines` array.
 *
 * Structural problems found in the XML reject the promise. Once the promise
 * has been rejected the rest of the document is ignored.
 *
 * @param {Object} stream - Readable stream of the note content XML.
 * @param {Array} resources - Resources of the note. The array is not modified,
 *   but a resource without an ID may get its `id` property set (see below).
 * @returns {Promise<Object>} Resolves with `{ content, resources }`, where
 *   `content` is the root section and `resources` the resources that are not
 *   referenced in the content.
 */
function enexXmlToMdArray(stream, resources) {
	// Work on a copy because the resources that are found in the content are
	// removed from the list.
	resources = resources.slice();

	return new Promise((resolve, reject) => {
		const state = {
			inCode: false,
			lists: [],
			anchorAttributes: [],
		};

		const options = {};
		const strict = true;
		const saxStream = require('sax').createStream(strict, options);

		let section = {
			type: 'text',
			lines: [],
			parent: null,
		};

		// Set once the conversion has failed. The handlers are called from the
		// stream, so throwing from them would not reject the promise; instead
		// the promise is rejected and the following events are ignored since
		// the section tree may be in an inconsistent state.
		let failed = false;
		const fail = (error) => {
			failed = true;
			reject(error);
		};

		// Adds a new section to the current one and makes it the current one.
		const openSection = (type, extraProperties) => {
			const newSection = Object.assign({
				type: type,
				lines: [],
				parent: section,
			}, extraProperties);
			section.lines.push(newSection);
			section = newSection;
		};

		saxStream.on('error', function(e) {
			fail(e);
		});

		saxStream.on('text', function(text) {
			if (failed) return;
			section.lines = collapseWhiteSpaceAndAppend(section.lines, state, text);
		});

		saxStream.on('opentag', function(node) {
			if (failed) return;

			const n = node.name.toLowerCase();
			const heading = /^h([1-6])$/.exec(n);

			if (n == 'en-note') {
				// Start of note
			} else if (isBlockTag(n)) {
				section.lines.push(BLOCK_OPEN);
			} else if (n == 'table') {
				openSection('table');
			} else if (n == 'tbody') {
				// Ignore it
			} else if (n == 'tr') {
				if (section.type != 'table') {
					fail(new Error('Found a <tr> tag outside of a table'));
					return;
				}
				openSection('tr', { isHeader: false });
			} else if (n == 'td' || n == 'th') {
				if (section.type != 'tr') {
					fail(new Error('Found a <td> tag outside of a <tr>'));
					return;
				}
				// A single <th> is enough to make the whole row a header
				if (n == 'th') section.isHeader = true;
				openSection('td');
			} else if (isListTag(n)) {
				section.lines.push(BLOCK_OPEN);
				state.lists.push({ tag: n, counter: 1 });
			} else if (n == 'li') {
				section.lines.push(BLOCK_OPEN);
				if (!state.lists.length) {
					// TODO: could be a warning, but nothing to handle warnings at the moment
					fail(new Error("Found <li> tag without being inside a list"));
					return;
				}

				const container = state.lists[state.lists.length - 1];
				if (container.tag == "ul") {
					section.lines.push("- ");
				} else {
					section.lines.push(container.counter + '. ');
					container.counter++;
				}
			} else if (isStrongTag(n)) {
				section.lines.push("**");
			} else if (n == 's') {
				// Not supported
			} else if (isAnchor(n)) {
				// The attributes are needed when the tag is closed, to get the URL
				state.anchorAttributes.push(node.attributes);
				section.lines.push('[');
			} else if (isEmTag(n)) {
				section.lines.push("*");
			} else if (n == "en-todo") {
				const checked = node.attributes && node.attributes.checked && node.attributes.checked.toLowerCase() == 'true';
				section.lines.push('- [' + (checked ? 'X' : ' ') + '] ');
			} else if (n == "hr") {
				// Needs to be surrounded by new lines so that it's properly rendered as a line when converting to HTML
				section.lines.push(NEWLINE);
				section.lines.push('----------------------------------------');
				section.lines.push(NEWLINE);
				section.lines.push(NEWLINE);
			} else if (heading) {
				// <h1> to <h6>: one "#" per heading level
				section.lines.push(BLOCK_OPEN);
				section.lines.push('#'.repeat(Number(heading[1])) + ' ');
			} else if (isCodeTag(n)) {
				section.lines.push(BLOCK_OPEN);
				state.inCode = true;
			} else if (n == "br") {
				section.lines.push(NEWLINE);
			} else if (n == "en-media") {
				const hash = node.attributes.hash;

				const index = resources.findIndex((r) => r.id == hash);
				const resource = index >= 0 ? resources.splice(index, 1)[0] : null;

				if (resource) {
					// The resource is displayed where it is referenced, and it has
					// been removed from the list. Whatever is left in the list at
					// the end is returned to the caller along with the content.
					if (resource.id) {
						section.lines = addResourceTag(section.lines, resource, node.attributes.alt);
					}
				} else {
					// This is a bit of a hack. Notes sometime have resources attached to it, but those <resource> tags don't contain
					// an "objID" tag, making it impossible to reference the resource. However, in this case the content of the note
					// will contain a corresponding <en-media/> tag, which has the ID in the "hash" attribute. So the hash is assigned
					// to the first resource that doesn't have an ID. Here's an example of note that shows this problem:
					//
					//     <note>
					//         <title>Commande</title>
					//         <content>
					//             <![CDATA[
					//                 <en-note>
					//                     <en-media alt="your QR code" hash="216a16a1bbe007fba4ccf60b118b4ccc" type="image/png"></en-media>
					//                 </en-note>
					//             ]]>
					//         </content>
					//         <resource>
					//             <data encoding="base64">........</data>
					//             <mime>image/png</mime>
					//             <width>150</width>
					//             <height>150</height>
					//         </resource>
					//     </note>
					const resourceWithoutId = resources.find((r) => !r.id);
					if (resourceWithoutId) {
						resourceWithoutId.id = hash;
					} else {
						console.warn('Hash with no associated resource: ' + hash);
					}
				}
			} else if (n == "span" || n == "font") {
				// Ignore
			} else {
				console.warn("Unsupported start tag: " + n);
			}
		});

		saxStream.on('closetag', function(tagName) {
			if (failed) return;

			// Same normalisation as for the opening tags
			const n = tagName.toLowerCase();

			if (n == 'en-note') {
				// End of note
			} else if (isNewLineOnlyEndTag(n) || n == 'h6') {
				section.lines.push(BLOCK_CLOSE);
			} else if (n == 'td' || n == 'th' || n == 'tr' || n == 'table') {
				// End of the section that was created when the tag was opened
				section = section.parent;
			} else if (isIgnoredEndTag(n)) {
				// Skip
			} else if (isListTag(n)) {
				section.lines.push(BLOCK_CLOSE);
				state.lists.pop();
			} else if (isStrongTag(n)) {
				section.lines.push("**");
			} else if (isEmTag(n)) {
				section.lines.push("*");
			} else if (isCodeTag(n)) {
				state.inCode = false;
				section.lines.push(BLOCK_CLOSE);
			} else if (isAnchor(n)) {
				const attributes = state.anchorAttributes.pop();
				const url = attributes && attributes.href ? attributes.href : '';

				if (section.lines.length < 1) {
					// Sanity check, but normally not possible
					fail(new Error('Invalid anchor tag closing'));
					return;
				}

				// When closing the anchor tag, check if there is any text content. If not
				// put the URL as is (don't wrap it in [](url)). The markdown parser, using
				// GitHub flavour, will turn this URL into a link. This is to generate slightly
				// cleaner markdown.
				const previous = section.lines[section.lines.length - 1];
				if (previous == '[') {
					section.lines.pop();
					section.lines.push(url);
				} else if (!previous || previous == url) {
					section.lines.pop();
					section.lines.pop();
					section.lines.push(url);
				} else {
					section.lines.push('](' + url + ')');
				}
			} else if (n == "en-media") {
				// Skip
			} else {
				console.warn("Unsupported end tag: " + n);
			}
		});

		saxStream.on('end', function() {
			resolve({
				content: section,
				resources: resources,
			});
		});

		stream.pipe(saxStream);
	});
}
|
||||
|
||||
/**
 * Renders the tokens of each cell of the table and saves the result, as a
 * one-line string, to the `content` property of the cell.
 *
 * @param {Object} table - Section of type "table" (modified in place).
 * @returns {Object} The `table` object.
 * @throws {Error} If the section is not a table.
 */
function setTableCellContent(table) {
	if (table.type != 'table') throw new Error('Only for tables');

	for (const tr of table.lines) {
		for (const td of tr.lines) {
			// A markdown table cell must fit on one line: up to five
			// consecutive new lines are replaced by one space.
			td.content = processMdArrayNewLines(td.lines).replace(/\n{1,5}/g, ' ');
		}
	}

	return table;
}
|
||||
|
||||
/**
 * Returns the length of the longest line of the text.
 *
 * @param {string} cellText
 * @returns {number}
 */
function cellWidth(cellText) {
	return cellText.split("\n").reduce((widest, line) => Math.max(widest, line.length), 0);
}
|
||||
|
||||
/**
 * Computes the width of each column of the table, which is the width of its
 * widest cell. The `content` property of the cells must have been set.
 *
 * @param {Object} table - Table section.
 * @returns {number[]} One width per column.
 */
function colWidths(table) {
	const widths = [];
	table.lines.forEach((tr) => {
		tr.lines.forEach((td, column) => {
			// A column that has not been seen yet starts with a width of 0
			widths[column] = Math.max(widths[column] || 0, cellWidth(td.content));
		});
	});
	return widths;
}
|
||||
|
||||
/**
 * Renders the table as markdown:
 *
 *     | First Header  | Second Header |
 *     | ------------- | ------------- |
 *     | Content Cell  | Content Cell  |
 *     | Content Cell  | Content Cell  |
 *
 * When the first row of the table is not a header row, an empty header is
 * added before it.
 *
 * @param {Object} table - Table section whose cells have a `content` property.
 * @param {number[]} colWidths - Width of each column.
 * @returns {string[]} The lines of the table, separated by NEWLINE markers.
 *   For a table without any row this is an array with one empty string.
 */
function drawTable(table, colWidths) {
	// There must be at least 3 dashes separating each header cell.
	// https://gist.github.com/IanWang/28965e13cdafdef4e11dc91f578d160d#tables
	const minColWidth = 3;

	const formatRow = (cells) => '| ' + cells.join(' | ') + ' |';

	const rows = [];
	let headerDone = false;
	for (const tr of table.lines) {
		const cells = [];
		const separator = [];
		let emptyHeader = null;

		for (let column = 0; column < colWidths.length; column++) {
			const width = Math.max(minColWidth, colWidths[column]);
			const cell = tr.lines[column] ? tr.lines[column].content : '';
			cells.push(stringPadding(cell, width, ' ', stringPadding.RIGHT));

			// The header parts are only needed for the first row
			if (headerDone) continue;

			if (!tr.isHeader) {
				if (!emptyHeader) emptyHeader = [];
				emptyHeader.push(stringPadding(' ', width, ' ', stringPadding.RIGHT));
			}
			separator.push('-'.repeat(width));
		}

		if (emptyHeader) {
			rows.push(formatRow(emptyHeader));
			rows.push(formatRow(separator));
			headerDone = true;
		}

		rows.push(formatRow(cells));

		if (!headerDone) {
			rows.push(formatRow(separator));
			headerDone = true;
		}
	}

	if (!rows.length) return [''];

	// Put a NEWLINE marker between the rows. The rows are never split, so
	// that the content of the cells is always preserved as is.
	const output = [];
	rows.forEach((row, i) => {
		if (i > 0) output.push(NEWLINE);
		output.push(row);
	});
	return output;
}
|
||||
|
||||
/**
 * Converts the XML content of a note to markdown.
 *
 * The tables found in the content are rendered as markdown tables, and the
 * resources that are not referenced in the content are appended at the end
 * of the text.
 *
 * @param {Object} stream - Readable stream of the note content XML.
 * @param {Array} resources - Resources of the note.
 * @returns {Promise<string>} The markdown text.
 */
async function enexXmlToMd(stream, resources) {
	const result = await enexXmlToMdArray(stream, resources);

	let mdLines = [];

	for (const line of result.content.lines) {
		if (typeof line !== 'object') {
			// An actual line
			mdLines.push(line);
			continue;
		}

		// A table
		const table = setTableCellContent(line);
		const tableLines = drawTable(table, colWidths(table));
		mdLines = mdLines.concat(BLOCK_OPEN, tableLines, BLOCK_CLOSE);
	}

	// The remaining resources are listed at the bottom of the note: one per
	// line, with an extra line break before the first one.
	result.resources.forEach((resource, index) => {
		if (index === 0) mdLines.push(NEWLINE);
		mdLines.push(NEWLINE);
		mdLines = addResourceTag(mdLines, resource, resource.filename);
	});

	return processMdArrayNewLines(mdLines);
}
|
||||
|
||||
module.exports = { enexXmlToMd, processMdArrayNewLines, NEWLINE, addResourceTag };
|
||||
@@ -1,411 +0,0 @@
|
||||
const { uuid } = require('lib/uuid.js');
|
||||
const moment = require('moment');
|
||||
const { promiseChain } = require('lib/promise-utils.js');
|
||||
const { folderItemFilename } = require('lib/string-utils.js');
|
||||
const { BaseModel } = require('lib/base-model.js');
|
||||
const { Note } = require('lib/models/note.js');
|
||||
const { Tag } = require('lib/models/tag.js');
|
||||
const { Resource } = require('lib/models/resource.js');
|
||||
const { Folder } = require('lib/models/folder.js');
|
||||
const { enexXmlToMd } = require('./import-enex-md-gen.js');
|
||||
const { time } = require('lib/time-utils.js');
|
||||
const Levenshtein = require('levenshtein');
|
||||
const jsSHA = require("jssha");
|
||||
|
||||
const Promise = require('promise');
|
||||
const fs = require('fs-extra');
|
||||
const stringToStream = require('string-to-stream')
|
||||
|
||||
/**
 * Converts a date in the format 'YYYYMMDDTHHmmssZ' to a timestamp in
 * milliseconds.
 *
 * @param {string} s - Date to convert.
 * @param {boolean} [zeroIfInvalid=false] - Return 0, instead of throwing an
 *   error, when the date cannot be parsed.
 * @returns {number} Timestamp in milliseconds.
 * @throws {Error} If the date is invalid and `zeroIfInvalid` is false.
 */
function dateToTimestamp(s, zeroIfInvalid = false) {
	const parsed = moment(s, 'YYYYMMDDTHHmmssZ');
	if (parsed.isValid()) return parsed.toDate().getTime();

	if (zeroIfInvalid) return 0;
	throw new Error('Invalid date: ' + s);
}
|
||||
|
||||
/**
 * Extracts the value of the first objID attribute found in the recognition XML.
 *
 * @param {string} recognitionXml
 * @returns {string|null} The value of the attribute, or null if there is none.
 */
function extractRecognitionObjId(recognitionXml) {
	const [, objId = null] = recognitionXml.match(/objID="(.*?)"/) || [];
	return objId;
}
|
||||
|
||||
/**
 * Writes the content to the file at the given path.
 *
 * @param {string} filePath - Path of the file to write.
 * @param {*} content - Content of the file, passed as is to fs.writeFile().
 * @returns {*} The value returned by fs.writeFile().
 */
function filePutContents(filePath, content) {
	const pendingWrite = fs.writeFile(filePath, content);
	return pendingWrite;
}
|
||||
|
||||
/**
 * Returns a shallow copy of the object without the properties that are
 * undefined or null. Only the object's own enumerable properties are copied.
 *
 * @param {Object} note
 * @returns {Object} A new object.
 */
function removeUndefinedProperties(note) {
	const output = {};
	for (const [name, value] of Object.entries(note || {})) {
		const isSet = value !== undefined && value !== null;
		if (isSet) output[name] = value;
	}
	return output;
}
|
||||
|
||||
/**
 * Creates an ID for the note from the SHA-256 hash of its title, body,
 * creation time and update time.
 *
 * @param {Object} note
 * @returns {string} The first 32 characters of the hash, in hexadecimal.
 */
function createNoteId(note) {
	const hasher = new jsSHA("SHA-256", "TEXT");
	hasher.update(`${note.title}_${note.body}_${note.created_time}_${note.updated_time}_`);
	return hasher.getHash("HEX").slice(0, 32);
}
|
||||
|
||||
/**
 * Returns the Levenshtein distance between the two strings, relative to the
 * length of the first string. The result is 1 if one of the strings is empty.
 *
 * @param {string} s1
 * @param {string} s2
 * @returns {number}
 */
function levenshteinPercent(s1, s2) {
	const comparison = new Levenshtein(s1, s2);
	const bothHaveContent = s1.length && s2.length;
	return bothHaveContent ? Math.abs(comparison.distance / s1.length) : 1;
}
|
||||
|
||||
/**
 * Looks for an existing note that is likely to be the same as the given one.
 *
 * For the notes created more than 360 days ago, the creation time and the
 * title must both match, and exactly one note must be found. For the more
 * recent notes, only the creation time must match; if several notes are
 * found, the one with the closest title is selected, provided that the
 * titles are similar enough.
 *
 * @param {Object} note - Note being imported.
 * @returns {Promise<Object|null>} The matching note, or null if there is none.
 */
async function fuzzyMatch(note) {
	const oldNoteLimit = time.unixMs() - 1000 * 60 * 60 * 24 * 360;

	if (note.created_time < oldNoteLimit) {
		const exactMatches = await Note.modelSelectAll('SELECT * FROM notes WHERE is_conflict = 0 AND created_time = ? AND title = ?', [note.created_time, note.title]);
		return exactMatches.length === 1 ? exactMatches[0] : null;
	}

	const candidates = await Note.modelSelectAll('SELECT * FROM notes WHERE is_conflict = 0 AND created_time = ?', [note.created_time]);
	if (candidates.length === 0) return null;
	if (candidates.length === 1) return candidates[0];

	// Several notes have the same creation time: select the one with the
	// closest title.
	let bestScore = 1;
	let bestCandidate = null;
	for (const candidate of candidates) {
		const score = levenshteinPercent(note.title, candidate.title);
		if (score < bestScore) {
			bestScore = score;
			bestCandidate = candidate;
		}
	}

	const similarEnough = bestCandidate && bestScore < 0.2;
	return similarEnough ? bestCandidate : null;
}
|
||||
|
||||
/**
 * Saves the resources of the note: the data is written to the resource file
 * and the other properties are saved as a new Resource.
 *
 * @param {Object} note - Note with a `resources` array.
 * @returns {Promise<number>} Number of resources that have been created.
 */
async function saveNoteResources(note) {
	let createdCount = 0;

	for (const resource of note.resources) {
		// The data goes to the resource file, not to the model
		const toSave = Object.assign({}, resource);
		delete toSave.data;

		// The same resource sometimes appear twice in the same enex (exact same ID and file).
		// In that case, just skip it - it means two different notes might be linked to the
		// same resource.
		const alreadySaved = await Resource.load(toSave.id);
		if (!alreadySaved) {
			await filePutContents(Resource.fullPath(toSave), resource.data);
			await Resource.save(toSave, { isNew: true });
			createdCount++;
		}
	}

	return createdCount;
}
|
||||
|
||||
/**
 * Links the note to each of its tags. The tags that cannot be loaded by
 * title are created first.
 *
 * @param {Object} note - Note with an `id` and a `tags` array of tag titles.
 * @returns {Promise<number>} Number of tags that have been linked to the note.
 */
async function saveNoteTags(note) {
	let taggedCount = 0;

	for (const tagTitle of note.tags) {
		const existingTag = await Tag.loadByTitle(tagTitle);
		const tag = existingTag ? existingTag : await Tag.save({ title: tagTitle });

		await Tag.addNote(tag.id, note.id);
		taggedCount++;
	}

	return taggedCount;
}
|
||||
|
||||
/**
 * Saves an imported note, along with its resources and tags.
 *
 * With fuzzy matching, an existing note that matches the imported one is
 * updated with the properties that differ, or left as is if there is no
 * difference. In all the other cases a new note is created.
 *
 * @param {Object} note - Note to save.
 * @param {boolean} [fuzzyMatching=false] - Look for a matching existing note.
 * @returns {Promise<Object>} What has been done: `noteCreated`, `noteUpdated`,
 *   `noteSkipped`, `resourcesCreated` and `notesTagged`.
 */
async function saveNoteToStorage(note, fuzzyMatching = false) {
	note = Note.filter(note);

	const existingNote = fuzzyMatching ? await fuzzyMatch(note) : null;

	const result = {
		noteCreated: false,
		noteUpdated: false,
		noteSkipped: false,
		resourcesCreated: 0,
		notesTagged: 0,
	};

	result.resourcesCreated += await saveNoteResources(note);

	// NOTE(review): the tags are linked to note.id even when an existing note
	// has been matched, in which case no note is saved with that ID - confirm
	// that this is intended.
	result.notesTagged += await saveNoteTags(note);

	if (!existingNote) {
		await Note.save(note, {
			isNew: true,
			autoTimestamp: false,
		});
		result.noteCreated = true;
		return result;
	}

	// Only the properties of the note itself are compared
	const diff = BaseModel.diffObjects(existingNote, note);
	delete diff.tags;
	delete diff.resources;
	delete diff.id;

	if (Object.getOwnPropertyNames(diff).length === 0) {
		result.noteSkipped = true;
		return result;
	}

	diff.id = existingNote.id;
	diff.type_ = existingNote.type_;
	await Note.save(diff, { autoTimestamp: false });
	result.noteUpdated = true;

	return result;
}
|
||||
|
||||
/**
 * Imports the notes of an ENEX file (Evernote export) to a folder.
 *
 * The file is parsed as a stream. The notes are queued as they are found and
 * saved in batches, the stream being paused while a batch is being saved.
 *
 * @param {string} parentFolderId - ID of the folder the notes are added to.
 * @param {string} filePath - Path of the ENEX file.
 * @param {Object} [importOptions=null] - Options:
 *   - `fuzzyMatching` (default false): update the existing notes that match
 *     the imported ones instead of always creating new notes.
 *   - `onProgress(state)`: called with the counters of the import (`loaded`,
 *     `created`, `updated`, `skipped`, `resourcesCreated`, `notesTagged`).
 *   - `onError(error)`: called for the errors that don't stop the import.
 *   The defaults are set on the object that is passed.
 * @returns {Promise} Resolves once all the notes have been processed.
 *   Rejects if the file cannot be read.
 */
function importEnex(parentFolderId, filePath, importOptions = null) {
	if (!importOptions) importOptions = {};
	if (!('fuzzyMatching' in importOptions)) importOptions.fuzzyMatching = false;
	if (!('onProgress' in importOptions)) importOptions.onProgress = function(state) {};
	if (!('onError' in importOptions)) importOptions.onError = function(error) {};

	return new Promise((resolve, reject) => {
		const progressState = {
			loaded: 0,
			created: 0,
			updated: 0,
			skipped: 0,
			resourcesCreated: 0,
			notesTagged: 0,
		};

		const stream = fs.createReadStream(filePath);

		const options = {};
		const strict = true;
		const saxStream = require('sax').createStream(strict, options);

		const nodes = []; // LIFO list of nodes so that we know in which node we are in the onText event
		let note = null;
		let noteAttributes = null;
		let noteResource = null;
		let noteResourceAttributes = null;
		let noteResourceRecognition = null;
		const notes = []; // Notes that have been parsed and are waiting to be saved
		let processingNotes = false;

		stream.on('error', (error) => {
			reject(new Error(error.toString()));
		});

		function currentNodeName() {
			if (!nodes.length) return null;
			return nodes[nodes.length - 1].name;
		}

		function currentNodeAttributes() {
			if (!nodes.length) return {};
			return nodes[nodes.length - 1].attributes;
		}

		// Saves the notes that are in the queue. Returns false, without doing
		// anything, if the queue is already being processed, and true once all
		// the notes of the queue have been saved.
		async function processNotes() {
			if (processingNotes) return false;

			processingNotes = true;
			stream.pause();

			try {
				const chain = [];
				while (notes.length) {
					const queuedNote = notes.shift();
					const contentStream = stringToStream(queuedNote.bodyXml);
					chain.push(() => {
						return enexXmlToMd(contentStream, queuedNote.resources).then((body) => {
							delete queuedNote.bodyXml;

							queuedNote.id = uuid.create();
							queuedNote.parent_id = parentFolderId;
							queuedNote.body = body;

							// Notes in enex files always have a created timestamp but not always an
							// updated timestamp (if the note has never been modified). For sync
							// we require an updated_time property, so set it to create_time in that case
							if (!queuedNote.updated_time) queuedNote.updated_time = queuedNote.created_time;

							return saveNoteToStorage(queuedNote, importOptions.fuzzyMatching);
						}).then((result) => {
							if (result.noteUpdated) {
								progressState.updated++;
							} else if (result.noteCreated) {
								progressState.created++;
							} else if (result.noteSkipped) {
								progressState.skipped++;
							}
							progressState.resourcesCreated += result.resourcesCreated;
							progressState.notesTagged += result.notesTagged;
							importOptions.onProgress(progressState);
						});
					});
				}

				await promiseChain(chain);
			} finally {
				// Also done when a note cannot be saved, otherwise the stream
				// would remain paused and the import would never complete.
				stream.resume();
				processingNotes = false;
			}

			return true;
		}

		saxStream.on('error', (error) => {
			importOptions.onError(error);
		});

		saxStream.on('text', function(text) {
			const n = currentNodeName();

			if (noteAttributes) {
				noteAttributes[n] = text;
			} else if (noteResourceAttributes) {
				noteResourceAttributes[n] = text;
			} else if (noteResource) {
				if (n == 'data') {
					const attr = currentNodeAttributes();
					noteResource.dataEncoding = attr.encoding;
				}

				// The parser may deliver the text of a node in several chunks when
				// it is large (which is common for the resource data), so the
				// chunks are concatenated rather than each one replacing the
				// previous one.
				noteResource[n] = (n in noteResource ? noteResource[n] : '') + text;
			} else if (note) {
				if (n == 'title') {
					note.title = text;
				} else if (n == 'created') {
					note.created_time = dateToTimestamp(text);
				} else if (n == 'updated') {
					note.updated_time = dateToTimestamp(text);
				} else if (n == 'tag') {
					note.tags.push(text);
				} else {
					console.warn('Unsupported note tag: ' + n);
				}
			}
		});

		saxStream.on('opentag', function(node) {
			const n = node.name.toLowerCase();
			nodes.push(node);

			if (n == 'note') {
				note = {
					resources: [],
					tags: [],
				};
			} else if (n == 'resource-attributes') {
				noteResourceAttributes = {};
			} else if (n == 'recognition') {
				if (noteResource) noteResourceRecognition = {};
			} else if (n == 'note-attributes') {
				noteAttributes = {};
			} else if (n == 'resource') {
				noteResource = {};
			}
		});

		saxStream.on('cdata', function(data) {
			const n = currentNodeName();

			// As for the text nodes, a large CDATA section may be delivered in
			// several chunks.
			if (noteResourceRecognition) {
				// Keep the ID once it has been found in one of the chunks
				if (!noteResourceRecognition.objID) noteResourceRecognition.objID = extractRecognitionObjId(data);
			} else if (note) {
				if (n == 'content') {
					note.bodyXml = ('bodyXml' in note ? note.bodyXml : '') + data;
				}
			}
		});

		saxStream.on('closetag', function(n) {
			nodes.pop();

			if (n == 'note') {
				note = removeUndefinedProperties(note);

				progressState.loaded++;
				importOptions.onProgress(progressState);

				notes.push(note);

				// Save the notes by batches of 10
				if (notes.length >= 10) {
					processNotes().catch((error) => {
						importOptions.onError(error);
					});
				}
				note = null;
			} else if (n == 'recognition' && noteResource) {
				noteResource.id = noteResourceRecognition.objID;
				noteResourceRecognition = null;
			} else if (n == 'resource-attributes') {
				noteResource.filename = noteResourceAttributes['file-name'];
				noteResourceAttributes = null;
			} else if (n == 'note-attributes') {
				note.latitude = noteAttributes.latitude;
				note.longitude = noteAttributes.longitude;
				note.altitude = noteAttributes.altitude;
				note.author = noteAttributes.author;
				note.is_todo = !!noteAttributes['reminder-order'];
				note.todo_due = dateToTimestamp(noteAttributes['reminder-time'], true);
				note.todo_completed = dateToTimestamp(noteAttributes['reminder-done-time'], true);
				note.order = dateToTimestamp(noteAttributes['reminder-order'], true);
				note.source = !!noteAttributes.source ? 'evernote.' + noteAttributes.source : 'evernote';

				noteAttributes = null;
			} else if (n == 'resource') {
				let decodedData = null;
				if (noteResource.dataEncoding == 'base64') {
					try {
						decodedData = Buffer.from(noteResource.data, 'base64');
					} catch (error) {
						importOptions.onError(error);
					}
				} else {
					importOptions.onError(new Error('Cannot decode resource with encoding: ' + noteResource.dataEncoding));
					decodedData = noteResource.data; // Just put the encoded data directly in the file so it can, potentially, be manually decoded later
				}

				const r = {
					id: noteResource.id,
					data: decodedData,
					mime: noteResource.mime,
					title: noteResource.filename ? noteResource.filename : '',
					filename: noteResource.filename ? noteResource.filename : '',
				};

				note.resources.push(r);
				noteResource = null;
			}
		});

		saxStream.on('end', function() {
			// Wait till there is no more notes to process.
			const iid = setInterval(() => {
				processNotes().then((allDone) => {
					if (allDone) {
						clearInterval(iid);
						resolve();
					}
				}).catch((error) => {
					// Report the error and let the next tick complete the import
					importOptions.onError(error);
				});
			}, 500);
		});

		stream.pipe(saxStream);
	});
}
|
||||
|
||||
module.exports = { importEnex };
|
||||
Reference in New Issue
Block a user