You've already forked joplin
mirror of
https://github.com/laurent22/joplin.git
synced 2025-07-16 00:14:34 +02:00
Merge pull request #409 from petrz12/better-evernote-import
Merge monospace text lines when importing from Evernote
This commit is contained in:
@ -5,8 +5,217 @@ const BLOCK_CLOSE = "[[BLOCK_CLOSE]]";
|
|||||||
const NEWLINE = "[[NEWLINE]]";
|
const NEWLINE = "[[NEWLINE]]";
|
||||||
const NEWLINE_MERGED = "[[MERGED]]";
|
const NEWLINE_MERGED = "[[MERGED]]";
|
||||||
const SPACE = "[[SPACE]]";
|
const SPACE = "[[SPACE]]";
|
||||||
|
// For monospace font detection (Courier, Menlo, Moncaco)
|
||||||
|
const MONOSPACE_OPEN = "[[MONOSPACE_OPEN]]";
|
||||||
|
const MONOSPACE_CLOSE = "[[MONOSPACE_CLOSE]]";
|
||||||
|
|
||||||
|
// This function will return a list of all monospace sections with a flag saying whether they can be merged or not
|
||||||
|
function findMonospaceSections(md) {
|
||||||
|
let temp = [];
|
||||||
|
|
||||||
|
let sections = [];
|
||||||
|
let section = null;
|
||||||
|
// This variable is used twice: to detected if a newline is between monospace sections and if a newline is inside monospace section
|
||||||
|
let mergeWithPrevious = true;
|
||||||
|
|
||||||
|
let last = "";
|
||||||
|
for (let i = 0; i < md.length; i++) {
|
||||||
|
let v = md[i];
|
||||||
|
|
||||||
|
if (v == MONOSPACE_OPEN) {
|
||||||
|
if (section != null) throw new Error('Monospace open tag detected while the previous was not closed'); // Sanity check, but normally not possible
|
||||||
|
|
||||||
|
let monospaceSection = {
|
||||||
|
openIndex: null,
|
||||||
|
closeIndex: null,
|
||||||
|
mergeAllowed: true,
|
||||||
|
mergeWithPrevious: mergeWithPrevious,
|
||||||
|
isEmptyLine: false,
|
||||||
|
}
|
||||||
|
section = monospaceSection;
|
||||||
|
|
||||||
|
// Remember where monospace section begins, later it will be replaced with appropriate markdown (` or ```)
|
||||||
|
section.openIndex = temp.push(v) - 1;
|
||||||
|
// Add an empty string, it can be later replaced with newline if necessary
|
||||||
|
temp.push("");
|
||||||
|
|
||||||
|
if (last != BLOCK_OPEN) {
|
||||||
|
// We cannot merge inline code
|
||||||
|
section.mergeAllowed = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset to detect if monospace section contains a newline
|
||||||
|
mergeWithPrevious = true;
|
||||||
|
|
||||||
|
} else if (v == MONOSPACE_CLOSE) {
|
||||||
|
if (section == null) throw new Error('Monospace tag was closed without being open before'); // Sanity check, but normally not possible
|
||||||
|
if (section.closeIndex != null) throw new Error('Monospace tag is closed for the second time'); // Sanity check, but normally not possible
|
||||||
|
|
||||||
|
// Add an empty string, it can be later replaced with newline if necessary
|
||||||
|
temp.push("");
|
||||||
|
// Remember where monospace section ends, later it will be replaced with appropriate markdown (` or ```)
|
||||||
|
section.closeIndex = temp.push(v) - 1;
|
||||||
|
|
||||||
|
if (md[i+1] != BLOCK_CLOSE) {
|
||||||
|
// We cannot merge inline code
|
||||||
|
section.mergeAllowed = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
section.isEmptyLine = mergeWithPrevious;
|
||||||
|
sections.push(section);
|
||||||
|
|
||||||
|
// Reset
|
||||||
|
section = null;
|
||||||
|
mergeWithPrevious = true;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
// We can merge only if monospace sections are separated by newlines
|
||||||
|
if (v != NEWLINE && v != BLOCK_OPEN && v != BLOCK_CLOSE) {
|
||||||
|
mergeWithPrevious = false;
|
||||||
|
}
|
||||||
|
temp.push(v);
|
||||||
|
}
|
||||||
|
last = v;
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
md: temp,
|
||||||
|
monospaceSections: sections,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// This function is looping over monospace sections and merging what it can merge
|
||||||
|
function mergeMonospaceSections(md, sections) {
|
||||||
|
|
||||||
|
const USE_BLOCK_TAG = 1;
|
||||||
|
const USE_INLINE_TAG = 2;
|
||||||
|
const USE_EMPTY_TAG = 3;
|
||||||
|
|
||||||
|
const toMonospace = (md, section, startTag, endTag) => {
|
||||||
|
|
||||||
|
// It looks better when empty lines are not inlined
|
||||||
|
if (startTag == USE_INLINE_TAG && section.isEmptyLine) {
|
||||||
|
startTag = USE_EMPTY_TAG;
|
||||||
|
endTag = USE_EMPTY_TAG;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (startTag) {
|
||||||
|
case USE_BLOCK_TAG:
|
||||||
|
md[section.openIndex] = "```";
|
||||||
|
md[section.openIndex + 1] = NEWLINE;
|
||||||
|
break;
|
||||||
|
case USE_INLINE_TAG:
|
||||||
|
md[section.openIndex] = "`";
|
||||||
|
break;
|
||||||
|
case USE_EMPTY_TAG:
|
||||||
|
md[section.openIndex] = "";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
switch (endTag) {
|
||||||
|
case USE_BLOCK_TAG:
|
||||||
|
// We don't add a NEWLINE if there already is a NEWLINE
|
||||||
|
if (md[section.closeIndex - 2] == NEWLINE) {
|
||||||
|
md[section.closeIndex - 1] = "";
|
||||||
|
} else {
|
||||||
|
md[section.closeIndex - 1] = NEWLINE;
|
||||||
|
}
|
||||||
|
md[section.closeIndex] = "```";
|
||||||
|
break;
|
||||||
|
case USE_INLINE_TAG:
|
||||||
|
md[section.closeIndex] = "`";
|
||||||
|
break;
|
||||||
|
case USE_EMPTY_TAG:
|
||||||
|
md[section.closeIndex] = "";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const getSection = () => {
|
||||||
|
return sections.shift();
|
||||||
|
}
|
||||||
|
|
||||||
|
const getMergeableSection = (first = null) => {
|
||||||
|
if (first) {
|
||||||
|
sections.unshift(first);
|
||||||
|
}
|
||||||
|
while (sections.length) {
|
||||||
|
s = sections.shift();
|
||||||
|
if (s.mergeAllowed) {
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
// If cannot merge then convert into inline code
|
||||||
|
toMonospace(md, s, USE_INLINE_TAG, USE_INLINE_TAG);
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
let left = getMergeableSection();
|
||||||
|
let right = null;
|
||||||
|
|
||||||
|
while (left) {
|
||||||
|
let isFirst = true;
|
||||||
|
|
||||||
|
right = getSection();
|
||||||
|
while (right && right.mergeAllowed && right.mergeWithPrevious) {
|
||||||
|
// We can merge left and right
|
||||||
|
if (isFirst) {
|
||||||
|
isFirst = false;
|
||||||
|
// First section
|
||||||
|
toMonospace(md, left, USE_BLOCK_TAG, USE_EMPTY_TAG);
|
||||||
|
} else {
|
||||||
|
// Middle section
|
||||||
|
toMonospace(md, left, USE_EMPTY_TAG, USE_EMPTY_TAG);
|
||||||
|
}
|
||||||
|
left = right;
|
||||||
|
right = getSection();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isFirst) {
|
||||||
|
// Could not merge, convert to inline code
|
||||||
|
toMonospace(md, left, USE_INLINE_TAG, USE_INLINE_TAG);
|
||||||
|
} else {
|
||||||
|
// Was merged, add block end tag
|
||||||
|
toMonospace(md, left, USE_EMPTY_TAG, USE_BLOCK_TAG);
|
||||||
|
}
|
||||||
|
|
||||||
|
left = getMergeableSection(right);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// This function will try to merge monospace sections
|
||||||
|
// It works in two phases:
|
||||||
|
// 1) It will find all monospace sections
|
||||||
|
// 2) It will merge all monospace sections where merge is allowed
|
||||||
|
function mergeMonospaceSectionsWrapper(md, ignoreMonospace = false) {
|
||||||
|
|
||||||
|
if (!ignoreMonospace) {
|
||||||
|
const result = findMonospaceSections(md);
|
||||||
|
|
||||||
|
if (result.monospaceSections.length > 0) {
|
||||||
|
mergeMonospaceSections(result.md, result.monospaceSections);
|
||||||
|
}
|
||||||
|
md = result.md;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove empty items, it is necessary for correct function of newline merging happening outside this function
|
||||||
|
let temp = []
|
||||||
|
for (let i = 0; i < md.length; i++) {
|
||||||
|
let v = md[i];
|
||||||
|
if (ignoreMonospace && (v == MONOSPACE_OPEN || v == MONOSPACE_CLOSE)) {
|
||||||
|
continue; // skip
|
||||||
|
}
|
||||||
|
if (v != "") {
|
||||||
|
temp.push(v);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return temp;
|
||||||
|
}
|
||||||
|
|
||||||
|
function processMdArrayNewLines(md, isTable = false) {
|
||||||
|
// Try to merge MONOSPACE sections, works good when when not parsing a table
|
||||||
|
md = mergeMonospaceSectionsWrapper(md, isTable);
|
||||||
|
|
||||||
function processMdArrayNewLines(md) {
|
|
||||||
while (md.length && md[0] == BLOCK_OPEN) {
|
while (md.length && md[0] == BLOCK_OPEN) {
|
||||||
md.shift();
|
md.shift();
|
||||||
}
|
}
|
||||||
@ -287,6 +496,8 @@ function enexXmlToMdArray(stream, resources) {
|
|||||||
let state = {
|
let state = {
|
||||||
inCode: false,
|
inCode: false,
|
||||||
inQuote: false,
|
inQuote: false,
|
||||||
|
inMonospaceFont: false,
|
||||||
|
inCodeblock: 0,
|
||||||
lists: [],
|
lists: [],
|
||||||
anchorAttributes: [],
|
anchorAttributes: [],
|
||||||
};
|
};
|
||||||
@ -315,6 +526,19 @@ function enexXmlToMdArray(stream, resources) {
|
|||||||
const nodeAttributes = attributeToLowerCase(node);
|
const nodeAttributes = attributeToLowerCase(node);
|
||||||
|
|
||||||
let n = node.name.toLowerCase();
|
let n = node.name.toLowerCase();
|
||||||
|
|
||||||
|
if (n == "div") {
|
||||||
|
// div tags are recursive, in order to find the end we have to count the depth
|
||||||
|
if (state.inCodeblock > 0) {
|
||||||
|
state.inCodeblock++;
|
||||||
|
} else if (nodeAttributes && nodeAttributes.style && nodeAttributes.style.indexOf("box-sizing: border-box") >= 0) {
|
||||||
|
// Evernote code block start
|
||||||
|
state.inCodeblock = 1;
|
||||||
|
section.lines.push("```");
|
||||||
|
return; // skip further processing
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (n == 'en-note') {
|
if (n == 'en-note') {
|
||||||
// Start of note
|
// Start of note
|
||||||
} else if (isBlockTag(n)) {
|
} else if (isBlockTag(n)) {
|
||||||
@ -502,6 +726,26 @@ function enexXmlToMdArray(stream, resources) {
|
|||||||
if (resource && !!resource.id) {
|
if (resource && !!resource.id) {
|
||||||
section.lines = addResourceTag(section.lines, resource, nodeAttributes.alt);
|
section.lines = addResourceTag(section.lines, resource, nodeAttributes.alt);
|
||||||
}
|
}
|
||||||
|
} else if (n == "span" || n == "font") {
|
||||||
|
// Check for monospace font. It can come from being specified in either from
|
||||||
|
// <span style="..."> or <font face="...">.
|
||||||
|
// Monospace sections are already in monospace for Evernote code blocks
|
||||||
|
if (state.inCodeblock == 0 && nodeAttributes) {
|
||||||
|
let style = null;
|
||||||
|
|
||||||
|
if (nodeAttributes.style) {
|
||||||
|
style = nodeAttributes.style.toLowerCase();
|
||||||
|
} else if (nodeAttributes.face) {
|
||||||
|
style = nodeAttributes.face.toLowerCase();
|
||||||
|
}
|
||||||
|
|
||||||
|
monospace = style.match(/monospace|courier|menlo|monaco/) != null;
|
||||||
|
|
||||||
|
if (monospace) {
|
||||||
|
state.inMonospaceFont = true;
|
||||||
|
section.lines.push(MONOSPACE_OPEN);
|
||||||
|
}
|
||||||
|
}
|
||||||
} else if (["span", "font", 'sup', 'cite', 'abbr', 'small', 'tt', 'sub', 'colgroup', 'col', 'ins', 'caption', 'var', 'map', 'area'].indexOf(n) >= 0) {
|
} else if (["span", "font", 'sup', 'cite', 'abbr', 'small', 'tt', 'sub', 'colgroup', 'col', 'ins', 'caption', 'var', 'map', 'area'].indexOf(n) >= 0) {
|
||||||
// Inline tags that can be ignored in Markdown
|
// Inline tags that can be ignored in Markdown
|
||||||
} else {
|
} else {
|
||||||
@ -512,6 +756,17 @@ function enexXmlToMdArray(stream, resources) {
|
|||||||
saxStream.on('closetag', function(n) {
|
saxStream.on('closetag', function(n) {
|
||||||
n = n ? n.toLowerCase() : n;
|
n = n ? n.toLowerCase() : n;
|
||||||
|
|
||||||
|
if (n == "div") {
|
||||||
|
if (state.inCodeblock >= 1) {
|
||||||
|
state.inCodeblock--;
|
||||||
|
if (state.inCodeblock == 0) {
|
||||||
|
// Evernote code block end
|
||||||
|
section.lines.push("```");
|
||||||
|
return; // skip further processing
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (n == 'en-note') {
|
if (n == 'en-note') {
|
||||||
// End of note
|
// End of note
|
||||||
} else if (isNewLineOnlyEndTag(n)) {
|
} else if (isNewLineOnlyEndTag(n)) {
|
||||||
@ -522,6 +777,11 @@ function enexXmlToMdArray(stream, resources) {
|
|||||||
if (section && section.parent) section = section.parent;
|
if (section && section.parent) section = section.parent;
|
||||||
} else if (n == 'table') {
|
} else if (n == 'table') {
|
||||||
if (section && section.parent) section = section.parent;
|
if (section && section.parent) section = section.parent;
|
||||||
|
} else if (n == "span" || n == "font") {
|
||||||
|
if (state.inCodeblock == 0 && state.inMonospaceFont) {
|
||||||
|
state.inMonospaceFont = false;
|
||||||
|
section.lines.push(MONOSPACE_CLOSE);
|
||||||
|
}
|
||||||
} else if (isIgnoredEndTag(n)) {
|
} else if (isIgnoredEndTag(n)) {
|
||||||
// Skip
|
// Skip
|
||||||
} else if (isListTag(n)) {
|
} else if (isListTag(n)) {
|
||||||
@ -662,7 +922,7 @@ function drawTable(table) {
|
|||||||
|
|
||||||
const renderCurrentCells = () => {
|
const renderCurrentCells = () => {
|
||||||
if (!currentCells.length) return;
|
if (!currentCells.length) return;
|
||||||
const cellText = processMdArrayNewLines(currentCells);
|
const cellText = processMdArrayNewLines(currentCells, true);
|
||||||
line.push(cellText);
|
line.push(cellText);
|
||||||
currentCells = [];
|
currentCells = [];
|
||||||
}
|
}
|
||||||
@ -685,7 +945,7 @@ function drawTable(table) {
|
|||||||
|
|
||||||
// A cell in a Markdown table cannot have actual new lines so replace
|
// A cell in a Markdown table cannot have actual new lines so replace
|
||||||
// them with <br>, which are supported by the markdown renderers.
|
// them with <br>, which are supported by the markdown renderers.
|
||||||
let cellText = processMdArrayNewLines(td.lines).replace(/\n+/g, "<br>");
|
let cellText = processMdArrayNewLines(td.lines, true).replace(/\n+/g, "<br>");
|
||||||
|
|
||||||
// Inside tables cells, "|" needs to be escaped
|
// Inside tables cells, "|" needs to be escaped
|
||||||
cellText = cellText.replace(/\|/g, "\\|");
|
cellText = cellText.replace(/\|/g, "\\|");
|
||||||
|
Reference in New Issue
Block a user