1
0
mirror of https://github.com/laurent22/joplin.git synced 2025-02-04 19:16:07 +02:00

Merge pull request #409 from petrz12/better-evernote-import

Merge monospace text lines when importing from Evernote
This commit is contained in:
Laurent Cozic 2018-05-11 15:25:26 +01:00 committed by GitHub
commit 0f0ff86ffa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -5,8 +5,217 @@ const BLOCK_CLOSE = "[[BLOCK_CLOSE]]";
const NEWLINE = "[[NEWLINE]]";
const NEWLINE_MERGED = "[[MERGED]]";
const SPACE = "[[SPACE]]";
// For monospace font detection (Courier, Menlo, Moncaco)
const MONOSPACE_OPEN = "[[MONOSPACE_OPEN]]";
const MONOSPACE_CLOSE = "[[MONOSPACE_CLOSE]]";
// This function will return a list of all monospace sections with a flag saying whether they can be merged or not
function findMonospaceSections(md) {
let temp = [];
let sections = [];
let section = null;
// This variable is used twice: to detected if a newline is between monospace sections and if a newline is inside monospace section
let mergeWithPrevious = true;
let last = "";
for (let i = 0; i < md.length; i++) {
let v = md[i];
if (v == MONOSPACE_OPEN) {
if (section != null) throw new Error('Monospace open tag detected while the previous was not closed'); // Sanity check, but normally not possible
let monospaceSection = {
openIndex: null,
closeIndex: null,
mergeAllowed: true,
mergeWithPrevious: mergeWithPrevious,
isEmptyLine: false,
}
section = monospaceSection;
// Remember where monospace section begins, later it will be replaced with appropriate markdown (` or ```)
section.openIndex = temp.push(v) - 1;
// Add an empty string, it can be later replaced with newline if necessary
temp.push("");
if (last != BLOCK_OPEN) {
// We cannot merge inline code
section.mergeAllowed = false;
}
// Reset to detect if monospace section contains a newline
mergeWithPrevious = true;
} else if (v == MONOSPACE_CLOSE) {
if (section == null) throw new Error('Monospace tag was closed without being open before'); // Sanity check, but normally not possible
if (section.closeIndex != null) throw new Error('Monospace tag is closed for the second time'); // Sanity check, but normally not possible
// Add an empty string, it can be later replaced with newline if necessary
temp.push("");
// Remember where monospace section ends, later it will be replaced with appropriate markdown (` or ```)
section.closeIndex = temp.push(v) - 1;
if (md[i+1] != BLOCK_CLOSE) {
// We cannot merge inline code
section.mergeAllowed = false;
}
section.isEmptyLine = mergeWithPrevious;
sections.push(section);
// Reset
section = null;
mergeWithPrevious = true;
} else {
// We can merge only if monospace sections are separated by newlines
if (v != NEWLINE && v != BLOCK_OPEN && v != BLOCK_CLOSE) {
mergeWithPrevious = false;
}
temp.push(v);
}
last = v;
}
return {
md: temp,
monospaceSections: sections,
};
}
// This function is looping over monospace sections and merging what it can merge
function mergeMonospaceSections(md, sections) {
const USE_BLOCK_TAG = 1;
const USE_INLINE_TAG = 2;
const USE_EMPTY_TAG = 3;
const toMonospace = (md, section, startTag, endTag) => {
// It looks better when empty lines are not inlined
if (startTag == USE_INLINE_TAG && section.isEmptyLine) {
startTag = USE_EMPTY_TAG;
endTag = USE_EMPTY_TAG;
}
switch (startTag) {
case USE_BLOCK_TAG:
md[section.openIndex] = "```";
md[section.openIndex + 1] = NEWLINE;
break;
case USE_INLINE_TAG:
md[section.openIndex] = "`";
break;
case USE_EMPTY_TAG:
md[section.openIndex] = "";
break;
}
switch (endTag) {
case USE_BLOCK_TAG:
// We don't add a NEWLINE if there already is a NEWLINE
if (md[section.closeIndex - 2] == NEWLINE) {
md[section.closeIndex - 1] = "";
} else {
md[section.closeIndex - 1] = NEWLINE;
}
md[section.closeIndex] = "```";
break;
case USE_INLINE_TAG:
md[section.closeIndex] = "`";
break;
case USE_EMPTY_TAG:
md[section.closeIndex] = "";
break;
}
}
const getSection = () => {
return sections.shift();
}
const getMergeableSection = (first = null) => {
if (first) {
sections.unshift(first);
}
while (sections.length) {
s = sections.shift();
if (s.mergeAllowed) {
return s;
}
// If cannot merge then convert into inline code
toMonospace(md, s, USE_INLINE_TAG, USE_INLINE_TAG);
}
return null;
}
let left = getMergeableSection();
let right = null;
while (left) {
let isFirst = true;
right = getSection();
while (right && right.mergeAllowed && right.mergeWithPrevious) {
// We can merge left and right
if (isFirst) {
isFirst = false;
// First section
toMonospace(md, left, USE_BLOCK_TAG, USE_EMPTY_TAG);
} else {
// Middle section
toMonospace(md, left, USE_EMPTY_TAG, USE_EMPTY_TAG);
}
left = right;
right = getSection();
}
if (isFirst) {
// Could not merge, convert to inline code
toMonospace(md, left, USE_INLINE_TAG, USE_INLINE_TAG);
} else {
// Was merged, add block end tag
toMonospace(md, left, USE_EMPTY_TAG, USE_BLOCK_TAG);
}
left = getMergeableSection(right);
}
}
// This function will try to merge monospace sections
// It works in two phases:
// 1) It will find all monospace sections
// 2) It will merge all monospace sections where merge is allowed
function mergeMonospaceSectionsWrapper(md, ignoreMonospace = false) {
if (!ignoreMonospace) {
const result = findMonospaceSections(md);
if (result.monospaceSections.length > 0) {
mergeMonospaceSections(result.md, result.monospaceSections);
}
md = result.md;
}
// Remove empty items, it is necessary for correct function of newline merging happening outside this function
let temp = []
for (let i = 0; i < md.length; i++) {
let v = md[i];
if (ignoreMonospace && (v == MONOSPACE_OPEN || v == MONOSPACE_CLOSE)) {
continue; // skip
}
if (v != "") {
temp.push(v);
}
}
return temp;
}
function processMdArrayNewLines(md, isTable = false) {
// Try to merge MONOSPACE sections, works good when when not parsing a table
md = mergeMonospaceSectionsWrapper(md, isTable);
function processMdArrayNewLines(md) {
while (md.length && md[0] == BLOCK_OPEN) {
md.shift();
}
@ -287,6 +496,8 @@ function enexXmlToMdArray(stream, resources) {
let state = {
inCode: false,
inQuote: false,
inMonospaceFont: false,
inCodeblock: 0,
lists: [],
anchorAttributes: [],
};
@ -315,6 +526,19 @@ function enexXmlToMdArray(stream, resources) {
const nodeAttributes = attributeToLowerCase(node);
let n = node.name.toLowerCase();
if (n == "div") {
// div tags are recursive, in order to find the end we have to count the depth
if (state.inCodeblock > 0) {
state.inCodeblock++;
} else if (nodeAttributes && nodeAttributes.style && nodeAttributes.style.indexOf("box-sizing: border-box") >= 0) {
// Evernote code block start
state.inCodeblock = 1;
section.lines.push("```");
return; // skip further processing
}
}
if (n == 'en-note') {
// Start of note
} else if (isBlockTag(n)) {
@ -502,6 +726,26 @@ function enexXmlToMdArray(stream, resources) {
if (resource && !!resource.id) {
section.lines = addResourceTag(section.lines, resource, nodeAttributes.alt);
}
} else if (n == "span" || n == "font") {
// Check for monospace font. It can come from being specified in either from
// <span style="..."> or <font face="...">.
// Monospace sections are already in monospace for Evernote code blocks
if (state.inCodeblock == 0 && nodeAttributes) {
let style = null;
if (nodeAttributes.style) {
style = nodeAttributes.style.toLowerCase();
} else if (nodeAttributes.face) {
style = nodeAttributes.face.toLowerCase();
}
monospace = style.match(/monospace|courier|menlo|monaco/) != null;
if (monospace) {
state.inMonospaceFont = true;
section.lines.push(MONOSPACE_OPEN);
}
}
} else if (["span", "font", 'sup', 'cite', 'abbr', 'small', 'tt', 'sub', 'colgroup', 'col', 'ins', 'caption', 'var', 'map', 'area'].indexOf(n) >= 0) {
// Inline tags that can be ignored in Markdown
} else {
@ -512,6 +756,17 @@ function enexXmlToMdArray(stream, resources) {
saxStream.on('closetag', function(n) {
n = n ? n.toLowerCase() : n;
if (n == "div") {
if (state.inCodeblock >= 1) {
state.inCodeblock--;
if (state.inCodeblock == 0) {
// Evernote code block end
section.lines.push("```");
return; // skip further processing
}
}
}
if (n == 'en-note') {
// End of note
} else if (isNewLineOnlyEndTag(n)) {
@ -522,6 +777,11 @@ function enexXmlToMdArray(stream, resources) {
if (section && section.parent) section = section.parent;
} else if (n == 'table') {
if (section && section.parent) section = section.parent;
} else if (n == "span" || n == "font") {
if (state.inCodeblock == 0 && state.inMonospaceFont) {
state.inMonospaceFont = false;
section.lines.push(MONOSPACE_CLOSE);
}
} else if (isIgnoredEndTag(n)) {
// Skip
} else if (isListTag(n)) {
@ -662,7 +922,7 @@ function drawTable(table) {
const renderCurrentCells = () => {
if (!currentCells.length) return;
const cellText = processMdArrayNewLines(currentCells);
const cellText = processMdArrayNewLines(currentCells, true);
line.push(cellText);
currentCells = [];
}
@ -685,7 +945,7 @@ function drawTable(table) {
// A cell in a Markdown table cannot have actual new lines so replace
// them with <br>, which are supported by the markdown renderers.
let cellText = processMdArrayNewLines(td.lines).replace(/\n+/g, "<br>");
let cellText = processMdArrayNewLines(td.lines, true).replace(/\n+/g, "<br>");
// Inside tables cells, "|" needs to be escaped
cellText = cellText.replace(/\|/g, "\\|");