mirror of
https://github.com/laurent22/joplin.git
synced 2025-02-04 19:16:07 +02:00
Merge pull request #409 from petrz12/better-evernote-import
Merge monospace text lines when importing from Evernote
This commit is contained in:
commit
0f0ff86ffa
@ -5,8 +5,217 @@ const BLOCK_CLOSE = "[[BLOCK_CLOSE]]";
|
||||
const NEWLINE = "[[NEWLINE]]";
|
||||
const NEWLINE_MERGED = "[[MERGED]]";
|
||||
const SPACE = "[[SPACE]]";
|
||||
// For monospace font detection (Courier, Menlo, Moncaco)
|
||||
const MONOSPACE_OPEN = "[[MONOSPACE_OPEN]]";
|
||||
const MONOSPACE_CLOSE = "[[MONOSPACE_CLOSE]]";
|
||||
|
||||
// This function will return a list of all monospace sections with a flag saying whether they can be merged or not
|
||||
function findMonospaceSections(md) {
|
||||
let temp = [];
|
||||
|
||||
let sections = [];
|
||||
let section = null;
|
||||
// This variable is used twice: to detected if a newline is between monospace sections and if a newline is inside monospace section
|
||||
let mergeWithPrevious = true;
|
||||
|
||||
let last = "";
|
||||
for (let i = 0; i < md.length; i++) {
|
||||
let v = md[i];
|
||||
|
||||
if (v == MONOSPACE_OPEN) {
|
||||
if (section != null) throw new Error('Monospace open tag detected while the previous was not closed'); // Sanity check, but normally not possible
|
||||
|
||||
let monospaceSection = {
|
||||
openIndex: null,
|
||||
closeIndex: null,
|
||||
mergeAllowed: true,
|
||||
mergeWithPrevious: mergeWithPrevious,
|
||||
isEmptyLine: false,
|
||||
}
|
||||
section = monospaceSection;
|
||||
|
||||
// Remember where monospace section begins, later it will be replaced with appropriate markdown (` or ```)
|
||||
section.openIndex = temp.push(v) - 1;
|
||||
// Add an empty string, it can be later replaced with newline if necessary
|
||||
temp.push("");
|
||||
|
||||
if (last != BLOCK_OPEN) {
|
||||
// We cannot merge inline code
|
||||
section.mergeAllowed = false;
|
||||
}
|
||||
|
||||
// Reset to detect if monospace section contains a newline
|
||||
mergeWithPrevious = true;
|
||||
|
||||
} else if (v == MONOSPACE_CLOSE) {
|
||||
if (section == null) throw new Error('Monospace tag was closed without being open before'); // Sanity check, but normally not possible
|
||||
if (section.closeIndex != null) throw new Error('Monospace tag is closed for the second time'); // Sanity check, but normally not possible
|
||||
|
||||
// Add an empty string, it can be later replaced with newline if necessary
|
||||
temp.push("");
|
||||
// Remember where monospace section ends, later it will be replaced with appropriate markdown (` or ```)
|
||||
section.closeIndex = temp.push(v) - 1;
|
||||
|
||||
if (md[i+1] != BLOCK_CLOSE) {
|
||||
// We cannot merge inline code
|
||||
section.mergeAllowed = false;
|
||||
}
|
||||
|
||||
section.isEmptyLine = mergeWithPrevious;
|
||||
sections.push(section);
|
||||
|
||||
// Reset
|
||||
section = null;
|
||||
mergeWithPrevious = true;
|
||||
|
||||
} else {
|
||||
// We can merge only if monospace sections are separated by newlines
|
||||
if (v != NEWLINE && v != BLOCK_OPEN && v != BLOCK_CLOSE) {
|
||||
mergeWithPrevious = false;
|
||||
}
|
||||
temp.push(v);
|
||||
}
|
||||
last = v;
|
||||
}
|
||||
|
||||
return {
|
||||
md: temp,
|
||||
monospaceSections: sections,
|
||||
};
|
||||
}
|
||||
|
||||
// This function is looping over monospace sections and merging what it can merge
|
||||
function mergeMonospaceSections(md, sections) {
|
||||
|
||||
const USE_BLOCK_TAG = 1;
|
||||
const USE_INLINE_TAG = 2;
|
||||
const USE_EMPTY_TAG = 3;
|
||||
|
||||
const toMonospace = (md, section, startTag, endTag) => {
|
||||
|
||||
// It looks better when empty lines are not inlined
|
||||
if (startTag == USE_INLINE_TAG && section.isEmptyLine) {
|
||||
startTag = USE_EMPTY_TAG;
|
||||
endTag = USE_EMPTY_TAG;
|
||||
}
|
||||
|
||||
switch (startTag) {
|
||||
case USE_BLOCK_TAG:
|
||||
md[section.openIndex] = "```";
|
||||
md[section.openIndex + 1] = NEWLINE;
|
||||
break;
|
||||
case USE_INLINE_TAG:
|
||||
md[section.openIndex] = "`";
|
||||
break;
|
||||
case USE_EMPTY_TAG:
|
||||
md[section.openIndex] = "";
|
||||
break;
|
||||
}
|
||||
switch (endTag) {
|
||||
case USE_BLOCK_TAG:
|
||||
// We don't add a NEWLINE if there already is a NEWLINE
|
||||
if (md[section.closeIndex - 2] == NEWLINE) {
|
||||
md[section.closeIndex - 1] = "";
|
||||
} else {
|
||||
md[section.closeIndex - 1] = NEWLINE;
|
||||
}
|
||||
md[section.closeIndex] = "```";
|
||||
break;
|
||||
case USE_INLINE_TAG:
|
||||
md[section.closeIndex] = "`";
|
||||
break;
|
||||
case USE_EMPTY_TAG:
|
||||
md[section.closeIndex] = "";
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const getSection = () => {
|
||||
return sections.shift();
|
||||
}
|
||||
|
||||
const getMergeableSection = (first = null) => {
|
||||
if (first) {
|
||||
sections.unshift(first);
|
||||
}
|
||||
while (sections.length) {
|
||||
s = sections.shift();
|
||||
if (s.mergeAllowed) {
|
||||
return s;
|
||||
}
|
||||
// If cannot merge then convert into inline code
|
||||
toMonospace(md, s, USE_INLINE_TAG, USE_INLINE_TAG);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
let left = getMergeableSection();
|
||||
let right = null;
|
||||
|
||||
while (left) {
|
||||
let isFirst = true;
|
||||
|
||||
right = getSection();
|
||||
while (right && right.mergeAllowed && right.mergeWithPrevious) {
|
||||
// We can merge left and right
|
||||
if (isFirst) {
|
||||
isFirst = false;
|
||||
// First section
|
||||
toMonospace(md, left, USE_BLOCK_TAG, USE_EMPTY_TAG);
|
||||
} else {
|
||||
// Middle section
|
||||
toMonospace(md, left, USE_EMPTY_TAG, USE_EMPTY_TAG);
|
||||
}
|
||||
left = right;
|
||||
right = getSection();
|
||||
}
|
||||
|
||||
if (isFirst) {
|
||||
// Could not merge, convert to inline code
|
||||
toMonospace(md, left, USE_INLINE_TAG, USE_INLINE_TAG);
|
||||
} else {
|
||||
// Was merged, add block end tag
|
||||
toMonospace(md, left, USE_EMPTY_TAG, USE_BLOCK_TAG);
|
||||
}
|
||||
|
||||
left = getMergeableSection(right);
|
||||
}
|
||||
}
|
||||
|
||||
// This function will try to merge monospace sections
|
||||
// It works in two phases:
|
||||
// 1) It will find all monospace sections
|
||||
// 2) It will merge all monospace sections where merge is allowed
|
||||
function mergeMonospaceSectionsWrapper(md, ignoreMonospace = false) {
|
||||
|
||||
if (!ignoreMonospace) {
|
||||
const result = findMonospaceSections(md);
|
||||
|
||||
if (result.monospaceSections.length > 0) {
|
||||
mergeMonospaceSections(result.md, result.monospaceSections);
|
||||
}
|
||||
md = result.md;
|
||||
}
|
||||
|
||||
// Remove empty items, it is necessary for correct function of newline merging happening outside this function
|
||||
let temp = []
|
||||
for (let i = 0; i < md.length; i++) {
|
||||
let v = md[i];
|
||||
if (ignoreMonospace && (v == MONOSPACE_OPEN || v == MONOSPACE_CLOSE)) {
|
||||
continue; // skip
|
||||
}
|
||||
if (v != "") {
|
||||
temp.push(v);
|
||||
}
|
||||
}
|
||||
|
||||
return temp;
|
||||
}
|
||||
|
||||
function processMdArrayNewLines(md, isTable = false) {
|
||||
// Try to merge MONOSPACE sections, works good when when not parsing a table
|
||||
md = mergeMonospaceSectionsWrapper(md, isTable);
|
||||
|
||||
function processMdArrayNewLines(md) {
|
||||
while (md.length && md[0] == BLOCK_OPEN) {
|
||||
md.shift();
|
||||
}
|
||||
@ -287,6 +496,8 @@ function enexXmlToMdArray(stream, resources) {
|
||||
let state = {
|
||||
inCode: false,
|
||||
inQuote: false,
|
||||
inMonospaceFont: false,
|
||||
inCodeblock: 0,
|
||||
lists: [],
|
||||
anchorAttributes: [],
|
||||
};
|
||||
@ -315,6 +526,19 @@ function enexXmlToMdArray(stream, resources) {
|
||||
const nodeAttributes = attributeToLowerCase(node);
|
||||
|
||||
let n = node.name.toLowerCase();
|
||||
|
||||
if (n == "div") {
|
||||
// div tags are recursive, in order to find the end we have to count the depth
|
||||
if (state.inCodeblock > 0) {
|
||||
state.inCodeblock++;
|
||||
} else if (nodeAttributes && nodeAttributes.style && nodeAttributes.style.indexOf("box-sizing: border-box") >= 0) {
|
||||
// Evernote code block start
|
||||
state.inCodeblock = 1;
|
||||
section.lines.push("```");
|
||||
return; // skip further processing
|
||||
}
|
||||
}
|
||||
|
||||
if (n == 'en-note') {
|
||||
// Start of note
|
||||
} else if (isBlockTag(n)) {
|
||||
@ -502,6 +726,26 @@ function enexXmlToMdArray(stream, resources) {
|
||||
if (resource && !!resource.id) {
|
||||
section.lines = addResourceTag(section.lines, resource, nodeAttributes.alt);
|
||||
}
|
||||
} else if (n == "span" || n == "font") {
|
||||
// Check for monospace font. It can come from being specified in either from
|
||||
// <span style="..."> or <font face="...">.
|
||||
// Monospace sections are already in monospace for Evernote code blocks
|
||||
if (state.inCodeblock == 0 && nodeAttributes) {
|
||||
let style = null;
|
||||
|
||||
if (nodeAttributes.style) {
|
||||
style = nodeAttributes.style.toLowerCase();
|
||||
} else if (nodeAttributes.face) {
|
||||
style = nodeAttributes.face.toLowerCase();
|
||||
}
|
||||
|
||||
monospace = style.match(/monospace|courier|menlo|monaco/) != null;
|
||||
|
||||
if (monospace) {
|
||||
state.inMonospaceFont = true;
|
||||
section.lines.push(MONOSPACE_OPEN);
|
||||
}
|
||||
}
|
||||
} else if (["span", "font", 'sup', 'cite', 'abbr', 'small', 'tt', 'sub', 'colgroup', 'col', 'ins', 'caption', 'var', 'map', 'area'].indexOf(n) >= 0) {
|
||||
// Inline tags that can be ignored in Markdown
|
||||
} else {
|
||||
@ -512,6 +756,17 @@ function enexXmlToMdArray(stream, resources) {
|
||||
saxStream.on('closetag', function(n) {
|
||||
n = n ? n.toLowerCase() : n;
|
||||
|
||||
if (n == "div") {
|
||||
if (state.inCodeblock >= 1) {
|
||||
state.inCodeblock--;
|
||||
if (state.inCodeblock == 0) {
|
||||
// Evernote code block end
|
||||
section.lines.push("```");
|
||||
return; // skip further processing
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (n == 'en-note') {
|
||||
// End of note
|
||||
} else if (isNewLineOnlyEndTag(n)) {
|
||||
@ -522,6 +777,11 @@ function enexXmlToMdArray(stream, resources) {
|
||||
if (section && section.parent) section = section.parent;
|
||||
} else if (n == 'table') {
|
||||
if (section && section.parent) section = section.parent;
|
||||
} else if (n == "span" || n == "font") {
|
||||
if (state.inCodeblock == 0 && state.inMonospaceFont) {
|
||||
state.inMonospaceFont = false;
|
||||
section.lines.push(MONOSPACE_CLOSE);
|
||||
}
|
||||
} else if (isIgnoredEndTag(n)) {
|
||||
// Skip
|
||||
} else if (isListTag(n)) {
|
||||
@ -662,7 +922,7 @@ function drawTable(table) {
|
||||
|
||||
const renderCurrentCells = () => {
|
||||
if (!currentCells.length) return;
|
||||
const cellText = processMdArrayNewLines(currentCells);
|
||||
const cellText = processMdArrayNewLines(currentCells, true);
|
||||
line.push(cellText);
|
||||
currentCells = [];
|
||||
}
|
||||
@ -685,7 +945,7 @@ function drawTable(table) {
|
||||
|
||||
// A cell in a Markdown table cannot have actual new lines so replace
|
||||
// them with <br>, which are supported by the markdown renderers.
|
||||
let cellText = processMdArrayNewLines(td.lines).replace(/\n+/g, "<br>");
|
||||
let cellText = processMdArrayNewLines(td.lines, true).replace(/\n+/g, "<br>");
|
||||
|
||||
// Inside tables cells, "|" needs to be escaped
|
||||
cellText = cellText.replace(/\|/g, "\\|");
|
||||
|
Loading…
x
Reference in New Issue
Block a user