1
0
mirror of https://github.com/laurent22/joplin.git synced 2024-12-24 10:27:10 +02:00

Merge monospace text lines when importing from Evernote

This commit is contained in:
petrz12 2018-04-10 22:10:33 +02:00
parent d5574098f0
commit 16554b22c7

View File

@ -5,8 +5,235 @@ const BLOCK_CLOSE = "[[BLOCK_CLOSE]]";
// Placeholder tokens stored as items of the intermediate markdown array
const NEWLINE = "[[NEWLINE]]";
const NEWLINE_MERGED = "[[MERGED]]";
const SPACE = "[[SPACE]]";
// Markers pushed around text that uses a monospace font (monospace, Courier, Menlo, Monaco).
// They are later replaced with inline (`) or block (```) code markdown.
const MONOSPACE_OPEN = "[[MONOSPACE_OPEN]]";
const MONOSPACE_CLOSE = "[[MONOSPACE_CLOSE]]";
// Set to true to print the monospace merging steps with console.log
const DEBUG_MONOSPACE_MERGE = false;
// Debug helper: prints every item of the markdown array `md` together with its
// index, framed by START/STOP lines that carry the label `text`.
// Does nothing unless DEBUG_MONOSPACE_MERGE is enabled.
function debugMD(text, md) {
	if (!DEBUG_MONOSPACE_MERGE) return;

	console.log("< " + text + " START>");
	for (const [index, item] of md.entries()) {
		console.log("%i: \"%s\"", index, item);
	}
	console.log("< " + text + " STOP>");
}
// Scans the markdown array for MONOSPACE_OPEN / MONOSPACE_CLOSE pairs and
// returns { md, monospaceSections }:
//  - md is a copy of the input in which an empty-string placeholder follows
//    every open marker and precedes every close marker (the placeholders can
//    later be turned into newlines). When ignoreMonospace is true the markers
//    themselves are dropped and only the placeholders are kept.
//  - monospaceSections has one descriptor per section, in document order:
//      openIndex / closeIndex - position of the markers in the returned md
//                               (null when ignoreMonospace is true)
//      mergeAllowed           - true only when the section is directly wrapped
//                               by BLOCK_OPEN ... BLOCK_CLOSE (not inline code)
//      mergeWithPrevious      - true when only newlines and block markers
//                               separate it from the previous section
// Throws an Error when the open/close markers are not properly paired.
function findMonospaceSections(md, ignoreMonospace = false) {
	const tokens = [];
	const found = [];
	let current = null;
	let previousToken = "";
	// Stays true only while nothing but newlines and block markers has been
	// seen since the previous monospace section was closed
	let adjacentToPrevious = true;

	for (const [index, token] of md.entries()) {
		if (token == MONOSPACE_OPEN) {
			// Sanity check, but normally not possible
			if (current != null) throw new Error('Monospace open tag detected while the previous was not closed');

			current = {
				openIndex: null,
				closeIndex: null,
				// Inline code (not directly preceded by BLOCK_OPEN) cannot be merged
				mergeAllowed: previousToken == BLOCK_OPEN,
				mergeWithPrevious: adjacentToPrevious,
			};

			// Remember where the section starts so that the marker can later
			// be swapped for the appropriate markdown (` or ```)
			if (!ignoreMonospace) {
				tokens.push(token);
				current.openIndex = tokens.length - 1;
			}

			// Placeholder that may become a newline later on
			tokens.push("");

			if (DEBUG_MONOSPACE_MERGE) {
				console.log("> MONOSPACE_OPEN, openIndex: %o, closeIndex: %o, mergeAllowed: %o, mergeWithPrevious: %o",
					current.openIndex, current.closeIndex, current.mergeAllowed, current.mergeWithPrevious);
			}
		} else if (token == MONOSPACE_CLOSE) {
			// Sanity checks, but normally not possible
			if (current == null) throw new Error('Monospace tag was closed without being open before');
			if (current.closeIndex != null) throw new Error('Monospace tag is closed for the second time');

			// Placeholder that may become a newline later on
			tokens.push("");

			// Remember where the section ends so that the marker can later
			// be swapped for the appropriate markdown (` or ```)
			if (!ignoreMonospace) {
				tokens.push(token);
				current.closeIndex = tokens.length - 1;
			}

			// Inline code (not directly followed by BLOCK_CLOSE) cannot be merged
			if (md[index + 1] != BLOCK_CLOSE) {
				current.mergeAllowed = false;
			}

			if (DEBUG_MONOSPACE_MERGE) {
				console.log("> \"" + md[index - 1] + "\"");
				console.log("> MONOSPACE_CLOSE, openIndex: %o, closeIndex: %o, mergeAllowed: %o, mergeWithPrevious: %o",
					current.openIndex, current.closeIndex, current.mergeAllowed, current.mergeWithPrevious);
			}

			found.push(current);

			// Start afresh for the next section
			current = null;
			adjacentToPrevious = true;
		} else {
			// Sections can only be merged when nothing but newlines and block
			// markers stands between them
			const isSeparator = token == NEWLINE || token == BLOCK_OPEN || token == BLOCK_CLOSE;
			if (!isSeparator) {
				adjacentToPrevious = false;
			}
			tokens.push(token);
		}

		previousToken = token;
	}

	return {
		md: tokens,
		monospaceSections: found,
	};
}
// Rewrites, in place, the marker and placeholder slots of every monospace
// section in `md`.
//
// A run of consecutive sections that may be merged (mergeAllowed and
// mergeWithPrevious) becomes one fenced block: the first section gets an
// opening ``` followed by a NEWLINE, the last one gets a NEWLINE (unless one is
// already there) followed by a closing ```, and all markers in between are
// blanked. Every other section is wrapped in single backticks as inline code.
//
// md              - token array as returned by findMonospaceSections(); modified in place
// sections        - section descriptors as returned by findMonospaceSections();
//                   this array is used as a queue and is empty on return
// ignoreMonospace - kept for signature compatibility, not read here. Sections
//                   without marker positions (null openIndex / closeIndex)
//                   are left untouched.
function mergeMonospaceSections(md, sections, ignoreMonospace = false) {
	const USE_BLOCK_TAG = 1;
	const USE_INLINE_TAG = 2;
	const USE_EMPTY_TAG = 3;

	// Replaces the open/close markers of one section with the requested tags
	const toMonospace = (md, section, startTag, endTag, dbg = "") => {
		if (DEBUG_MONOSPACE_MERGE) {
			console.log("> TO_MONOSPACE, openIndex: %o, closeIndex: %o, startTag: %o, endTag: %o, DBG: %o",
				section.openIndex, section.closeIndex, startTag, endTag, dbg);
		}

		// A null index means the marker was never stored in md. Indexing with
		// it would be wrong: `null + 1` evaluates to 1 and would overwrite md[1].
		if (section.openIndex != null) {
			switch (startTag) {
			case USE_BLOCK_TAG:
				md[section.openIndex] = "```";
				md[section.openIndex + 1] = NEWLINE;
				break;
			case USE_INLINE_TAG:
				md[section.openIndex] = "`";
				break;
			case USE_EMPTY_TAG:
				md[section.openIndex] = "";
				break;
			}
		}

		if (section.closeIndex != null) {
			switch (endTag) {
			case USE_BLOCK_TAG:
				// We don't add a NEWLINE if there already is a NEWLINE
				md[section.closeIndex - 1] = md[section.closeIndex - 2] === NEWLINE ? "" : NEWLINE;
				md[section.closeIndex] = "```";
				break;
			case USE_INLINE_TAG:
				md[section.closeIndex] = "`";
				break;
			case USE_EMPTY_TAG:
				md[section.closeIndex] = "";
				break;
			}
		}
	};

	// Takes the next section off the queue (undefined when the queue is empty)
	const getSection = () => {
		return sections.shift();
	};

	// Returns the next section that is allowed to merge, converting every
	// non-mergeable section met on the way into inline code. `first`, when
	// given, is put back at the head of the queue before searching.
	const getMergeableSection = (first = null) => {
		if (first) {
			sections.unshift(first);
		}
		while (sections.length) {
			const s = sections.shift();
			if (s.mergeAllowed) {
				return s;
			}
			// If cannot merge then convert onto inline code
			toMonospace(md, s, USE_INLINE_TAG, USE_INLINE_TAG, "getCollapsibleSection");
		}
		return null;
	};

	let left = getMergeableSection();
	let right = null;

	while (left) {
		let isFirst = true;

		right = getSection();
		while (right && right.mergeAllowed && right.mergeWithPrevious) {
			// We can merge left and right
			if (isFirst) {
				isFirst = false;
				toMonospace(md, left, USE_BLOCK_TAG, USE_EMPTY_TAG, "First section");
			} else {
				toMonospace(md, left, USE_EMPTY_TAG, USE_EMPTY_TAG, "Middle section");
			}
			left = right;
			right = getSection();
		}

		if (isFirst) {
			// Could not merge, convert to inline code
			toMonospace(md, left, USE_INLINE_TAG, USE_INLINE_TAG, "Left inline section");
		} else {
			// Was merged, add block end tag
			toMonospace(md, left, USE_EMPTY_TAG, USE_BLOCK_TAG, "Final section");
		}

		// `right` did not take part in the merge, so it is examined again
		left = getMergeableSection(right);
	}
}
// Entry point for monospace merging. Two phases:
// 1) findMonospaceSections() locates every monospace section
// 2) mergeMonospaceSections() merges the sections where merging is allowed
// Returns a new array with the empty items stripped out.
function mergeMonospaceSectionsWrapper(md, ignoreMonospace = false) {
	const found = findMonospaceSections(md, ignoreMonospace);
	mergeMonospaceSections(found.md, found.monospaceSections, ignoreMonospace);

	// Empty items must go, the newline merging that happens outside of this
	// function relies on it
	const compacted = [];
	for (const item of found.md) {
		if (item != "") {
			compacted.push(item);
		}
	}

	debugMD("DEBUG: after merging monospace sections", compacted);
	return compacted;
}
function processMdArrayNewLines(md, isTable = false) {
// Try to merge MONOSPACE sections; this works well when not parsing a table
md = mergeMonospaceSectionsWrapper(md, isTable);
function processMdArrayNewLines(md) {
while (md.length && md[0] == BLOCK_OPEN) {
md.shift();
}
@ -271,7 +498,11 @@ function attributeToLowerCase(node) {
return output;
}
function enexXmlToMdArray(stream, resources) {
function enexXmlToMdArray(stream, resources, importOptions = null) {
// TODO: Receive importOptions from upstream
if (!importOptions) importOptions = {};
if (!('mergeMonospaceSections' in importOptions)) importOptions.mergeMonospaceSections = true;
let remainingResources = resources.slice();
const removeRemainingResource = (id) => {
@ -287,6 +518,7 @@ function enexXmlToMdArray(stream, resources) {
let state = {
inCode: false,
inQuote: false,
inMonospaceFont: false,
lists: [],
anchorAttributes: [],
};
@ -502,6 +734,26 @@ function enexXmlToMdArray(stream, resources) {
if (resource && !!resource.id) {
section.lines = addResourceTag(section.lines, resource, nodeAttributes.alt);
}
} else if (n == "span" || n == "font") {
// Check for monospace font. It can be specified either via
// <span style="..."> or via <font face="...">.
if (importOptions.mergeMonospaceSections && nodeAttributes) {
let style = null;
if (nodeAttributes.style) {
style = nodeAttributes.style.toLowerCase();
} else if (nodeAttributes.face) {
style = nodeAttributes.face.toLowerCase();
}
monospace = style.match(/monospace|courier|menlo|monaco/) != null;
if (monospace) {
state.inMonospaceFont = true;
section.lines.push(MONOSPACE_OPEN);
//console.log("OPEN: tag: %s, style: ", n, style);
}
}
} else if (["span", "font", 'sup', 'cite', 'abbr', 'small', 'tt', 'sub', 'colgroup', 'col', 'ins', 'caption', 'var', 'map', 'area'].indexOf(n) >= 0) {
// Inline tags that can be ignored in Markdown
} else {
@ -522,6 +774,13 @@ function enexXmlToMdArray(stream, resources) {
if (section && section.parent) section = section.parent;
} else if (n == 'table') {
if (section && section.parent) section = section.parent;
} else if (n == "span" || n == "font") {
if (importOptions.mergeMonospaceSections && state.inMonospaceFont) {
state.inMonospaceFont = false;
section.lines.push(MONOSPACE_CLOSE);
//console.log("CLOSE: tag: %s, lines[n-1]: '%s', lines[n]: '%s'", n, section.lines[section.lines.length - 2], section.lines[section.lines.length - 1]);
}
} else if (isIgnoredEndTag(n)) {
// Skip
} else if (isListTag(n)) {
@ -662,7 +921,7 @@ function drawTable(table) {
const renderCurrentCells = () => {
if (!currentCells.length) return;
const cellText = processMdArrayNewLines(currentCells);
const cellText = processMdArrayNewLines(currentCells, true);
line.push(cellText);
currentCells = [];
}
@ -685,7 +944,7 @@ function drawTable(table) {
// A cell in a Markdown table cannot have actual new lines so replace
// them with <br>, which are supported by the markdown renderers.
let cellText = processMdArrayNewLines(td.lines).replace(/\n+/g, "<br>");
let cellText = processMdArrayNewLines(td.lines, true).replace(/\n+/g, "<br>");
// Inside tables cells, "|" needs to be escaped
cellText = cellText.replace(/\|/g, "\\|");
@ -760,6 +1019,9 @@ async function enexXmlToMd(stream, resources) {
firstAttachment = false;
}
//console.log(mdLines);
debugMD("DEBUG: raw MdLines", mdLines);
return processMdArrayNewLines(mdLines);
}