const BLOCK_CLOSE = "[[BLOCK_CLOSE]]";
const NEWLINE = "[[NEWLINE]]";
const NEWLINE_MERGED = "[[MERGED]]";
const SPACE = "[[SPACE]]";
// Markers emitted around text rendered in a monospace font (Courier, Menlo, Monaco)
const MONOSPACE_OPEN = "[[MONOSPACE_OPEN]]";
const MONOSPACE_CLOSE = "[[MONOSPACE_CLOSE]]";

// Set to true to trace the monospace merging steps on the console
const DEBUG_MONOSPACE_MERGE = false;

/**
 * Dumps every item of a Markdown token array to the console, framed by START/STOP lines.
 * Does nothing unless DEBUG_MONOSPACE_MERGE is enabled.
 *
 * @param {string} text - Label printed in the START/STOP frame.
 * @param {Array} md - Token array to dump.
 */
function debugMD(text, md) {
	if (!DEBUG_MONOSPACE_MERGE) return;

	console.log("< " + text + " START>");
	for (let i = 0; i < md.length; i++) {
		console.log("%i: \"%s\"", i, md[i]);
	}
	console.log("< " + text + " STOP>");
}

/**
 * Scans a token array for MONOSPACE_OPEN / MONOSPACE_CLOSE pairs and records where each
 * pair sits in a rebuilt copy of the array.
 *
 * The rebuilt array holds an empty-string placeholder after every open marker and before
 * every close marker, so that a newline can later be written there without shifting the
 * recorded indexes.
 *
 * Each returned section has:
 *  - openIndex / closeIndex: position of the markers in the rebuilt array (null when
 *    ignoreMonospace is true, because the markers are then not copied);
 *  - mergeAllowed: true only when the open marker directly follows BLOCK_OPEN and the
 *    close marker is directly followed by BLOCK_CLOSE (anything else is inline code);
 *  - mergeWithPrevious: true when only NEWLINE / BLOCK_OPEN / BLOCK_CLOSE tokens were
 *    seen since the previous section was closed (or since the start of the array).
 *
 * @param {Array} md - Token array, left untouched.
 * @param {boolean} [ignoreMonospace=false] - When true the markers are dropped from the
 *   rebuilt array instead of being copied.
 * @returns {{md: Array, monospaceSections: Array}} The rebuilt array and the sections found.
 * @throws {Error} When an open marker follows an unclosed one, or a close marker has no
 *   matching open marker.
 */
function findMonospaceSections(md, ignoreMonospace = false) {
	const temp = [];
	const sections = [];
	let section = null;
	let mergeWithPrevious = true;
	let last = "";

	for (let i = 0; i < md.length; i++) {
		const v = md[i];

		if (v === MONOSPACE_OPEN) {
			// Remember where the monospace section begins, later the marker is replaced
			// with the appropriate markdown (` or ```)

			// Sanity check, but normally not possible
			if (section !== null) throw new Error('Monospace open tag detected while the previous was not closed');

			section = {
				openIndex: null,
				closeIndex: null,
				mergeAllowed: true,
				mergeWithPrevious: mergeWithPrevious,
			};

			if (!ignoreMonospace) {
				section.openIndex = temp.push(v) - 1;
			}
			// Placeholder that can later be replaced with a newline if necessary
			temp.push("");

			if (last !== BLOCK_OPEN) {
				// Not at the very start of a block: this is inline code and cannot be merged
				section.mergeAllowed = false;
			}

			if (DEBUG_MONOSPACE_MERGE) {
				console.log("> MONOSPACE_OPEN, openIndex: %o, closeIndex: %o, mergeAllowed: %o, mergeWithPrevious: %o",
					section.openIndex, section.closeIndex, section.mergeAllowed, section.mergeWithPrevious);
			}
		} else if (v === MONOSPACE_CLOSE) {
			// Remember where the monospace section ends, later the marker is replaced
			// with the appropriate markdown (` or ```)

			// Sanity check, but normally not possible
			if (section === null) throw new Error('Monospace tag was closed without being open before');

			// Placeholder that can later be replaced with a newline if necessary
			temp.push("");
			if (!ignoreMonospace) {
				section.closeIndex = temp.push(v) - 1;
			}

			if (md[i + 1] !== BLOCK_CLOSE) {
				// Not at the very end of a block: this is inline code and cannot be merged
				section.mergeAllowed = false;
			}

			if (DEBUG_MONOSPACE_MERGE) {
				console.log("> \"" + md[i - 1] + "\"");
				console.log("> MONOSPACE_CLOSE, openIndex: %o, closeIndex: %o, mergeAllowed: %o, mergeWithPrevious: %o",
					section.openIndex, section.closeIndex, section.mergeAllowed, section.mergeWithPrevious);
			}

			sections.push(section);

			// Reset for the next section
			section = null;
			mergeWithPrevious = true;
		} else {
			// Sections can be merged only if nothing but newlines and block boundaries separates them
			if (v !== NEWLINE && v !== BLOCK_OPEN && v !== BLOCK_CLOSE) {
				mergeWithPrevious = false;
			}
			temp.push(v);
		}

		last = v;
	}

	return {
		md: temp,
		monospaceSections: sections,
	};
}

/**
 * Replaces the monospace markers recorded by findMonospaceSections() with Markdown code
 * delimiters, writing the result into `md` in place.
 *
 * A run of consecutive sections that all allow merging becomes a single fenced block:
 * "```" plus NEWLINE at the first open marker, NEWLINE (unless one is already there) plus
 * "```" at the last close marker, and empty strings for the markers in between. Every
 * other section is wrapped in single backticks.
 *
 * @param {Array} md - Rebuilt token array returned by findMonospaceSections(); modified in place.
 * @param {Array} sections - Sections returned by findMonospaceSections(); not modified.
 * @param {boolean} [ignoreMonospace=false] - Must match the flag given to
 *   findMonospaceSections(). When true there are no markers in `md` and the sections carry
 *   null indexes, so nothing is written.
 */
function mergeMonospaceSections(md, sections, ignoreMonospace = false) {
	// Writing through null indexes would corrupt the array (null + 1 resolves to index 1)
	if (ignoreMonospace) return;

	const USE_BLOCK_TAG = 1;
	const USE_INLINE_TAG = 2;
	const USE_EMPTY_TAG = 3;

	const toMonospace = (section, startTag, endTag, dbg = "") => {
		if (DEBUG_MONOSPACE_MERGE) {
			console.log("> TO_MONOSPACE, openIndex: %o, closeIndex: %o, startTag: %o, endTag: %o, DBG: %o",
				section.openIndex, section.closeIndex, startTag, endTag, dbg);
		}

		switch (startTag) {
		case USE_BLOCK_TAG:
			md[section.openIndex] = "```";
			// Fill the placeholder that follows the open marker
			md[section.openIndex + 1] = NEWLINE;
			break;
		case USE_INLINE_TAG:
			md[section.openIndex] = "`";
			break;
		case USE_EMPTY_TAG:
			md[section.openIndex] = "";
			break;
		}

		switch (endTag) {
		case USE_BLOCK_TAG:
			// Fill the placeholder that precedes the close marker, but do not add a
			// NEWLINE if the section content already ends with one
			if (md[section.closeIndex - 2] === NEWLINE) {
				md[section.closeIndex - 1] = "";
			} else {
				md[section.closeIndex - 1] = NEWLINE;
			}
			md[section.closeIndex] = "```";
			break;
		case USE_INLINE_TAG:
			md[section.closeIndex] = "`";
			break;
		case USE_EMPTY_TAG:
			md[section.closeIndex] = "";
			break;
		}
	};

	// Read position in `sections`; a cursor is used so the caller's array is left intact
	let cursor = 0;

	const nextSection = () => {
		return cursor < sections.length ? sections[cursor++] : null;
	};

	// Returns the next section that allows merging, converting every non-mergeable
	// section met on the way to inline code
	const nextMergeableSection = () => {
		while (cursor < sections.length) {
			const candidate = sections[cursor++];
			if (candidate.mergeAllowed) {
				return candidate;
			}
			toMonospace(candidate, USE_INLINE_TAG, USE_INLINE_TAG, "getCollapsibleSection");
		}
		return null;
	};

	let left = nextMergeableSection();

	while (left) {
		let isFirst = true;

		let right = nextSection();
		while (right && right.mergeAllowed && right.mergeWithPrevious) {
			// Left and right can be merged
			if (isFirst) {
				isFirst = false;
				toMonospace(left, USE_BLOCK_TAG, USE_EMPTY_TAG, "First section");
			} else {
				toMonospace(left, USE_EMPTY_TAG, USE_EMPTY_TAG, "Middle section");
			}
			left = right;
			right = nextSection();
		}

		if (isFirst) {
			// Could not merge, convert to inline code
			toMonospace(left, USE_INLINE_TAG, USE_INLINE_TAG, "Left inline section");
		} else {
			// Was merged, add the block end tag
			toMonospace(left, USE_EMPTY_TAG, USE_BLOCK_TAG, "Final section");
		}

		// `right` was read but not merged: hand it back so it starts the next run
		if (right) cursor--;

		left = nextMergeableSection();
	}
}

/**
 * Converts the monospace markers of a token array to Markdown code delimiters.
 * It works in two phases:
 *  1) find all monospace sections;
 *  2) merge the sections where merging is allowed, turning the others into inline code.
 *
 * @param {Array} md - Token array, left untouched.
 * @param {boolean} [ignoreMonospace=false] - When true the markers are simply removed
 *   and no code delimiter is produced.
 * @returns {Array} A new token array without empty-string items.
 * @throws {Error} When the open/close markers are not properly paired.
 */
function mergeMonospaceSectionsWrapper(md, ignoreMonospace = false) {
	const result = findMonospaceSections(md, ignoreMonospace);

	mergeMonospaceSections(result.md, result.monospaceSections, ignoreMonospace);

	// Remove empty items, it is necessary for the newline merging happening outside this function
	const cleaned = result.md.filter((v) => v !== "");

	debugMD("DEBUG: after merging monospace sections", cleaned);

	return cleaned;
}
+ if (importOptions.mergeMonospaceSections && nodeAttributes) { + let style = null; + + if (nodeAttributes.style) { + style = nodeAttributes.style.toLowerCase(); + } else if (nodeAttributes.face) { + style = nodeAttributes.face.toLowerCase(); + } + + monospace = style.match(/monospace|courier|menlo|monaco/) != null; + + if (monospace) { + state.inMonospaceFont = true; + section.lines.push(MONOSPACE_OPEN); + //console.log("OPEN: tag: %s, style: ", n, style); + } + } } else if (["span", "font", 'sup', 'cite', 'abbr', 'small', 'tt', 'sub', 'colgroup', 'col', 'ins', 'caption', 'var', 'map', 'area'].indexOf(n) >= 0) { // Inline tags that can be ignored in Markdown } else { @@ -522,6 +774,13 @@ function enexXmlToMdArray(stream, resources) { if (section && section.parent) section = section.parent; } else if (n == 'table') { if (section && section.parent) section = section.parent; + + } else if (n == "span" || n == "font") { + if (importOptions.mergeMonospaceSections && state.inMonospaceFont) { + state.inMonospaceFont = false; + section.lines.push(MONOSPACE_CLOSE); + //console.log("CLOSE: tag: %s, lines[n-1]: '%s', lines[n]: '%s'", n, section.lines[section.lines.length - 2], section.lines[section.lines.length - 1]); + } } else if (isIgnoredEndTag(n)) { // Skip } else if (isListTag(n)) { @@ -662,7 +921,7 @@ function drawTable(table) { const renderCurrentCells = () => { if (!currentCells.length) return; - const cellText = processMdArrayNewLines(currentCells); + const cellText = processMdArrayNewLines(currentCells, true); line.push(cellText); currentCells = []; } @@ -685,7 +944,7 @@ function drawTable(table) { // A cell in a Markdown table cannot have actual new lines so replace // them with
, which are supported by the markdown renderers. - let cellText = processMdArrayNewLines(td.lines).replace(/\n+/g, "
"); + let cellText = processMdArrayNewLines(td.lines, true).replace(/\n+/g, "
"); // Inside tables cells, "|" needs to be escaped cellText = cellText.replace(/\|/g, "\\|"); @@ -760,6 +1019,9 @@ async function enexXmlToMd(stream, resources) { firstAttachment = false; } + //console.log(mdLines); + debugMD("DEBUG: raw MdLines", mdLines); + return processMdArrayNewLines(mdLines); }