diff --git a/ReactNativeClient/lib/import-enex-md-gen.js b/ReactNativeClient/lib/import-enex-md-gen.js index a7b3b8ae4a..1e217b7334 100644 --- a/ReactNativeClient/lib/import-enex-md-gen.js +++ b/ReactNativeClient/lib/import-enex-md-gen.js @@ -1,224 +1,12 @@ const stringPadding = require('string-padding'); -const stringToStream = require('string-to-stream') const BLOCK_OPEN = "[[BLOCK_OPEN]]"; const BLOCK_CLOSE = "[[BLOCK_CLOSE]]"; const NEWLINE = "[[NEWLINE]]"; const NEWLINE_MERGED = "[[MERGED]]"; const SPACE = "[[SPACE]]"; -// For monospace font detection (Courier, Menlo, Moncaco) -const MONOSPACE_OPEN = "[[MONOSPACE_OPEN]]"; -const MONOSPACE_CLOSE = "[[MONOSPACE_CLOSE]]"; - -// This function will return a list of all monospace sections with a flag saying whether they can be merged or not -function findMonospaceSections(md) { - let temp = []; - - let sections = []; - let section = null; - // This variable is used twice: to detected if a newline is between monospace sections and if a newline is inside monospace section - let mergeWithPrevious = true; - - let last = ""; - for (let i = 0; i < md.length; i++) { - let v = md[i]; - - if (v == MONOSPACE_OPEN) { - if (section != null) throw new Error('Monospace open tag detected while the previous was not closed'); // Sanity check, but normally not possible - - let monospaceSection = { - openIndex: null, - closeIndex: null, - mergeAllowed: true, - mergeWithPrevious: mergeWithPrevious, - isEmptyLine: false, - } - section = monospaceSection; - - // Remember where monospace section begins, later it will be replaced with appropriate markdown (` or ```) - section.openIndex = temp.push(v) - 1; - // Add an empty string, it can be later replaced with newline if necessary - temp.push(""); - - if (last != BLOCK_OPEN) { - // We cannot merge inline code - section.mergeAllowed = false; - } - - // Reset to detect if monospace section contains a newline - mergeWithPrevious = true; - - } else if (v == MONOSPACE_CLOSE) { - if (section == null) throw new Error('Monospace tag was closed without being open before'); // Sanity check, but normally not possible - if (section.closeIndex != null) throw new Error('Monospace tag is closed for the second time'); // Sanity check, but normally not possible - - // Add an empty string, it can be later replaced with newline if necessary - temp.push(""); - // Remember where monospace section ends, later it will be replaced with appropriate markdown (` or ```) - section.closeIndex = temp.push(v) - 1; - - if (md[i+1] != BLOCK_CLOSE) { - // We cannot merge inline code - section.mergeAllowed = false; - } - - section.isEmptyLine = mergeWithPrevious; - sections.push(section); - - // Reset - section = null; - mergeWithPrevious = true; - - } else { - // We can merge only if monospace sections are separated by newlines - if (v != NEWLINE && v != BLOCK_OPEN && v != BLOCK_CLOSE) { - mergeWithPrevious = false; - } - temp.push(v); - } - last = v; - } - - return { - md: temp, - monospaceSections: sections, - }; -} - -// This function is looping over monospace sections and merging what it can merge -function mergeMonospaceSections(md, sections) { - - const USE_BLOCK_TAG = 1; - const USE_INLINE_TAG = 2; - const USE_EMPTY_TAG = 3; - - const toMonospace = (md, section, startTag, endTag) => { - - // It looks better when empty lines are not inlined - if (startTag == USE_INLINE_TAG && section.isEmptyLine) { - startTag = USE_EMPTY_TAG; - endTag = USE_EMPTY_TAG; - } - - switch (startTag) { - case USE_BLOCK_TAG: - md[section.openIndex] = "```"; - md[section.openIndex + 1] = NEWLINE; - break; - case USE_INLINE_TAG: - md[section.openIndex] = "`"; - break; - case USE_EMPTY_TAG: - md[section.openIndex] = ""; - break; - } - switch (endTag) { - case USE_BLOCK_TAG: - // We don't add a NEWLINE if there already is a NEWLINE - if (md[section.closeIndex - 2] == NEWLINE) { - md[section.closeIndex - 1] = ""; - } else { - md[section.closeIndex - 1] = NEWLINE; - } - md[section.closeIndex] = "```"; - break; - case USE_INLINE_TAG: - md[section.closeIndex] = "`"; - break; - case USE_EMPTY_TAG: - md[section.closeIndex] = ""; - break; - } - } - - const getSection = () => { - return sections.shift(); - } - - const getMergeableSection = (first = null) => { - if (first) { - sections.unshift(first); - } - while (sections.length) { - s = sections.shift(); - if (s.mergeAllowed) { - return s; - } - // If cannot merge then convert into inline code - toMonospace(md, s, USE_INLINE_TAG, USE_INLINE_TAG); - } - return null; - } - - let left = getMergeableSection(); - let right = null; - - while (left) { - let isFirst = true; - - right = getSection(); - while (right && right.mergeAllowed && right.mergeWithPrevious) { - // We can merge left and right - if (isFirst) { - isFirst = false; - // First section - toMonospace(md, left, USE_BLOCK_TAG, USE_EMPTY_TAG); - } else { - // Middle section - toMonospace(md, left, USE_EMPTY_TAG, USE_EMPTY_TAG); - } - left = right; - right = getSection(); - } - - if (isFirst) { - // Could not merge, convert to inline code - toMonospace(md, left, USE_INLINE_TAG, USE_INLINE_TAG); - } else { - // Was merged, add block end tag - toMonospace(md, left, USE_EMPTY_TAG, USE_BLOCK_TAG); - } - - left = getMergeableSection(right); - } -} - -// This function will try to merge monospace sections -// It works in two phases: -// 1) It will find all monospace sections -// 2) It will merge all monospace sections where merge is allowed -function mergeMonospaceSectionsWrapper(md, ignoreMonospace = false) { - - if (!ignoreMonospace) { - const result = findMonospaceSections(md); - - if (result.monospaceSections.length > 0) { - mergeMonospaceSections(result.md, result.monospaceSections); - } - md = result.md; - } - - // Remove empty items, it is necessary for correct function of newline merging happening outside this function - let temp = [] - for (let i = 0; i < md.length; i++) { - let v = md[i]; - if (ignoreMonospace && (v == MONOSPACE_OPEN || v == MONOSPACE_CLOSE)) { - continue; // skip - } - if (v != "") { - temp.push(v); - } - } - - return temp; -} - -function processMdArrayNewLines(md, isTable = false) { - // console.info(md); - - // Try to merge MONOSPACE sections, works good when when not parsing a table - // md = mergeMonospaceSectionsWrapper(md, isTable); +function processMdArrayNewLines(md) { while (md.length && md[0] == BLOCK_OPEN) { md.shift(); } @@ -292,8 +80,6 @@ function processMdArrayNewLines(md, isTable = false) { } } - // console.info(md); - let output = ''; let previous = ''; let start = true; @@ -317,195 +103,9 @@ function processMdArrayNewLines(md, isTable = false) { if (!output.trim().length) return ''; - // To simplify the result, we only allow up to one empty line between blocks of text - const mergeMultipleNewLines = function(lines) { - let output = []; - let newlineCount = 0; - for (let i = 0; i < lines.length; i++) { - const line = lines[i]; - if (!line.trim()) { - newlineCount++; - } else { - newlineCount = 0; - } - - if (newlineCount >= 2) continue; - - output.push(line); - } - return output; - } - - let lines = output.replace(/\\r/g, '').split('\n'); - - // console.info(lines); - - lines = formatMdLayout(lines) - // lines = convertSingleLineCodeBlocksToInline(lines) - lines = mergeMultipleNewLines(lines); - return lines.join('\n'); + return output; } -// While the processMdArrayNewLines() function adds newlines in a way that's technically correct, the resulting Markdown can look messy. -// This is because while a "block" element should be surrounded by newlines, in practice, some should be surrounded by TWO new lines, while -// others by only ONE. -// -// For instance, this: -// -//
  • one
  • -//
  • two
  • -//
  • three
  • -// -// should result in this: -// -// - one -// - two -// - three -// -// While this: -// -//

    Some long paragraph

    And another one

    And the last paragraph

    -// -// should result in this: -// -// Some long paragraph -// -// And another one -// -// And the last paragraph -// -// So in one case, one newline between tags, and in another two newlines. In HTML this would be done via CSS, but in Markdown we need -// to add new lines. It's also important to get these newlines right because two blocks of text next to each others might be renderered -// differently than if there's a newlines between them. So the function below parses the almost final MD and add new lines depending -// on various rules. - - const isHeading = function(line) { - return !!line.match(/^#+\s/); - } - - const isListItem = function(line) { - return line && line.trim().indexOf('- ') === 0; - } - - const isCodeLine = function(line) { - return line && line.indexOf('\t') === 0; - } - - const isTableLine = function(line) { - return line.indexOf('| ') === 0; - } - - const isPlainParagraph = function(line) { - // Note: if a line is no longer than 80 characters, we don't consider it's a paragraph, which - // means no newlines will be added before or after. This is to handle text that has been - // written with "hard" new lines. - if (!line || line.length < 80) return false; - - if (isListItem(line)) return false; - if (isHeading(line)) return false; - if (isCodeLine(line)) return false; - if (isTableLine(line)) return false; - - return true; - } - -function formatMdLayout(lines) { - let previous = ''; - let newLines = []; - for (let i = 0; i < lines.length; i++) { - const line = lines[i]; - - // Add a new line at the end of a list of items - if (isListItem(previous) && line && !isListItem(line)) { - newLines.push(''); - - // Add a new line at the beginning of a list of items - } else if (isListItem(line) && previous && !isListItem(previous)) { - newLines.push(''); - - // Add a new line before a heading - } else if (isHeading(line) && previous) { - newLines.push(''); - - // Add a new line after a heading - } else if (isHeading(previous) && line) { - newLines.push(''); - - } else if (isCodeLine(line) && !isCodeLine(previous)) { - newLines.push(''); - - } else if (!isCodeLine(line) && isCodeLine(previous)) { - newLines.push(''); - - } else if (isTableLine(line) && !isTableLine(previous)) { - newLines.push(''); - - } else if (!isTableLine(line) && isTableLine(previous)) { - newLines.push(''); - - // Add a new line at beginning of paragraph - } else if (isPlainParagraph(line) && previous) { - newLines.push(''); - - // Add a new line at end of paragraph - } else if (isPlainParagraph(previous) && line) { - newLines.push(''); - } - - newLines.push(line); - previous = newLines[newLines.length - 1]; - } - - return newLines; -} - -function lineStartsWithDelimiter(line) { - if (!line || !line.length) return false; - return ' ,.;:)]}'.indexOf(line[0]) >= 0; -} - -// function convertSingleLineCodeBlocksToInline(lines) { -// let newLines = []; -// let currentCodeLines = []; -// let codeLineCount = 0; - - -// const processCurrentCodeLines = (line) => { -// if (codeLineCount === 1) { -// const inlineCode = currentCodeLines.join('').trim(); -// newLines[newLines.length - 1] += '`' + inlineCode + '`'; -// if (line) newLines[newLines.length - 1] += (lineStartsWithDelimiter(line) ? '' : ' ') + line; -// } else { -// newLines = newLines.concat(currentCodeLines); -// newLines.push(line); -// } - -// currentCodeLines = []; -// codeLineCount = 0; -// } - -// for (let i = 0; i < lines.length; i++) { -// const line = lines[i]; - -// if (isCodeLine(line)) { -// currentCodeLines.push(line); -// codeLineCount++; -// } else if (!line.trim()) { -// currentCodeLines.push(line); -// } else { -// if (currentCodeLines.length) { -// processCurrentCodeLines(line); -// } else { -// newLines.push(line); -// } -// } -// } - -// if (currentCodeLines.length) processCurrentCodeLines(''); - -// return newLines; -// } - function isWhiteSpace(c) { return c == '\n' || c == '\r' || c == '\v' || c == '\f' || c == '\t' || c == ' '; } @@ -533,46 +133,14 @@ function simplifyString(s) { } function collapseWhiteSpaceAndAppend(lines, state, text) { - // console.info([text]); - - if (state.inCode.length) { + if (state.inCode) { + text = "\t" + text; lines.push(text); - - // state.currentCode += text; - - - // let previous = lines.length ? lines[lines.length - 1] : ''; - - // // If the preceding item is a block limit, then the current line should start with a TAB - // if ([BLOCK_OPEN, BLOCK_CLOSE, NEWLINE, NEWLINE_MERGED, MONOSPACE_OPEN, MONOSPACE_CLOSE].indexOf(previous) >= 0 || !previous) { - // //text = "\t" + text; - // lines.push('\t'); - // lines.push(text); - // } else { - // // If the current text contains one or more \n, then the last one should be immediately followed by a TAB - // const idx = text.lastIndexOf('\n'); - // if (idx >= 0) { - // text = text.substr(0, idx+1) + '\t' + text.substr(idx+1); - // } - - // lines.push(text); - // } } else { - - // console.info(lines); - - if (!!text.match(/^\n+$/)) { - lines.push(' '); - return lines; - } - // Remove all \n and \r from the left and right of the text while (text.length && (text[0] == "\n" || text[0] == "\r")) text = text.substr(1); while (text.length && (text[text.length - 1] == "\n" || text[text.length - 1] == "\r")) text = text.substr(0, text.length - 1); - // Replace the inner \n with a space - text = text.replace(/[\n\r]+/g, ' '); - // Collapse all white spaces to just one. If there are spaces to the left and right of the string // also collapse them to just one space. let spaceLeft = text.length && text[0] == ' '; @@ -626,7 +194,7 @@ function addResourceTag(lines, resource, alt = "") { function isBlockTag(n) { - return ["div", "p", "dl", "dd", 'dt', "center", 'address', 'form', 'input', 'section', 'nav', 'header', 'article', 'textarea', 'footer', 'fieldset', 'summary', 'details'].indexOf(n) >= 0; + return ["div", "p", "dl", "dd", 'dt', "center", 'address'].indexOf(n) >= 0; } function isStrongTag(n) { @@ -646,7 +214,7 @@ function isAnchor(n) { } function isIgnoredEndTag(n) { - return ["en-note", "en-todo", "span", "body", "html", "font", "br", 'hr', 'tbody', 'sup', 'img', 'abbr', 'cite', 'thead', 'small', 'tt', 'sub', 'colgroup', 'col', 'ins', 'caption', 'var', 'map', 'area', 'label', 'legend', 'time-ago', 'relative-time'].indexOf(n) >= 0; + return ["en-note", "en-todo", "span", "body", "html", "font", "br", 'hr', 'tbody', 'sup', 'img', 'abbr', 'cite', 'thead', 'small', 'tt', 'sub', 'colgroup', 'col', 'ins', 'caption', 'var', 'map', 'area'].indexOf(n) >= 0; } function isListTag(n) { @@ -655,12 +223,29 @@ function isListTag(n) { // Elements that don't require any special treatment beside adding a newline character function isNewLineOnlyEndTag(n) { - return ["div", "p", "h1", "h2", "h3", "h4", "h5", 'h6', "dl", "dd", 'dt', "center", 'address', 'form', 'input', 'section', 'nav', 'header', 'article', 'textarea', 'footer', 'fieldset', 'summary', 'details'].indexOf(n) >= 0; + return ["div", "p", "li", "h1", "h2", "h3", "h4", "h5", 'h6', "dl", "dd", 'dt', "center", 'address'].indexOf(n) >= 0; } -// Tags that must be ignored - both the tag and its content. -function isIgnoredContentTag(n) { - return ['script', 'style', 'iframe', 'select', 'option', 'button', 'video', 'source', 'svg', 'path'].indexOf(n) >= 0 +function isCodeTag(n) { + // NOTE: This handles "code" tags that were copied and pasted from a browser to Evernote. Evernote also has its own code block, which + // of course is way more complicated and currently not fully supported (the code will be imported and indented properly, but it won't + // have the extra Markdown indentation that identifies the block as code). For reference this is an example of Evernote-style code block: + // + //
    function justTesting() {
         someCodeBlock();
    + //
         return true;
    }
    + // + // Which in normal HTML would be: + // + // + // function justTesting() { + // someCodeBlock(); + // return true; + // } + // + return n == "pre" || n == "code"; } function isInlineCodeTag(n) { @@ -686,36 +271,7 @@ function attributeToLowerCase(node) { return output; } -function urlWithoutPath(url) { - const parsed = require('url').parse(url, true); - return parsed.protocol + '//' + parsed.host; -} - -function urlProtocol(url) { - const parsed = require('url').parse(url, true); - return parsed.protocol; -} - -const schemeRegex = /[a-zA-Z0-9\+\-\.]+:\/\// -// Make sure baseUrl doesn't end with a slash -function prependBaseUrl(url, baseUrl) { - if (!url) url = ''; - if (!baseUrl) return url; - const matches = schemeRegex.exec(url); - if (matches) return url; // Don't prepend the base URL if the URL already has a scheme - - if (url.length >= 2 && url.indexOf('//') === 0) { // If it starts with // it's a protcol-relative URL - return urlProtocol(baseUrl) + url; - } else if (url && url[0] === '/') { // If it starts with a slash, it's an absolute URL so it should be relative to the domain (and not to the full baseUrl) - return urlWithoutPath(baseUrl) + url; - } else { - return baseUrl + '/' + url; - } -} - -function enexXmlToMdArray(stream, resources, options = {}) { - if (options.baseUrl) options.baseUrl = options.baseUrl.replace(/[\/]+$/, ''); - +function enexXmlToMdArray(stream, resources) { let remainingResources = resources.slice(); const removeRemainingResource = (id) => { @@ -729,30 +285,15 @@ function enexXmlToMdArray(stream, resources, options = {}) { return new Promise((resolve, reject) => { let state = { - inCode: [], - inPre: false, + inCode: false, inQuote: false, - inMonospaceFont: false, - inCodeblock: 0, lists: [], anchorAttributes: [], - ignoreContents: [], - ignoreWhiteSpace: [], - warningsTags: [], }; - // In some cases white space should be ignored. For example, this: - //