Merge pull request #409 from petrz12/better-evernote-import

Merge monospace text lines when importing from Evernote
2025-07-16 00:14:34 +02:00 · 2018-05-11 15:25:26 +01:00
parent 8b38752cbf 9f2666aef9
commit 0f0ff86ffa
1 changed files with 263 additions and 3 deletions
--- a/ReactNativeClient/lib/import-enex-md-gen.js
+++ b/ReactNativeClient/lib/import-enex-md-gen.js
@ -5,8 +5,217 @@ const BLOCK_CLOSE = "[[BLOCK_CLOSE]]";
 const NEWLINE = "[[NEWLINE]]";
 const NEWLINE_MERGED = "[[MERGED]]";
 const SPACE = "[[SPACE]]";
 // For monospace font detection (Courier, Menlo, Moncaco)
 const MONOSPACE_OPEN = "[[MONOSPACE_OPEN]]";
 const MONOSPACE_CLOSE = "[[MONOSPACE_CLOSE]]";
 // This function will return a list of all monospace sections with a flag saying whether they can be merged or not
 function findMonospaceSections(md) {
 	let temp = [];
 	let sections = [];
 	let section = null;
 	// This variable is used twice: to detected if a newline is between monospace sections and if a newline is inside monospace section
 	let mergeWithPrevious = true;
 	let last = "";
 	for (let i = 0; i < md.length; i++) { 
 		let v = md[i];
 		if (v == MONOSPACE_OPEN) {
 			if (section != null) throw new Error('Monospace open tag detected while the previous was not closed'); // Sanity check, but normally not possible
 			let monospaceSection = {
 				openIndex: null,
 				closeIndex: null,
 				mergeAllowed: true, 
 				mergeWithPrevious: mergeWithPrevious,
 				isEmptyLine: false,
 			}
 			section = monospaceSection;
 			// Remember where monospace section begins, later it will be replaced with appropriate markdown (` or ```) 
 			section.openIndex = temp.push(v) - 1;
 			// Add an empty string, it can be later replaced with newline if necessary
 			temp.push("");
 			if (last != BLOCK_OPEN) {
 				// We cannot merge inline code
 				section.mergeAllowed = false;
 			}
 			// Reset to detect if monospace section contains a newline
 			mergeWithPrevious = true;
 		} else if (v == MONOSPACE_CLOSE) {
 			if (section == null) throw new Error('Monospace tag was closed without being open before'); // Sanity check, but normally not possible
 			if (section.closeIndex != null) throw new Error('Monospace tag is closed for the second time'); // Sanity check, but normally not possible
 			// Add an empty string, it can be later replaced with newline if necessary
 			temp.push("");
 			// Remember where monospace section ends, later it will be replaced with appropriate markdown (` or ```) 
 			section.closeIndex = temp.push(v) - 1;
 			if (md[i+1] != BLOCK_CLOSE) {
 				// We cannot merge inline code
 				section.mergeAllowed = false;
 			}
 			section.isEmptyLine = mergeWithPrevious;
 			sections.push(section);
 			// Reset
 			section = null;
 			mergeWithPrevious = true;
 		} else {
 			// We can merge only if monospace sections are separated by newlines
 			if (v != NEWLINE && v != BLOCK_OPEN && v != BLOCK_CLOSE) {
 				mergeWithPrevious = false;
 			}
 			temp.push(v);
 		}
 		last = v;
 	}
 	return {
 		md: temp,
 		monospaceSections: sections,
 	};
 }
 // This function is looping over monospace sections and merging what it can merge
 function mergeMonospaceSections(md, sections) {
 	const USE_BLOCK_TAG = 1;
 	const USE_INLINE_TAG = 2;
 	const USE_EMPTY_TAG = 3;
 	const toMonospace = (md, section, startTag, endTag) => {
 		// It looks better when empty lines are not inlined
 		if (startTag == USE_INLINE_TAG && section.isEmptyLine) {
 			startTag = USE_EMPTY_TAG;
 			endTag = USE_EMPTY_TAG;
 		}
 		switch (startTag) {
 			case USE_BLOCK_TAG:
 				md[section.openIndex] = "```";
 				md[section.openIndex + 1] = NEWLINE;
 				break;
 			case USE_INLINE_TAG:
 				md[section.openIndex] = "`";
 				break;
 			case USE_EMPTY_TAG:
 				md[section.openIndex] = "";
 				break;
 		}
 		switch (endTag) {
 			case USE_BLOCK_TAG:
 				// We don't add a NEWLINE if there already is a NEWLINE
 				if (md[section.closeIndex - 2] == NEWLINE) {
 					md[section.closeIndex - 1] = "";
 				} else {
 					md[section.closeIndex - 1] = NEWLINE;
 				}
 				md[section.closeIndex] = "```";
 				break;
 			case USE_INLINE_TAG:
 				md[section.closeIndex] = "`";
 				break;
 			case USE_EMPTY_TAG:
 				md[section.closeIndex] = "";
 				break;
 		}
 	}
 	const getSection = () => {
 		return sections.shift();
 	}
 	const getMergeableSection = (first = null) => {
 		if (first) {
 			sections.unshift(first);
 		}
 		while (sections.length) {
 			s = sections.shift();
 			if (s.mergeAllowed) {
 				return s;
 			}
 			// If cannot merge then convert into inline code
 			toMonospace(md, s, USE_INLINE_TAG, USE_INLINE_TAG);
 		}
 		return null;
 	}
 	let left = getMergeableSection();
 	let right = null;
 	while (left) {
 		let isFirst = true;
 		right = getSection();
 		while (right && right.mergeAllowed && right.mergeWithPrevious) {
 			// We can merge left and right
 			if (isFirst) {
 				isFirst = false;
 				// First section
 				toMonospace(md, left, USE_BLOCK_TAG, USE_EMPTY_TAG);
 			} else {
 				// Middle section
 				toMonospace(md, left, USE_EMPTY_TAG, USE_EMPTY_TAG);
 			}
 			left = right;
 			right = getSection();
 		}
 		if (isFirst) {
 			// Could not merge, convert to inline code
 			toMonospace(md, left, USE_INLINE_TAG, USE_INLINE_TAG);
 		} else {
 			// Was merged, add block end tag
 			toMonospace(md, left, USE_EMPTY_TAG, USE_BLOCK_TAG);
 		}
 		left = getMergeableSection(right);
 	}
 }
 // This function will try to merge monospace sections
 // It works in two phases:
 //   1) It will find all monospace sections
 //   2) It will merge all monospace sections where merge is allowed
 function mergeMonospaceSectionsWrapper(md, ignoreMonospace = false) {	
 	if (!ignoreMonospace) {
 		const result = findMonospaceSections(md);
 		if (result.monospaceSections.length > 0) {
 			mergeMonospaceSections(result.md, result.monospaceSections);
 		}
 		md = result.md;
 	} 
 	// Remove empty items, it is necessary for correct function of newline merging happening outside this function
 	let temp = []
 	for (let i = 0; i < md.length; i++) {
 		let v = md[i];
 		if (ignoreMonospace && (v == MONOSPACE_OPEN || v == MONOSPACE_CLOSE)) {
 			continue; // skip
 		}
 		if (v != "") {
 			temp.push(v);
 		}
 	} 
 	return temp;		
 }
 function processMdArrayNewLines(md, isTable = false) {
 	// Try to merge MONOSPACE sections, works good when when not parsing a table
 	md = mergeMonospaceSectionsWrapper(md, isTable);
 function processMdArrayNewLines(md) {
 	while (md.length && md[0] == BLOCK_OPEN) {
 		md.shift();
 	}
@ -287,6 +496,8 @@ function enexXmlToMdArray(stream, resources) {
 		let state = {
 			inCode: false,
 			inQuote: false,
 			inMonospaceFont: false,
 			inCodeblock: 0,
 			lists: [],
 			anchorAttributes: [],
 		};
@ -315,6 +526,19 @@ function enexXmlToMdArray(stream, resources) {
 			const nodeAttributes = attributeToLowerCase(node);
 			let n = node.name.toLowerCase();
 			if (n == "div") {
 				// div tags are recursive, in order to find the end we have to count the depth
 				if (state.inCodeblock > 0) {
 					state.inCodeblock++;
 				} else if (nodeAttributes && nodeAttributes.style && nodeAttributes.style.indexOf("box-sizing: border-box") >= 0) {
 					// Evernote code block start
 					state.inCodeblock = 1;
 					section.lines.push("```");
 					return; // skip further processing
 				}
 			}
 			if (n == 'en-note') {
 				// Start of note
 			} else if (isBlockTag(n)) {
@ -502,6 +726,26 @@ function enexXmlToMdArray(stream, resources) {
 				if (resource && !!resource.id) {
 					section.lines = addResourceTag(section.lines, resource, nodeAttributes.alt);
 				}
 		 	} else if (n == "span" || n == "font") {
 				// Check for monospace font. It can come from being specified in either from
 				// <span style="..."> or <font face="...">.
 				// Monospace sections are already in monospace for Evernote code blocks
 				if (state.inCodeblock == 0 && nodeAttributes) {
 					let style = null;
 					if (nodeAttributes.style) {
 						style = nodeAttributes.style.toLowerCase();
 					} else if (nodeAttributes.face) {
 						style = nodeAttributes.face.toLowerCase();
 					}
 					monospace = style.match(/monospace|courier|menlo|monaco/) != null;
 					if (monospace) {
 						state.inMonospaceFont = true;
 						section.lines.push(MONOSPACE_OPEN);
 					}
 				} 
 			} else if (["span", "font", 'sup', 'cite', 'abbr', 'small', 'tt', 'sub', 'colgroup', 'col', 'ins', 'caption', 'var', 'map', 'area'].indexOf(n) >= 0) {
 				// Inline tags that can be ignored in Markdown
 			} else {
@ -512,6 +756,17 @@ function enexXmlToMdArray(stream, resources) {
 		saxStream.on('closetag', function(n) {
 			n = n ? n.toLowerCase() : n;
 			if (n == "div") {
 				if (state.inCodeblock >= 1) {
 					state.inCodeblock--;
 					if (state.inCodeblock == 0) {
 						// Evernote code block end
 						section.lines.push("```");
 						return; // skip further processing
 					}
 				}
 			}
 			if (n == 'en-note') {
 				// End of note
 			} else if (isNewLineOnlyEndTag(n)) {
@ -522,6 +777,11 @@ function enexXmlToMdArray(stream, resources) {
 				if (section && section.parent) section = section.parent;
 			} else if (n == 'table') {
 				if (section && section.parent) section = section.parent;
 			} else if (n == "span" || n == "font") {
 				if (state.inCodeblock == 0 && state.inMonospaceFont) {
 					state.inMonospaceFont = false;
 					section.lines.push(MONOSPACE_CLOSE);
 				}
 			} else if (isIgnoredEndTag(n)) {
 				// Skip
 			} else if (isListTag(n)) {
@ -662,7 +922,7 @@ function drawTable(table) {
 				const renderCurrentCells = () => {
 					if (!currentCells.length) return;
-					const cellText = processMdArrayNewLines(currentCells);
+					const cellText = processMdArrayNewLines(currentCells, true);
 					line.push(cellText);
 					currentCells = [];
 				}
@ -685,7 +945,7 @@ function drawTable(table) {
 				// A cell in a Markdown table cannot have actual new lines so replace
 				// them with <br>, which are supported by the markdown renderers.
-				let cellText = processMdArrayNewLines(td.lines).replace(/\n+/g, "<br>");
+				let cellText = processMdArrayNewLines(td.lines, true).replace(/\n+/g, "<br>");
 				// Inside tables cells, "|" needs to be escaped
 				cellText = cellText.replace(/\|/g, "\\|");