1
0
mirror of https://github.com/laurent22/joplin.git synced 2025-11-26 22:41:17 +02:00

HtmlToMd: Fixed various tests

This commit is contained in:
Laurent Cozic
2018-05-14 18:46:04 +01:00
parent 30bfd82683
commit ba9598682c
11 changed files with 141 additions and 11 deletions

View File

@@ -316,8 +316,30 @@ function processMdArrayNewLines(md, isTable = false) {
if (!output.trim().length) return '';
// To simplify the result, we only allow up to one empty line between blocks of text
const mergeMultipleNewLines = function(lines) {
let output = [];
let newlineCount = 0;
for (let i = 0; i < lines.length; i++) {
const line = lines[i];
if (!line.trim()) {
newlineCount++;
} else {
newlineCount = 0;
}
if (newlineCount >= 2) continue;
output.push(line);
}
return output;
}
let lines = output.replace(/\\r/g, '').split('\n');
return convertSingleLineCodeBlocksToInline(formatMdLayout(lines)).join('\n');
lines = formatMdLayout(lines)
lines = convertSingleLineCodeBlocksToInline(lines)
lines = mergeMultipleNewLines(lines);
return lines.join('\n');
}
// While the processMdArrayNewLines() function adds newlines in a way that's technically correct, the resulting Markdown can look messy.
@@ -370,7 +392,10 @@ function processMdArrayNewLines(md, isTable = false) {
}
const isPlainParagraph = function(line) {
if (!line || !line.length) return false;
// Note: if a line is no longer than 80 characters, we don't consider it's a paragraph, which
// means no newlines will be added before or after. This is to handle text that has been
// written with "hard" new lines.
if (!line || line.length < 80) return false;
if (isListItem(line)) return false;
if (isHeading(line)) return false;
@@ -401,6 +426,12 @@ function formatMdLayout(lines) {
// Add a new line after a heading
} else if (isHeading(previous) && line) {
newLines.push('');
} else if (isCodeLine(line) && !isCodeLine(previous)) {
newLines.push('');
} else if (!isCodeLine(line) && isCodeLine(previous)) {
newLines.push('');
// Add a new line at beginning of paragraph
} else if (isPlainParagraph(line) && previous) {
@@ -510,10 +541,14 @@ function collapseWhiteSpaceAndAppend(lines, state, text) {
lines.push(text);
}
} else {
// Remove all \n and \r from the left and right of the text
while (text.length && (text[0] == "\n" || text[0] == "\r")) text = text.substr(1);
while (text.length && (text[text.length - 1] == "\n" || text[text.length - 1] == "\r")) text = text.substr(0, text.length - 1);
// Replace the inner \n with a space
text = text.replace(/[\n\r]+/g, ' ');
// Collapse all white spaces to just one. If there are spaces to the left and right of the string
// also collapse them to just one space.
let spaceLeft = text.length && text[0] == ' ';
@@ -831,7 +866,7 @@ function enexXmlToMdArray(stream, resources, options = {}) {
} else if (n == "hr") {
// Needs to be surrounded by new lines so that it's properly rendered as a line when converting to HTML
section.lines.push(NEWLINE);
section.lines.push('----------------------------------------');
section.lines.push('* * *');
section.lines.push(NEWLINE);
section.lines.push(NEWLINE);
} else if (n == "h1") {
@@ -1024,7 +1059,7 @@ function enexXmlToMdArray(stream, resources, options = {}) {
let previous = null;
for (let i = section.lines.length - 1; i >= 0; i--) {
previous = section.lines[i];
if ([BLOCK_OPEN, BLOCK_CLOSE, NEWLINE, NEWLINE_MERGED, SPACE].indexOf(previous) >= 0) {
if ([BLOCK_OPEN, BLOCK_CLOSE, NEWLINE, NEWLINE_MERGED, SPACE].indexOf(previous) >= 0 || !previous) {
continue;
} else {
break;
@@ -1088,6 +1123,42 @@ function enexXmlToMdArray(stream, resources, options = {}) {
}
section.lines.push(url);
} else {
// Eg. converts:
// [ Sign in ](https://example.com)
// to:
// [Sign in](https://example.com)
const trimTextStartAndEndSpaces = function(lines) {
let firstBracketIndex = 0;
let foundFirstNonWhite = false;
for (let i = lines.length - 1; i >= 0; i--) {
const l = lines[i];
if (!foundFirstNonWhite && (l === SPACE || l === ' ' || !l)) {
lines.pop();
} else {
foundFirstNonWhite = true;
}
if (l === '[') {
firstBracketIndex = i;
break;
}
}
for (let i = firstBracketIndex + 1; i < lines.length; i++) {
const l = lines[i];
if (l === SPACE || l === ' ' ||!l) {
lines.splice(i, 1);
} else {
break;
}
}
return lines;
}
section.lines = trimTextStartAndEndSpaces(section.lines);
section.lines.push('](' + url + ')');
}
}
@@ -1263,7 +1334,34 @@ async function enexXmlToMd(stream, resources, options = {}) {
firstAttachment = false;
}
return processMdArrayNewLines(mdLines);
let output = processMdArrayNewLines(mdLines);
// After importing HTML, the resulting Markdown often has empty lines at the beginning and end due to
// block start/end or elements that were ignored, etc. If these white spaces were intended it's not really
// possible to detect it, so simply trim them all so that the result is more deterministic and can be
// easily unit tested.
const trimEmptyLines = function(text) {
const lines = text.split('\n');
while (lines.length) {
if (!lines[0].trim()) {
lines.splice(0, 1);
} else {
break;
}
}
while (lines.length) {
if (!lines[lines.length - 1].trim()) {
lines.pop();
} else {
break;
}
}
return lines.join('\n');
}
return trimEmptyLines(output);
}
module.exports = { enexXmlToMd, processMdArrayNewLines, NEWLINE, addResourceTag };