1
0
mirror of https://github.com/laurent22/joplin.git synced 2025-01-26 18:58:21 +02:00

HtmlToMd: Fixed various tests

This commit is contained in:
Laurent Cozic 2018-05-14 18:46:04 +01:00
parent 30bfd82683
commit ba9598682c
11 changed files with 141 additions and 11 deletions

View File

@ -35,12 +35,12 @@ describe('HtmlToMd', function() {
const htmlPath = basePath + '/' + htmlFilename;
const mdPath = basePath + '/' + filename(htmlFilename) + '.md';
// if (htmlFilename !== 'tableWithNewLines.html') continue;
// if (htmlFilename !== 'list.html') continue;
const html = await shim.fsDriver().readFile(htmlPath);
const expectedMd = await shim.fsDriver().readFile(mdPath);
const contentStream = stringToStream(html);
const contentStream = stringToStream('<div>' + html + '</div>');
const actualMd = await enexXmlToMd(contentStream, []);
if (actualMd !== expectedMd) {
@ -54,7 +54,7 @@ describe('HtmlToMd', function() {
console.info('');
expect(false).toBe(true);
return;
// return;
} else {
expect(true).toBe(true)
}

View File

@ -0,0 +1,4 @@
<a href="https://arstechnica.com/civis/ucp.php?mode=login&return_to=%2Ftech-policy%2F2018%2F05%2Fjails-are-replacing-in-person-visits-with-video-calling-services-theyre-awful%2F" class="dropdown-toggle">
Sign in
<span class="icon dropdown-indicator icon-drop-indicator"></span>
</a>

View File

@ -0,0 +1 @@
[Sign in](https://arstechnica.com/civis/ucp.php?mode=login&return_to=%2Ftech-policy%2F2018%2F05%2Fjails-are-replacing-in-person-visits-with-video-calling-services-theyre-awful%2F)

View File

@ -0,0 +1,11 @@
<ul class="find-me-on">
<li>
<a href="https://github.com/zetter">Github</a>
</li>
<li>
<a href="https://twitter.com/czetter">Twitter</a>
</li>
<li>
<a href="http://lanyrd.com/profile/czetter/">Lanyrd</a>
</li>
</ul>

View File

@ -0,0 +1,3 @@
- [Github](https://github.com/zetter)
- [Twitter](https://twitter.com/czetter)
- [Lanyrd](http://lanyrd.com/profile/czetter/)

View File

@ -1,5 +1,11 @@
<div>
<p>Short paragraphs are merged together:</p>
<p>Something something</p>
<p>Blablbla blabla lbla</p>
<p>Last line</p>
</div>
<div>
<p>Longer ones are separated by new lines. In 1894 Joplin arrived in Sedalia, Missouri. At first, Joplin stayed with the family of Arthur Marshall, at the time a 13-year-old boy but later one of Joplin's students and a rag-time composer in his own right.[26] There is no record of Joplin having a permanent residence in the town until 1904, as Joplin was making a living as a touring musician.</p>
<p>There is little precise evidence known about Joplin's activities at this time, although he performed as a solo musician at dances and at the major black clubs in Sedalia, the Black 400 club and the Maple Leaf Club. He performed in the Queen City Cornet Band, and his own six-piece dance orchestra.</p>
</div>

View File

@ -1,5 +1,8 @@
Short paragraphs are merged together:
Something something
Blablbla blabla lbla
Last line
Longer ones are separated by new lines. In 1894 Joplin arrived in Sedalia, Missouri. At first, Joplin stayed with the family of Arthur Marshall, at the time a 13-year-old boy but later one of Joplin's students and a rag-time composer in his own right.[26] There is no record of Joplin having a permanent residence in the town until 1904, as Joplin was making a living as a touring musician.
There is little precise evidence known about Joplin's activities at this time, although he performed as a solo musician at dances and at the major black clubs in Sedalia, the Black 400 club and the Maple Leaf Club. He performed in the Queen City Cornet Band, and his own six-piece dance orchestra.

View File

@ -0,0 +1,2 @@
<span style="line-height: 107%;">Kurt Gödel published a very short
but profound paper titled</span>

View File

@ -0,0 +1 @@
Kurt Gödel published a very short but profound paper titled

View File

@ -316,8 +316,30 @@ function processMdArrayNewLines(md, isTable = false) {
if (!output.trim().length) return '';
// To simplify the result, we only allow up to one empty line between blocks of text
// Collapses every run of consecutive blank (empty or whitespace-only) lines down to
// its first line, so that at most one blank line separates two blocks of text.
// Returns a new array; the input array is left untouched.
const mergeMultipleNewLines = function(lines) {
	// Number of blank lines seen in a row, including the current one
	let blankRun = 0;

	return lines.filter((current) => {
		blankRun = current.trim() ? 0 : blankRun + 1;
		// Keep text lines and the first blank line of a run; drop the rest
		return blankRun < 2;
	});
};
let lines = output.replace(/\\r/g, '').split('\n');
return convertSingleLineCodeBlocksToInline(formatMdLayout(lines)).join('\n');
lines = formatMdLayout(lines)
lines = convertSingleLineCodeBlocksToInline(lines)
lines = mergeMultipleNewLines(lines);
return lines.join('\n');
}
// While the processMdArrayNewLines() function adds newlines in a way that's technically correct, the resulting Markdown can look messy.
@ -370,7 +392,10 @@ function processMdArrayNewLines(md, isTable = false) {
}
const isPlainParagraph = function(line) {
if (!line || !line.length) return false;
// Note: if a line is shorter than 80 characters, we don't consider it to be a paragraph, which
// means no newlines will be added before or after it. This is to handle text that has been
// written with "hard" new lines.
if (!line || line.length < 80) return false;
if (isListItem(line)) return false;
if (isHeading(line)) return false;
@ -402,6 +427,12 @@ function formatMdLayout(lines) {
} else if (isHeading(previous) && line) {
newLines.push('');
} else if (isCodeLine(line) && !isCodeLine(previous)) {
newLines.push('');
} else if (!isCodeLine(line) && isCodeLine(previous)) {
newLines.push('');
// Add a new line at beginning of paragraph
} else if (isPlainParagraph(line) && previous) {
newLines.push('');
@ -510,10 +541,14 @@ function collapseWhiteSpaceAndAppend(lines, state, text) {
lines.push(text);
}
} else {
// Remove all \n and \r from the left and right of the text
while (text.length && (text[0] == "\n" || text[0] == "\r")) text = text.substr(1);
while (text.length && (text[text.length - 1] == "\n" || text[text.length - 1] == "\r")) text = text.substr(0, text.length - 1);
// Replace the inner \n with a space
text = text.replace(/[\n\r]+/g, ' ');
// Collapse all white spaces to just one. If there are spaces to the left and right of the string
// also collapse them to just one space.
let spaceLeft = text.length && text[0] == ' ';
@ -831,7 +866,7 @@ function enexXmlToMdArray(stream, resources, options = {}) {
} else if (n == "hr") {
// Needs to be surrounded by new lines so that it's properly rendered as a line when converting to HTML
section.lines.push(NEWLINE);
section.lines.push('----------------------------------------');
section.lines.push('* * *');
section.lines.push(NEWLINE);
section.lines.push(NEWLINE);
} else if (n == "h1") {
@ -1024,7 +1059,7 @@ function enexXmlToMdArray(stream, resources, options = {}) {
let previous = null;
for (let i = section.lines.length - 1; i >= 0; i--) {
previous = section.lines[i];
if ([BLOCK_OPEN, BLOCK_CLOSE, NEWLINE, NEWLINE_MERGED, SPACE].indexOf(previous) >= 0) {
if ([BLOCK_OPEN, BLOCK_CLOSE, NEWLINE, NEWLINE_MERGED, SPACE].indexOf(previous) >= 0 || !previous) {
continue;
} else {
break;
@ -1088,6 +1123,42 @@ function enexXmlToMdArray(stream, resources, options = {}) {
}
section.lines.push(url);
} else {
// Eg. converts:
// [ Sign in ](https://example.com)
// to:
// [Sign in](https://example.com)
// Trims the whitespace tokens surrounding the text of the link that is currently
// being built at the end of `lines`, i.e. the tokens that follow the last '[' token.
//
// Eg. converts:
//     [ Sign in ](https://example.com)
// to:
//     [Sign in](https://example.com)
//
// A token counts as whitespace when it is the SPACE marker, a literal ' ' or any
// falsy value (such as an empty string).
//
// The array is modified in place and also returned.
const trimTextStartAndEndSpaces = function(lines) {
	const isWhiteToken = function(token) {
		return token === SPACE || token === ' ' || !token;
	};

	// Drop all the whitespace tokens at the end of the array
	while (lines.length && isWhiteToken(lines[lines.length - 1])) {
		lines.pop();
	}

	// Locate the opening bracket of the link text. As before, when there is none
	// the index falls back to 0.
	const bracketIndex = lines.lastIndexOf('[');
	const textStart = (bracketIndex < 0 ? 0 : bracketIndex) + 1;

	// Count ALL the consecutive whitespace tokens that follow the bracket, then remove
	// them in one go. Removing them one by one while also advancing the index would
	// skip every other token and leave some whitespace behind.
	let whiteCount = 0;
	while (textStart + whiteCount < lines.length && isWhiteToken(lines[textStart + whiteCount])) {
		whiteCount++;
	}
	if (whiteCount) lines.splice(textStart, whiteCount);

	return lines;
};
section.lines = trimTextStartAndEndSpaces(section.lines);
section.lines.push('](' + url + ')');
}
}
@ -1263,7 +1334,34 @@ async function enexXmlToMd(stream, resources, options = {}) {
firstAttachment = false;
}
return processMdArrayNewLines(mdLines);
let output = processMdArrayNewLines(mdLines);
// After importing HTML, the resulting Markdown often has empty lines at the beginning and end due to
// block start/end or elements that were ignored, etc. If these white spaces were intended it's not really
// possible to detect it, so simply trim them all so that the result is more deterministic and can be
// easily unit tested.
// Removes the blank (empty or whitespace-only) lines found at the very beginning and
// at the very end of `text`. Lines in between, blank or not, are kept unchanged.
const trimEmptyLines = function(text) {
	const rows = text.split('\n');

	// Index of the first line that has some content
	let first = 0;
	while (first < rows.length && rows[first].trim() === '') first++;

	// Index just past the last line that has some content
	let last = rows.length;
	while (last > first && rows[last - 1].trim() === '') last--;

	return rows.slice(first, last).join('\n');
};
return trimEmptyLines(output);
}
module.exports = { enexXmlToMd, processMdArrayNewLines, NEWLINE, addResourceTag };

View File

@ -40,7 +40,8 @@ function safeFileExtension(e) {
return e.replace(/[^a-zA-Z0-9]/g, '')
}
function toSystemSlashes(path, os) {
// Converts every slash of `path` to the separator used by the given platform:
// backslashes when `os` is 'win32', forward slashes for anything else.
// When `os` is omitted (or null), the current process.platform is used.
function toSystemSlashes(path, os = null) {
	const platform = os === null ? process.platform : os;
	return platform === 'win32' ? path.replace(/\//g, "\\") : path.replace(/\\/g, "/");
}