mirror of
https://github.com/laurent22/joplin.git
synced 2025-01-26 18:58:21 +02:00
HtmlToMd: Fixed various tests
This commit is contained in:
parent
30bfd82683
commit
ba9598682c
@ -35,12 +35,12 @@ describe('HtmlToMd', function() {
|
||||
const htmlPath = basePath + '/' + htmlFilename;
|
||||
const mdPath = basePath + '/' + filename(htmlFilename) + '.md';
|
||||
|
||||
// if (htmlFilename !== 'tableWithNewLines.html') continue;
|
||||
// if (htmlFilename !== 'list.html') continue;
|
||||
|
||||
const html = await shim.fsDriver().readFile(htmlPath);
|
||||
const expectedMd = await shim.fsDriver().readFile(mdPath);
|
||||
|
||||
const contentStream = stringToStream(html);
|
||||
const contentStream = stringToStream('<div>' + html + '</div>');
|
||||
const actualMd = await enexXmlToMd(contentStream, []);
|
||||
|
||||
if (actualMd !== expectedMd) {
|
||||
@ -54,7 +54,7 @@ describe('HtmlToMd', function() {
|
||||
console.info('');
|
||||
|
||||
expect(false).toBe(true);
|
||||
return;
|
||||
// return;
|
||||
} else {
|
||||
expect(true).toBe(true)
|
||||
}
|
||||
|
4
CliClient/tests/html_to_md/link1.html
Normal file
4
CliClient/tests/html_to_md/link1.html
Normal file
@ -0,0 +1,4 @@
|
||||
<a href="https://arstechnica.com/civis/ucp.php?mode=login&return_to=%2Ftech-policy%2F2018%2F05%2Fjails-are-replacing-in-person-visits-with-video-calling-services-theyre-awful%2F" class="dropdown-toggle">
|
||||
Sign in
|
||||
<span class="icon dropdown-indicator icon-drop-indicator"></span>
|
||||
</a>
|
1
CliClient/tests/html_to_md/link1.md
Normal file
1
CliClient/tests/html_to_md/link1.md
Normal file
@ -0,0 +1 @@
|
||||
[Sign in](https://arstechnica.com/civis/ucp.php?mode=login&return_to=%2Ftech-policy%2F2018%2F05%2Fjails-are-replacing-in-person-visits-with-video-calling-services-theyre-awful%2F)
|
11
CliClient/tests/html_to_md/list2.html
Normal file
11
CliClient/tests/html_to_md/list2.html
Normal file
@ -0,0 +1,11 @@
|
||||
<ul class="find-me-on">
|
||||
<li>
|
||||
<a href="https://github.com/zetter">Github</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="https://twitter.com/czetter">Twitter</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="http://lanyrd.com/profile/czetter/">Lanyrd</a>
|
||||
</li>
|
||||
</ul>
|
3
CliClient/tests/html_to_md/list2.md
Normal file
3
CliClient/tests/html_to_md/list2.md
Normal file
@ -0,0 +1,3 @@
|
||||
- [Github](https://github.com/zetter)
|
||||
- [Twitter](https://twitter.com/czetter)
|
||||
- [Lanyrd](http://lanyrd.com/profile/czetter/)
|
@ -1,5 +1,11 @@
|
||||
<div>
|
||||
<p>Short paragraphs are merged together:</p>
|
||||
<p>Something something</p>
|
||||
<p>Blablbla blabla lbla</p>
|
||||
<p>Last line</p>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<p>Longer ones are separated by new lines. In 1894 Joplin arrived in Sedalia, Missouri. At first, Joplin stayed with the family of Arthur Marshall, at the time a 13-year-old boy but later one of Joplin's students and a rag-time composer in his own right.[26] There is no record of Joplin having a permanent residence in the town until 1904, as Joplin was making a living as a touring musician.</p>
|
||||
<p>There is little precise evidence known about Joplin's activities at this time, although he performed as a solo musician at dances and at the major black clubs in Sedalia, the Black 400 club and the Maple Leaf Club. He performed in the Queen City Cornet Band, and his own six-piece dance orchestra.</p>
|
||||
</div>
|
@ -1,5 +1,8 @@
|
||||
Short paragraphs are merged together:
|
||||
Something something
|
||||
|
||||
Blablbla blabla lbla
|
||||
|
||||
Last line
|
||||
|
||||
Longer ones are separated by new lines. In 1894 Joplin arrived in Sedalia, Missouri. At first, Joplin stayed with the family of Arthur Marshall, at the time a 13-year-old boy but later one of Joplin's students and a rag-time composer in his own right.[26] There is no record of Joplin having a permanent residence in the town until 1904, as Joplin was making a living as a touring musician.
|
||||
|
||||
There is little precise evidence known about Joplin's activities at this time, although he performed as a solo musician at dances and at the major black clubs in Sedalia, the Black 400 club and the Maple Leaf Club. He performed in the Queen City Cornet Band, and his own six-piece dance orchestra.
|
2
CliClient/tests/html_to_md/text1.html
Normal file
2
CliClient/tests/html_to_md/text1.html
Normal file
@ -0,0 +1,2 @@
|
||||
<span style="line-height: 107%;">Kurt Gödel published a very short
|
||||
but profound paper titled</span>
|
1
CliClient/tests/html_to_md/text1.md
Normal file
1
CliClient/tests/html_to_md/text1.md
Normal file
@ -0,0 +1 @@
|
||||
Kurt Gödel published a very short but profound paper titled
|
@ -316,8 +316,30 @@ function processMdArrayNewLines(md, isTable = false) {
|
||||
|
||||
if (!output.trim().length) return '';
|
||||
|
||||
// To simplify the result, we only allow up to one empty line between blocks of text.
// A line counts as empty when it contains nothing but whitespace. The first empty line
// of each run is kept unchanged and the following ones are dropped.
// Returns a new array - the `lines` argument itself is not modified.
const mergeMultipleNewLines = function(lines) {
	// Named `merged` rather than `output` so that it doesn't shadow the `output`
	// string of the enclosing function.
	const merged = [];
	let emptyLineCount = 0;

	for (const line of lines) {
		// Number of consecutive empty lines seen so far, including this one
		emptyLineCount = line.trim() ? 0 : emptyLineCount + 1;

		// Second (or later) empty line in a row - skip it
		if (emptyLineCount >= 2) continue;

		merged.push(line);
	}

	return merged;
};
|
||||
|
||||
let lines = output.replace(/\\r/g, '').split('\n');
|
||||
return convertSingleLineCodeBlocksToInline(formatMdLayout(lines)).join('\n');
|
||||
lines = formatMdLayout(lines)
|
||||
lines = convertSingleLineCodeBlocksToInline(lines)
|
||||
lines = mergeMultipleNewLines(lines);
|
||||
return lines.join('\n');
|
||||
}
|
||||
|
||||
// While the processMdArrayNewLines() function adds newlines in a way that's technically correct, the resulting Markdown can look messy.
|
||||
@ -370,7 +392,10 @@ function processMdArrayNewLines(md, isTable = false) {
|
||||
}
|
||||
|
||||
const isPlainParagraph = function(line) {
|
||||
if (!line || !line.length) return false;
|
||||
// Note: if a line is shorter than 80 characters, we don't consider it a paragraph, which
|
||||
// means no newlines will be added before or after. This is to handle text that has been
|
||||
// written with "hard" new lines.
|
||||
if (!line || line.length < 80) return false;
|
||||
|
||||
if (isListItem(line)) return false;
|
||||
if (isHeading(line)) return false;
|
||||
@ -402,6 +427,12 @@ function formatMdLayout(lines) {
|
||||
} else if (isHeading(previous) && line) {
|
||||
newLines.push('');
|
||||
|
||||
} else if (isCodeLine(line) && !isCodeLine(previous)) {
|
||||
newLines.push('');
|
||||
|
||||
} else if (!isCodeLine(line) && isCodeLine(previous)) {
|
||||
newLines.push('');
|
||||
|
||||
// Add a new line at beginning of paragraph
|
||||
} else if (isPlainParagraph(line) && previous) {
|
||||
newLines.push('');
|
||||
@ -510,10 +541,14 @@ function collapseWhiteSpaceAndAppend(lines, state, text) {
|
||||
lines.push(text);
|
||||
}
|
||||
} else {
|
||||
|
||||
// Remove all \n and \r from the left and right of the text
|
||||
while (text.length && (text[0] == "\n" || text[0] == "\r")) text = text.substr(1);
|
||||
while (text.length && (text[text.length - 1] == "\n" || text[text.length - 1] == "\r")) text = text.substr(0, text.length - 1);
|
||||
|
||||
// Replace the inner \n with a space
|
||||
text = text.replace(/[\n\r]+/g, ' ');
|
||||
|
||||
// Collapse all white spaces to just one. If there are spaces to the left and right of the string
|
||||
// also collapse them to just one space.
|
||||
let spaceLeft = text.length && text[0] == ' ';
|
||||
@ -831,7 +866,7 @@ function enexXmlToMdArray(stream, resources, options = {}) {
|
||||
} else if (n == "hr") {
|
||||
// Needs to be surrounded by new lines so that it's properly rendered as a line when converting to HTML
|
||||
section.lines.push(NEWLINE);
|
||||
section.lines.push('----------------------------------------');
|
||||
section.lines.push('* * *');
|
||||
section.lines.push(NEWLINE);
|
||||
section.lines.push(NEWLINE);
|
||||
} else if (n == "h1") {
|
||||
@ -1024,7 +1059,7 @@ function enexXmlToMdArray(stream, resources, options = {}) {
|
||||
let previous = null;
|
||||
for (let i = section.lines.length - 1; i >= 0; i--) {
|
||||
previous = section.lines[i];
|
||||
if ([BLOCK_OPEN, BLOCK_CLOSE, NEWLINE, NEWLINE_MERGED, SPACE].indexOf(previous) >= 0) {
|
||||
if ([BLOCK_OPEN, BLOCK_CLOSE, NEWLINE, NEWLINE_MERGED, SPACE].indexOf(previous) >= 0 || !previous) {
|
||||
continue;
|
||||
} else {
|
||||
break;
|
||||
@ -1088,6 +1123,42 @@ function enexXmlToMdArray(stream, resources, options = {}) {
|
||||
}
|
||||
section.lines.push(url);
|
||||
} else {
|
||||
|
||||
// Removes the blank entries found right after the opening bracket of a link and at
// the end of the link text. Eg. converts:
//     [ Sign in ](https://example.com)
// to:
//     [Sign in](https://example.com)
// An entry is considered blank when it is the SPACE constant, a single space or a
// falsy value. Blank entries located between two non-blank ones are left alone.
// The `lines` array is modified in place and is also returned.
const trimTextStartAndEndSpaces = function(lines) {
	const isBlank = function(l) {
		return l === SPACE || l === ' ' || !l;
	};

	// Walk backward from the end of the array, dropping trailing blank entries, until
	// the opening bracket is found. If there is no '[' entry, the index stays at 0.
	let firstBracketIndex = 0;
	let foundFirstNonWhite = false;
	for (let i = lines.length - 1; i >= 0; i--) {
		const l = lines[i];
		if (!foundFirstNonWhite && isBlank(l)) {
			// Everything after index i has already been popped, so this removes lines[i]
			lines.pop();
		} else {
			foundFirstNonWhite = true;
		}

		if (l === '[') {
			firstBracketIndex = i;
			break;
		}
	}

	// Measure the run of blank entries that follows the bracket and remove it in one
	// call. Removing the entries one at a time while also incrementing the index would
	// skip the entry that shifts into the current position, leaving every other blank
	// entry behind.
	const start = firstBracketIndex + 1;
	let end = start;
	while (end < lines.length && isBlank(lines[end])) end++;
	lines.splice(start, end - start);

	return lines;
};
|
||||
|
||||
section.lines = trimTextStartAndEndSpaces(section.lines);
|
||||
|
||||
section.lines.push('](' + url + ')');
|
||||
}
|
||||
}
|
||||
@ -1263,7 +1334,34 @@ async function enexXmlToMd(stream, resources, options = {}) {
|
||||
firstAttachment = false;
|
||||
}
|
||||
|
||||
return processMdArrayNewLines(mdLines);
|
||||
let output = processMdArrayNewLines(mdLines);
|
||||
|
||||
// After importing HTML, the resulting Markdown often has empty lines at the beginning and end due to
// block start/end or elements that were ignored, etc. If these white spaces were intended it's not really
// possible to detect it, so simply trim them all so that the result is more deterministic and can be
// easily unit tested.
//
// Takes the text as a string, and returns it without its leading and trailing lines that are empty or
// contain only whitespace. Lines in the middle of the text are left as they are. If every line is empty,
// an empty string is returned.
const trimEmptyLines = function(text) {
	const lines = text.split('\n');

	// Index of the first line to keep. Scanning with an index and slicing once avoids
	// shifting the whole array for each removed leading line.
	let start = 0;
	while (start < lines.length && !lines[start].trim()) start++;

	// Index just after the last line to keep
	let end = lines.length;
	while (end > start && !lines[end - 1].trim()) end--;

	return lines.slice(start, end).join('\n');
};
|
||||
|
||||
return trimEmptyLines(output);
|
||||
}
|
||||
|
||||
module.exports = { enexXmlToMd, processMdArrayNewLines, NEWLINE, addResourceTag };
|
@ -40,7 +40,8 @@ function safeFileExtension(e) {
|
||||
return e.replace(/[^a-zA-Z0-9]/g, '')
|
||||
}
|
||||
|
||||
function toSystemSlashes(path, os) {
|
||||
// Converts the slashes of `path` to the style used by the given platform: forward
// slashes become backslashes when `os` is 'win32', and backslashes become forward
// slashes for any other value. When `os` is omitted (or null), the platform reported
// by process.platform is used.
function toSystemSlashes(path, os = null) {
	const platform = os === null ? process.platform : os;
	return platform === 'win32' ? path.replace(/\//g, '\\') : path.replace(/\\/g, '/');
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user