mirror of
https://github.com/laurent22/joplin.git
synced 2024-11-24 08:12:24 +02:00
HtmlToMd: Fixed handling of inline tags
This commit is contained in:
parent
8cce2f17d5
commit
a2b1181f7c
93
CliClient/package-lock.json
generated
93
CliClient/package-lock.json
generated
@ -122,6 +122,15 @@
|
||||
"concat-map": "0.0.1"
|
||||
}
|
||||
},
|
||||
"camel-case": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/camel-case/-/camel-case-3.0.0.tgz",
|
||||
"integrity": "sha1-yjw2iKTpzzpM2nd9xNy8cTJJz3M=",
|
||||
"requires": {
|
||||
"no-case": "^2.2.0",
|
||||
"upper-case": "^1.1.1"
|
||||
}
|
||||
},
|
||||
"camelcase": {
|
||||
"version": "4.1.0",
|
||||
"resolved": "https://registry.npmjs.org/camelcase/-/camelcase-4.1.0.tgz",
|
||||
@ -163,6 +172,14 @@
|
||||
"resolved": "https://registry.npmjs.org/chownr/-/chownr-1.0.1.tgz",
|
||||
"integrity": "sha1-4qdQQqlVGQi+vSW4Uj1fl2nXkYE="
|
||||
},
|
||||
"clean-css": {
|
||||
"version": "4.1.11",
|
||||
"resolved": "https://registry.npmjs.org/clean-css/-/clean-css-4.1.11.tgz",
|
||||
"integrity": "sha1-Ls3xRaujj1R0DybO/Q/z4D4SXWo=",
|
||||
"requires": {
|
||||
"source-map": "0.5.x"
|
||||
}
|
||||
},
|
||||
"co": {
|
||||
"version": "4.6.0",
|
||||
"resolved": "https://registry.npmjs.org/co/-/co-4.6.0.tgz",
|
||||
@ -207,6 +224,11 @@
|
||||
"delayed-stream": "~1.0.0"
|
||||
}
|
||||
},
|
||||
"commander": {
|
||||
"version": "2.15.1",
|
||||
"resolved": "https://registry.npmjs.org/commander/-/commander-2.15.1.tgz",
|
||||
"integrity": "sha512-VlfT9F3V0v+jr4yxPc5gg9s62/fIVWsd2Bk2iD435um1NlGMYdVCq+MjcXnhYq2icNOizHr1kK+5TI6H0Hy0ag=="
|
||||
},
|
||||
"compare-version": {
|
||||
"version": "0.1.2",
|
||||
"resolved": "https://registry.npmjs.org/compare-version/-/compare-version-0.1.2.tgz",
|
||||
@ -573,6 +595,11 @@
|
||||
"sntp": "2.x.x"
|
||||
}
|
||||
},
|
||||
"he": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/he/-/he-1.1.1.tgz",
|
||||
"integrity": "sha1-k0EP0hsAlzUVH4howvJx80J+I/0="
|
||||
},
|
||||
"highlight.js": {
|
||||
"version": "9.12.0",
|
||||
"resolved": "https://registry.npmjs.org/highlight.js/-/highlight.js-9.12.0.tgz",
|
||||
@ -588,6 +615,20 @@
|
||||
"resolved": "https://registry.npmjs.org/html-entities/-/html-entities-1.2.1.tgz",
|
||||
"integrity": "sha1-DfKTUfByEWNRXfueVUPl9u7VFi8="
|
||||
},
|
||||
"html-minifier": {
|
||||
"version": "3.5.15",
|
||||
"resolved": "https://registry.npmjs.org/html-minifier/-/html-minifier-3.5.15.tgz",
|
||||
"integrity": "sha512-OZa4rfb6tZOZ3Z8Xf0jKxXkiDcFWldQePGYFDcgKqES2sXeWaEv9y6QQvWUtX3ySI3feApQi5uCsHLINQ6NoAw==",
|
||||
"requires": {
|
||||
"camel-case": "3.0.x",
|
||||
"clean-css": "4.1.x",
|
||||
"commander": "2.15.x",
|
||||
"he": "1.1.x",
|
||||
"param-case": "2.1.x",
|
||||
"relateurl": "0.2.x",
|
||||
"uglify-js": "3.3.x"
|
||||
}
|
||||
},
|
||||
"http-signature": {
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/http-signature/-/http-signature-1.2.0.tgz",
|
||||
@ -815,6 +856,11 @@
|
||||
"js-tokens": "^3.0.0"
|
||||
}
|
||||
},
|
||||
"lower-case": {
|
||||
"version": "1.1.4",
|
||||
"resolved": "https://registry.npmjs.org/lower-case/-/lower-case-1.1.4.tgz",
|
||||
"integrity": "sha1-miyr0bno4K6ZOkv31YdcOcQujqw="
|
||||
},
|
||||
"lowlight": {
|
||||
"version": "1.9.2",
|
||||
"resolved": "https://registry.npmjs.org/lowlight/-/lowlight-1.9.2.tgz",
|
||||
@ -933,6 +979,14 @@
|
||||
"resolved": "https://registry.npmjs.org/nextgen-events/-/nextgen-events-0.11.3.tgz",
|
||||
"integrity": "sha512-dC4v/dOF6m8/M05eU712KXjRJ0e/187rx5CMS/fTnulv2QGPps1U/c/J1D3wtegEhK+EE7LuJc3jly3pyfV46g=="
|
||||
},
|
||||
"no-case": {
|
||||
"version": "2.3.2",
|
||||
"resolved": "https://registry.npmjs.org/no-case/-/no-case-2.3.2.tgz",
|
||||
"integrity": "sha512-rmTZ9kz+f3rCvK2TD1Ue/oZlns7OGoIWP4fc3llxxRXlOkHKoWPPWJOfFYpITabSow43QJbRIoHQXtt10VldyQ==",
|
||||
"requires": {
|
||||
"lower-case": "^1.1.1"
|
||||
}
|
||||
},
|
||||
"node-bitmap": {
|
||||
"version": "0.0.1",
|
||||
"resolved": "https://registry.npmjs.org/node-bitmap/-/node-bitmap-0.0.1.tgz",
|
||||
@ -997,6 +1051,14 @@
|
||||
"wrappy": "1"
|
||||
}
|
||||
},
|
||||
"param-case": {
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/param-case/-/param-case-2.1.1.tgz",
|
||||
"integrity": "sha1-35T9jPZTHs915r75oIWPvHK+Ikc=",
|
||||
"requires": {
|
||||
"no-case": "^2.2.0"
|
||||
}
|
||||
},
|
||||
"parse-data-uri": {
|
||||
"version": "0.2.0",
|
||||
"resolved": "https://registry.npmjs.org/parse-data-uri/-/parse-data-uri-0.2.0.tgz",
|
||||
@ -1107,6 +1169,11 @@
|
||||
"symbol-observable": "^1.0.3"
|
||||
}
|
||||
},
|
||||
"relateurl": {
|
||||
"version": "0.2.7",
|
||||
"resolved": "https://registry.npmjs.org/relateurl/-/relateurl-0.2.7.tgz",
|
||||
"integrity": "sha1-VNvzd+UUQKypCkzSdGANP/LYiKk="
|
||||
},
|
||||
"request": {
|
||||
"version": "2.85.0",
|
||||
"resolved": "https://registry.npmjs.org/request/-/request-2.85.0.tgz",
|
||||
@ -1233,6 +1300,11 @@
|
||||
"hoek": "4.x.x"
|
||||
}
|
||||
},
|
||||
"source-map": {
|
||||
"version": "0.5.7",
|
||||
"resolved": "https://registry.npmjs.org/source-map/-/source-map-0.5.7.tgz",
|
||||
"integrity": "sha1-igOdLRAh0i0eoUyA2OpGi6LvP8w="
|
||||
},
|
||||
"sprintf-js": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.1.1.tgz",
|
||||
@ -2138,6 +2210,22 @@
|
||||
"integrity": "sha1-WuaBd/GS1EViadEIr6k/+HQ/T2Q=",
|
||||
"optional": true
|
||||
},
|
||||
"uglify-js": {
|
||||
"version": "3.3.25",
|
||||
"resolved": "https://registry.npmjs.org/uglify-js/-/uglify-js-3.3.25.tgz",
|
||||
"integrity": "sha512-hobogryjDV36VrLK3Y69ou4REyrTApzUblVFmdQOYRe8cYaSmFJXMb4dR9McdvYDSbeNdzUgYr2YVukJaErJcA==",
|
||||
"requires": {
|
||||
"commander": "~2.15.0",
|
||||
"source-map": "~0.6.1"
|
||||
},
|
||||
"dependencies": {
|
||||
"source-map": {
|
||||
"version": "0.6.1",
|
||||
"resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz",
|
||||
"integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g=="
|
||||
}
|
||||
}
|
||||
},
|
||||
"unc-path-regex": {
|
||||
"version": "0.1.2",
|
||||
"resolved": "https://registry.npmjs.org/unc-path-regex/-/unc-path-regex-0.1.2.tgz",
|
||||
@ -2153,6 +2241,11 @@
|
||||
"resolved": "https://registry.npmjs.org/universalify/-/universalify-0.1.1.tgz",
|
||||
"integrity": "sha1-+nG63UQ3r0wUiEHjs7Fl+enlkLc="
|
||||
},
|
||||
"upper-case": {
|
||||
"version": "1.1.3",
|
||||
"resolved": "https://registry.npmjs.org/upper-case/-/upper-case-1.1.3.tgz",
|
||||
"integrity": "sha1-9rRQHC7EzdJrp4vnIilh3ndiFZg="
|
||||
},
|
||||
"url-parse": {
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/url-parse/-/url-parse-1.2.0.tgz",
|
||||
|
@ -35,6 +35,7 @@
|
||||
"form-data": "^2.1.4",
|
||||
"fs-extra": "^5.0.0",
|
||||
"html-entities": "^1.2.1",
|
||||
"html-minifier": "^3.5.15",
|
||||
"jssha": "^2.3.0",
|
||||
"levenshtein": "^1.0.5",
|
||||
"lodash": "^4.17.4",
|
||||
|
@ -34,7 +34,7 @@ describe('HtmlToMd', function() {
|
||||
const htmlPath = basePath + '/' + htmlFilename;
|
||||
const mdPath = basePath + '/' + filename(htmlFilename) + '.md';
|
||||
|
||||
// if (htmlFilename !== 'code2.html') continue;
|
||||
// if (htmlFilename !== 'text2.html') continue;
|
||||
|
||||
const html = await shim.fsDriver().readFile(htmlPath);
|
||||
const expectedMd = await shim.fsDriver().readFile(mdPath);
|
||||
|
@ -1,5 +1,5 @@
|
||||
| |
|
||||
| --- |
|
||||
| $ sudo ethtool --set-priv-flags p2p1 mlx4_rss_xor_hash_function on<br># Three empty lines follow<br> |
|
||||
| $ sudo ethtool --set-priv-flags p2p1 mlx4_rss_xor_hash_function on<br># Three empty lines follow |
|
||||
|
||||
Some text
|
@ -337,6 +337,9 @@ function processMdArrayNewLines(md, isTable = false) {
|
||||
}
|
||||
|
||||
let lines = output.replace(/\\r/g, '').split('\n');
|
||||
|
||||
// console.info(lines);
|
||||
|
||||
lines = formatMdLayout(lines)
|
||||
// lines = convertSingleLineCodeBlocksToInline(lines)
|
||||
lines = mergeMultipleNewLines(lines);
|
||||
@ -530,6 +533,8 @@ function simplifyString(s) {
|
||||
}
|
||||
|
||||
function collapseWhiteSpaceAndAppend(lines, state, text) {
|
||||
// console.info([text]);
|
||||
|
||||
if (state.inCode.length) {
|
||||
lines.push(text);
|
||||
|
||||
@ -556,6 +561,11 @@ function collapseWhiteSpaceAndAppend(lines, state, text) {
|
||||
|
||||
// console.info(lines);
|
||||
|
||||
if (!!text.match(/^\n+$/)) {
|
||||
lines.push(' ');
|
||||
return lines;
|
||||
}
|
||||
|
||||
// Remove all \n and \r from the left and right of the text
|
||||
while (text.length && (text[0] == "\n" || text[0] == "\r")) text = text.substr(1);
|
||||
while (text.length && (text[text.length - 1] == "\n" || text[text.length - 1] == "\r")) text = text.substr(0, text.length - 1);
|
||||
@ -1316,7 +1326,10 @@ function drawTable(table) {
|
||||
|
||||
// A cell in a Markdown table cannot have actual new lines so replace
|
||||
// them with <br>, which are supported by the markdown renderers.
|
||||
let cellText = processMdArrayNewLines(td.lines, true).replace(/\n+/g, "<br>");
|
||||
let cellText = processMdArrayNewLines(td.lines, true)
|
||||
let lines = cellText.split('\n');
|
||||
lines = postProcessMarkdown(lines);
|
||||
cellText = lines.join('\n').replace(/\n+/g, "<br>");
|
||||
|
||||
// Inside tables cells, "|" needs to be escaped
|
||||
cellText = cellText.replace(/\|/g, "\\|");
|
||||
@ -1397,44 +1410,7 @@ function minifyHtml(html) {
|
||||
return output;
|
||||
}
|
||||
|
||||
async function enexXmlToMd(xmlString, resources, options = {}) {
|
||||
// This allows simplifying the HTML, which results in better Markdown. In particular, it removes all
|
||||
// non-significant newlines and convert them to spaces.
|
||||
// xmlString = minifyHtml(xmlString);
|
||||
// console.info([xmlString]);
|
||||
|
||||
const stream = stringToStream(xmlString);
|
||||
let result = await enexXmlToMdArray(stream, resources, options);
|
||||
|
||||
let mdLines = [];
|
||||
|
||||
for (let i = 0; i < result.content.lines.length; i++) {
|
||||
let line = result.content.lines[i];
|
||||
if (typeof line === 'object' && line.type === 'table') { // A table
|
||||
const table = line;
|
||||
const tableLines = drawTable(table);
|
||||
mdLines = mdLines.concat(tableLines);
|
||||
} else if (typeof line === 'object' && line.type === 'code') {
|
||||
mdLines = mdLines.concat(line.lines);
|
||||
} else if (typeof line === 'object') {
|
||||
console.warn('Unhandled object type:', line);
|
||||
mdLines = mdLines.concat(line.lines);
|
||||
} else { // an actual line
|
||||
mdLines.push(line);
|
||||
}
|
||||
}
|
||||
|
||||
let firstAttachment = true;
|
||||
for (let i = 0; i < result.resources.length; i++) {
|
||||
let r = result.resources[i];
|
||||
if (firstAttachment) mdLines.push(NEWLINE);
|
||||
mdLines.push(NEWLINE);
|
||||
mdLines = addResourceTag(mdLines, r, r.filename);
|
||||
firstAttachment = false;
|
||||
}
|
||||
|
||||
let output = processMdArrayNewLines(mdLines).split('\n')
|
||||
|
||||
function postProcessMarkdown(lines) {
|
||||
// After importing HTML, the resulting Markdown often has empty lines at the beginning and end due to
|
||||
// block start/end or elements that were ignored, etc. If these white spaces were intended it's not really
|
||||
// possible to detect it, so simply trim them all so that the result is more deterministic and can be
|
||||
@ -1490,8 +1466,51 @@ async function enexXmlToMd(xmlString, resources, options = {}) {
|
||||
return output;
|
||||
}
|
||||
|
||||
output = trimEmptyLines(output)
|
||||
output = cleanUpSpaces(output)
|
||||
lines = trimEmptyLines(lines)
|
||||
lines = cleanUpSpaces(lines)
|
||||
|
||||
return lines;
|
||||
}
|
||||
|
||||
/**
 * Converts the XML/HTML body of a note into a single Markdown string.
 *
 * The input is first parsed by enexXmlToMdArray(), which yields a mixed list of
 * plain text entries and structured objects (tables, code blocks). That list is
 * flattened here, one resource tag is appended per entry of the parsed result's
 * `resources` list, and the result is passed through processMdArrayNewLines()
 * and postProcessMarkdown().
 *
 * @param {string} xmlString - Markup to convert.
 * @param {*} resources - Forwarded untouched to enexXmlToMdArray().
 * @param {Object} [options={}] - Forwarded untouched to enexXmlToMdArray().
 * @returns {Promise<string>} The Markdown text, lines joined with "\n".
 */
async function enexXmlToMd(xmlString, resources, options = {}) {
	// A minifyHtml() pre-pass (meant to simplify the HTML by turning non-significant
	// newlines into spaces) is currently commented out:
	// xmlString = minifyHtml(xmlString);

	const parsed = await enexXmlToMdArray(stringToStream(xmlString), resources, options);

	let mdLines = [];

	for (const entry of parsed.content.lines) {
		// Anything that is not an object is an actual line of text.
		if (typeof entry !== 'object') {
			mdLines.push(entry);
			continue;
		}

		if (entry.type === 'table') {
			// Tables are rendered to Markdown lines by drawTable().
			mdLines = mdLines.concat(drawTable(entry));
			continue;
		}

		// Code blocks already carry their lines. Any other object type is
		// reported and then handled the same way.
		if (entry.type !== 'code') console.warn('Unhandled object type:', entry);
		mdLines = mdLines.concat(entry.lines);
	}

	for (const [index, resource] of parsed.resources.entries()) {
		// One NEWLINE before every resource tag, plus an extra one before the first.
		if (index === 0) mdLines.push(NEWLINE);
		mdLines.push(NEWLINE);
		mdLines = addResourceTag(mdLines, resource, resource.filename);
	}

	const finalLines = postProcessMarkdown(processMdArrayNewLines(mdLines).split('\n'));

	return finalLines.join('\n');
}
|
||||
|
Loading…
Reference in New Issue
Block a user