1
0
mirror of https://github.com/laurent22/joplin.git synced 2025-11-29 22:48:10 +02:00

HtmlToMd: Fixed handling of inline tags

This commit is contained in:
Laurent Cozic
2018-05-15 13:26:53 +01:00
parent 8cce2f17d5
commit a2b1181f7c
5 changed files with 156 additions and 43 deletions

View File

@@ -337,6 +337,9 @@ function processMdArrayNewLines(md, isTable = false) {
}
let lines = output.replace(/\\r/g, '').split('\n');
// console.info(lines);
lines = formatMdLayout(lines)
// lines = convertSingleLineCodeBlocksToInline(lines)
lines = mergeMultipleNewLines(lines);
@@ -530,6 +533,8 @@ function simplifyString(s) {
}
function collapseWhiteSpaceAndAppend(lines, state, text) {
// console.info([text]);
if (state.inCode.length) {
lines.push(text);
@@ -556,6 +561,11 @@ function collapseWhiteSpaceAndAppend(lines, state, text) {
// console.info(lines);
if (!!text.match(/^\n+$/)) {
lines.push(' ');
return lines;
}
// Remove all \n and \r from the left and right of the text
while (text.length && (text[0] == "\n" || text[0] == "\r")) text = text.substr(1);
while (text.length && (text[text.length - 1] == "\n" || text[text.length - 1] == "\r")) text = text.substr(0, text.length - 1);
@@ -1316,7 +1326,10 @@ function drawTable(table) {
// A cell in a Markdown table cannot have actual new lines so replace
// them with <br>, which are supported by the markdown renderers.
let cellText = processMdArrayNewLines(td.lines, true).replace(/\n+/g, "<br>");
let cellText = processMdArrayNewLines(td.lines, true)
let lines = cellText.split('\n');
lines = postProcessMarkdown(lines);
cellText = lines.join('\n').replace(/\n+/g, "<br>");
// Inside tables cells, "|" needs to be escaped
cellText = cellText.replace(/\|/g, "\\|");
@@ -1397,44 +1410,7 @@ function minifyHtml(html) {
return output;
}
async function enexXmlToMd(xmlString, resources, options = {}) {
// This allows simplifying the HTML, which results in better Markdown. In particular, it removes all
// non-significant newlines and convert them to spaces.
// xmlString = minifyHtml(xmlString);
// console.info([xmlString]);
const stream = stringToStream(xmlString);
let result = await enexXmlToMdArray(stream, resources, options);
let mdLines = [];
for (let i = 0; i < result.content.lines.length; i++) {
let line = result.content.lines[i];
if (typeof line === 'object' && line.type === 'table') { // A table
const table = line;
const tableLines = drawTable(table);
mdLines = mdLines.concat(tableLines);
} else if (typeof line === 'object' && line.type === 'code') {
mdLines = mdLines.concat(line.lines);
} else if (typeof line === 'object') {
console.warn('Unhandled object type:', line);
mdLines = mdLines.concat(line.lines);
} else { // an actual line
mdLines.push(line);
}
}
let firstAttachment = true;
for (let i = 0; i < result.resources.length; i++) {
let r = result.resources[i];
if (firstAttachment) mdLines.push(NEWLINE);
mdLines.push(NEWLINE);
mdLines = addResourceTag(mdLines, r, r.filename);
firstAttachment = false;
}
let output = processMdArrayNewLines(mdLines).split('\n')
function postProcessMarkdown(lines) {
// After importing HTML, the resulting Markdown often has empty lines at the beginning and end due to
// block start/end or elements that were ignored, etc. If these white spaces were intended it's not really
// possible to detect it, so simply trim them all so that the result is more deterministic and can be
@@ -1490,8 +1466,51 @@ async function enexXmlToMd(xmlString, resources, options = {}) {
return output;
}
output = trimEmptyLines(output)
output = cleanUpSpaces(output)
lines = trimEmptyLines(lines)
lines = cleanUpSpaces(lines)
return lines;
}
/**
 * Converts an XML string into a single Markdown string.
 *
 * The XML is parsed by enexXmlToMdArray(); the parsed content entries are
 * flattened into a list of Markdown lines, a tag is added for every resource
 * reported by the parser, and the list is then run through
 * processMdArrayNewLines() and postProcessMarkdown().
 *
 * @param {string} xmlString - The XML to convert.
 * @param {*} resources - Forwarded unchanged to enexXmlToMdArray().
 * @param {Object} [options={}] - Forwarded unchanged to enexXmlToMdArray().
 * @returns {Promise<string>} The resulting Markdown lines joined with "\n".
 */
async function enexXmlToMd(xmlString, resources, options = {}) {
	// Minifying the HTML first (minifyHtml) would replace non-significant
	// newlines with spaces and give simpler Markdown, but that step is
	// currently disabled.
	const parsed = await enexXmlToMdArray(stringToStream(xmlString), resources, options);

	let mdLines = [];

	for (const entry of parsed.content.lines) {
		if (typeof entry !== 'object') {
			// A plain line: keep it as it is.
			mdLines.push(entry);
			continue;
		}

		if (entry.type === 'table') {
			// Tables are rendered to Markdown lines by drawTable().
			mdLines = mdLines.concat(drawTable(entry));
			continue;
		}

		// Code blocks contribute their lines directly. Any other kind of object
		// is handled the same way, but is reported first since it is unexpected.
		if (entry.type !== 'code') console.warn('Unhandled object type:', entry);
		mdLines = mdLines.concat(entry.lines);
	}

	// Every resource tag is preceded by one NEWLINE; the first one gets an
	// additional NEWLINE in front of it.
	let isFirstResource = true;
	for (const resource of parsed.resources) {
		mdLines.push(NEWLINE);
		if (isFirstResource) {
			mdLines.push(NEWLINE);
			isFirstResource = false;
		}
		mdLines = addResourceTag(mdLines, resource, resource.filename);
	}

	const splitLines = processMdArrayNewLines(mdLines).split('\n');
	const cleanedLines = postProcessMarkdown(splitLines);
	return cleanedLines.join('\n');
}