1
0
mirror of https://github.com/laurent22/joplin.git synced 2025-08-13 22:12:50 +02:00

Started adding support for enex tables

This commit is contained in:
Laurent Cozic
2017-07-13 19:29:10 +00:00
parent e28ccdb05a
commit 1b50bfe960
2 changed files with 166 additions and 61 deletions

View File

@@ -179,7 +179,7 @@ function addResourceTag(lines, resource, alt = "") {
function isBlockTag(n) { function isBlockTag(n) {
return n=="div" || n=="p" || n=="dl" || n=="dd" || n=="center" || n=="table" || n=="tr" || n=="td" || n=="th" || n=="tbody"; return n=="div" || n=="p" || n=="dl" || n=="dd" || n=="center";
} }
function isStrongTag(n) { function isStrongTag(n) {
@@ -195,7 +195,7 @@ function isAnchor(n) {
} }
function isIgnoredEndTag(n) { function isIgnoredEndTag(n) {
return n=="en-note" || n=="en-todo" || n=="span" || n=="body" || n=="html" || n=="font" || n=="br" || n=='hr' || n=='s'; return n=="en-note" || n=="en-todo" || n=="span" || n=="body" || n=="html" || n=="font" || n=="br" || n=='hr' || n=='s' || n == 'tbody';
} }
function isListTag(n) { function isListTag(n) {
@@ -204,7 +204,7 @@ function isListTag(n) {
// Elements that don't require any special treatment beside adding a newline character // Elements that don't require any special treatment beside adding a newline character
function isNewLineOnlyEndTag(n) { function isNewLineOnlyEndTag(n) {
return n=="div" || n=="p" || n=="li" || n=="h1" || n=="h2" || n=="h3" || n=="h4" || n=="h5" || n=="dl" || n=="dd" || n=="center" || n=="table" || n=="tr" || n=="td" || n=="th" || n=="tbody"; return n=="div" || n=="p" || n=="li" || n=="h1" || n=="h2" || n=="h3" || n=="h4" || n=="h5" || n=="dl" || n=="dd" || n=="center";
} }
function isCodeTag(n) { function isCodeTag(n) {
@@ -224,8 +224,6 @@ function enexXmlToMdArray(stream, resources) {
resources = resources.slice(); resources = resources.slice();
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
let output = [];
let state = { let state = {
inCode: false, inCode: false,
lists: [], lists: [],
@@ -236,12 +234,18 @@ function enexXmlToMdArray(stream, resources) {
let strict = true; let strict = true;
var saxStream = require('sax').createStream(strict, options) var saxStream = require('sax').createStream(strict, options)
let section = {
type: 'text',
lines: [],
parent: null,
};
saxStream.on('error', function(e) { saxStream.on('error', function(e) {
reject(e); reject(e);
}) })
saxStream.on('text', function(text) { saxStream.on('text', function(text) {
output = collapseWhiteSpaceAndAppend(output, state, text); section.lines = collapseWhiteSpaceAndAppend(section.lines, state, text);
}) })
// Section: { // Section: {
@@ -250,13 +254,13 @@ function enexXmlToMdArray(stream, resources) {
// } // }
// [
// { // {
// type: "text", // type: 'text',
// lines: [], // lines: [
// }, // 'this is a line',
// '<br>',
// { // {
// type: "table", // type: 'table',
// trs: [ // trs: [
// { // {
// tds: [ // tds: [
@@ -266,19 +270,55 @@ function enexXmlToMdArray(stream, resources) {
// ], // ],
// } // }
// ], // ],
// }
// ] // ]
// }
//output.push(section);
saxStream.on('opentag', function(node) { saxStream.on('opentag', function(node) {
let n = node.name.toLowerCase(); let n = node.name.toLowerCase();
if (n == 'en-note') { if (n == 'en-note') {
// Start of note // Start of note
} else if (isBlockTag(n)) { } else if (isBlockTag(n)) {
output.push(BLOCK_OPEN); section.lines.push(BLOCK_OPEN);
} else if (n == 'table') {
let newSection = {
type: 'table',
lines: [],
parent: section,
};
section.lines.push(newSection);
section = newSection;
} else if (n == 'tbody') {
// Ignore it
} else if (n == 'tr') {
if (section.type != 'table') throw new Error('Found a <tr> tag outside of a table');
let newSection = {
type: 'tr',
lines: [],
parent: section,
}
section.lines.push(newSection);
section = newSection;
} else if (n == 'td' || n == 'th') {
if (section.type != 'tr') throw new Error('Found a <td> tag outside of a <tr>');
let newSection = {
type: 'td',
lines: [],
parent: section,
};
section.lines.push(newSection);
section = newSection;
} else if (isListTag(n)) { } else if (isListTag(n)) {
output.push(BLOCK_OPEN); section.lines.push(BLOCK_OPEN);
state.lists.push({ tag: n, counter: 1 }); state.lists.push({ tag: n, counter: 1 });
} else if (n == 'li') { } else if (n == 'li') {
output.push(BLOCK_OPEN); section.lines.push(BLOCK_OPEN);
if (!state.lists.length) { if (!state.lists.length) {
reject("Found <li> tag without being inside a list"); // TODO: could be a warning, but nothing to handle warnings at the moment reject("Found <li> tag without being inside a list"); // TODO: could be a warning, but nothing to handle warnings at the moment
return; return;
@@ -286,42 +326,42 @@ function enexXmlToMdArray(stream, resources) {
let container = state.lists[state.lists.length - 1]; let container = state.lists[state.lists.length - 1];
if (container.tag == "ul") { if (container.tag == "ul") {
output.push("- "); section.lines.push("- ");
} else { } else {
output.push(container.counter + '. '); section.lines.push(container.counter + '. ');
container.counter++; container.counter++;
} }
} else if (isStrongTag(n)) { } else if (isStrongTag(n)) {
output.push("**"); section.lines.push("**");
} else if (n == 's') { } else if (n == 's') {
// Not supported // Not supported
} else if (isAnchor(n)) { } else if (isAnchor(n)) {
state.anchorAttributes.push(node.attributes); state.anchorAttributes.push(node.attributes);
output.push('['); section.lines.push('[');
} else if (isEmTag(n)) { } else if (isEmTag(n)) {
output.push("*"); section.lines.push("*");
} else if (n == "en-todo") { } else if (n == "en-todo") {
let x = node.attributes && node.attributes.checked && node.attributes.checked.toLowerCase() == 'true' ? 'X' : ' '; let x = node.attributes && node.attributes.checked && node.attributes.checked.toLowerCase() == 'true' ? 'X' : ' ';
output.push('- [' + x + '] '); section.lines.push('- [' + x + '] ');
} else if (n == "hr") { } else if (n == "hr") {
output.push('------------------------------------------------------------------------------'); section.lines.push('------------------------------------------------------------------------------');
} else if (n == "h1") { } else if (n == "h1") {
output.push(BLOCK_OPEN); output.push("# "); section.lines.push(BLOCK_OPEN); section.lines.push("# ");
} else if (n == "h2") { } else if (n == "h2") {
output.push(BLOCK_OPEN); output.push("## "); section.lines.push(BLOCK_OPEN); section.lines.push("## ");
} else if (n == "h3") { } else if (n == "h3") {
output.push(BLOCK_OPEN); output.push("### "); section.lines.push(BLOCK_OPEN); section.lines.push("### ");
} else if (n == "h4") { } else if (n == "h4") {
output.push(BLOCK_OPEN); output.push("#### "); section.lines.push(BLOCK_OPEN); section.lines.push("#### ");
} else if (n == "h5") { } else if (n == "h5") {
output.push(BLOCK_OPEN); output.push("##### "); section.lines.push(BLOCK_OPEN); section.lines.push("##### ");
} else if (n == "h6") { } else if (n == "h6") {
output.push(BLOCK_OPEN); output.push("###### "); section.lines.push(BLOCK_OPEN); section.lines.push("###### ");
} else if (isCodeTag(n)) { } else if (isCodeTag(n)) {
output.push(BLOCK_OPEN); section.lines.push(BLOCK_OPEN);
state.inCode = true; state.inCode = true;
} else if (n == "br") { } else if (n == "br") {
output.push(NEWLINE); section.lines.push(NEWLINE);
} else if (n == "en-media") { } else if (n == "en-media") {
const hash = node.attributes.hash; const hash = node.attributes.hash;
@@ -390,7 +430,7 @@ function enexXmlToMdArray(stream, resources) {
// means it's an attachement. It will be appended along with the // means it's an attachement. It will be appended along with the
// other remaining resources at the bottom of the markdown text. // other remaining resources at the bottom of the markdown text.
if (!!resource.id) { if (!!resource.id) {
output = addResourceTag(output, resource, node.attributes.alt); section.lines = addResourceTag(section.lines, resource, node.attributes.alt);
} }
} }
} else if (n == "span" || n == "font") { } else if (n == "span" || n == "font") {
@@ -404,42 +444,48 @@ function enexXmlToMdArray(stream, resources) {
if (n == 'en-note') { if (n == 'en-note') {
// End of note // End of note
} else if (isNewLineOnlyEndTag(n)) { } else if (isNewLineOnlyEndTag(n)) {
output.push(BLOCK_CLOSE); section.lines.push(BLOCK_CLOSE);
} else if (n == 'td' || n == 'th') {
section = section.parent;
} else if (n == 'tr') {
section = section.parent;
} else if (n == 'table') {
section = section.parent;
} else if (isIgnoredEndTag(n)) { } else if (isIgnoredEndTag(n)) {
// Skip // Skip
} else if (isListTag(n)) { } else if (isListTag(n)) {
output.push(BLOCK_CLOSE); section.lines.push(BLOCK_CLOSE);
state.lists.pop(); state.lists.pop();
} else if (isStrongTag(n)) { } else if (isStrongTag(n)) {
output.push("**"); section.lines.push("**");
} else if (isEmTag(n)) { } else if (isEmTag(n)) {
output.push("*"); section.lines.push("*");
} else if (isCodeTag(n)) { } else if (isCodeTag(n)) {
state.inCode = false; state.inCode = false;
output.push(BLOCK_CLOSE); section.lines.push(BLOCK_CLOSE);
} else if (isAnchor(n)) { } else if (isAnchor(n)) {
let attributes = state.anchorAttributes.pop(); let attributes = state.anchorAttributes.pop();
let url = attributes && attributes.href ? attributes.href : ''; let url = attributes && attributes.href ? attributes.href : '';
if (output.length < 1) throw new Error('Invalid anchor tag closing'); // Sanity check, but normally not possible if (section.lines.length < 1) throw new Error('Invalid anchor tag closing'); // Sanity check, but normally not possible
// When closing the anchor tag, check if there's is any text content. If not // When closing the anchor tag, check if there's is any text content. If not
// put the URL as is (don't wrap it in [](url)). The markdown parser, using // put the URL as is (don't wrap it in [](url)). The markdown parser, using
// GitHub flavour, will turn this URL into a link. This is to generate slightly // GitHub flavour, will turn this URL into a link. This is to generate slightly
// cleaner markdown. // cleaner markdown.
let previous = output[output.length - 1]; let previous = section.lines[section.lines.length - 1];
if (previous == '[') { if (previous == '[') {
output.pop(); section.lines.pop();
output.push(url); section.lines.push(url);
} else if (!previous || previous == url) { } else if (!previous || previous == url) {
output.pop(); section.lines.pop();
output.pop(); section.lines.pop();
output.push(url); section.lines.push(url);
} else { } else {
output.push('](' + url + ')'); section.lines.push('](' + url + ')');
} }
} else if (isListTag(n)) { } else if (isListTag(n)) {
output.push(BLOCK_CLOSE); section.lines.push(BLOCK_CLOSE);
state.lists.pop(); state.lists.pop();
} else if (n == "en-media") { } else if (n == "en-media") {
// Skip // Skip
@@ -457,7 +503,7 @@ function enexXmlToMdArray(stream, resources) {
saxStream.on('end', function() { saxStream.on('end', function() {
resolve({ resolve({
lines: output, content: section,
resources: resources, resources: resources,
}); });
}) })
@@ -466,10 +512,69 @@ function enexXmlToMdArray(stream, resources) {
}); });
} }
/**
 * Flattens a nested array of markdown lines and table sections into a flat
 * array of strings.
 *
 * String entries are copied as-is. A section object whose `type` is 'table',
 * 'tr' or 'td' is replaced by a marker string ('[[TABLE]]', '[[TR]]' or
 * '[[TD]]') followed by the flattened content of its `lines` array. Entries
 * of any other type are left out of the result.
 *
 * @param {Array} mdArray Strings and section objects ({ type, lines }).
 * @returns {Promise<string[]>} The flattened lines, in document order.
 */
async function processMdArrayTables(mdArray) {
	// Marker emitted in front of the content of each supported section type.
	const sectionMarkers = new Map([
		['table', '[[TABLE]]'],
		['tr', '[[TR]]'],
		['td', '[[TD]]'],
	]);

	const flattened = [];

	for (const entry of mdArray) {
		if (typeof entry == 'string') {
			flattened.push(entry);
			continue;
		}

		const marker = sectionMarkers.get(entry.type);
		if (marker === undefined) continue; // Unsupported section type: dropped

		flattened.push(marker);

		const nestedLines = await processMdArrayTables(entry.lines);
		for (const line of nestedLines) {
			flattened.push(line);
		}
	}

	return flattened;
}
/**
 * Computes the column widths of every table section found at the top level
 * of `mdArray` and stores them on the section as a `colWidths` array.
 *
 * The width of a cell is the `length` of its longest line, and the width of
 * a column is the width of its widest cell across all the rows of the table.
 * Tables nested inside a cell are not measured.
 *
 * A table or a row may contain entries that are not sections (for example
 * plain strings). Those entries are skipped and do not count as a row or as
 * a column.
 *
 * @param {Array} mdArray Strings and section objects ({ type, lines }). The
 *   table sections are modified in place.
 * @returns {Array} The same `mdArray` that was passed in.
 */
function addTableDimensions(mdArray) {
	// A row or a cell is any entry that carries its own array of lines.
	const hasLines = (entry) => !!entry && Array.isArray(entry.lines);

	for (const item of mdArray) {
		if (!item || item.type != 'table') continue;

		const colWidths = [];
		const rows = Array.isArray(item.lines) ? item.lines : [];

		for (const row of rows) {
			if (!hasLines(row)) continue;

			// Counts cells only, so that a stray string inside the row does
			// not shift the columns that come after it.
			let colIndex = 0;

			for (const cell of row.lines) {
				if (!hasLines(cell)) continue;

				let cellWidth = 0;
				for (const line of cell.lines) {
					// Entries without a numeric length (such as a nested
					// section) compare as false and are ignored.
					if (line != null && line.length > cellWidth) cellWidth = line.length;
				}

				if (typeof colWidths[colIndex] === 'undefined' || cellWidth > colWidths[colIndex]) {
					colWidths[colIndex] = cellWidth;
				}

				colIndex++;
			}
		}

		item.colWidths = colWidths;
	}

	return mdArray;
}
async function enexXmlToMd(stream, resources) { async function enexXmlToMd(stream, resources) {
let result = await enexXmlToMdArray(stream, resources); let result = await enexXmlToMdArray(stream, resources);
let mdLines = result.lines; // let bla = addTableDimensions(result.content.lines);
// const util = require('util')
// console.log(util.inspect(bla, false, null));
// return '';
let mdLines = result.content.lines; //await processMdArrayTables(result.content.lines);
//let mdLines = result.lines;
let firstAttachment = true; let firstAttachment = true;
for (let i = 0; i < result.resources.length; i++) { for (let i = 0; i < result.resources.length; i++) {
let r = result.resources[i]; let r = result.resources[i];