mirror of
https://github.com/laurent22/joplin.git
synced 2025-03-29 21:21:15 +02:00
Electron: Fixes #318, Fixes #317: ENEX: Improved handling and rendering of plain text links. Improved detection and import of resources. Improved import of tables.
This commit is contained in:
parent
c63bb19cb6
commit
a677b2e844
@ -278,7 +278,7 @@ class MainScreenComponent extends React.Component {
|
|||||||
position: 'absolute',
|
position: 'absolute',
|
||||||
top: 0,
|
top: 0,
|
||||||
left: 0,
|
left: 0,
|
||||||
backgroundColor: theme.backgroundColorTransparent,
|
backgroundColor: theme.backgroundColor,
|
||||||
width: width - 20,
|
width: width - 20,
|
||||||
height: height - 20,
|
height: height - 20,
|
||||||
padding: 10,
|
padding: 10,
|
||||||
|
@ -4,6 +4,7 @@ const htmlentities = (new Entities()).encode;
|
|||||||
const Resource = require('lib/models/Resource.js');
|
const Resource = require('lib/models/Resource.js');
|
||||||
const ModelCache = require('lib/ModelCache');
|
const ModelCache = require('lib/ModelCache');
|
||||||
const { shim } = require('lib/shim.js');
|
const { shim } = require('lib/shim.js');
|
||||||
|
const { _ } = require('lib/locale');
|
||||||
const md5 = require('md5');
|
const md5 = require('md5');
|
||||||
const MdToHtml_Katex = require('lib/MdToHtml_Katex');
|
const MdToHtml_Katex = require('lib/MdToHtml_Katex');
|
||||||
|
|
||||||
@ -54,11 +55,11 @@ class MdToHtml {
|
|||||||
return output.join(' ');
|
return output.join(' ');
|
||||||
}
|
}
|
||||||
|
|
||||||
getAttr_(attrs, name) {
|
getAttr_(attrs, name, defaultValue = null) {
|
||||||
for (let i = 0; i < attrs.length; i++) {
|
for (let i = 0; i < attrs.length; i++) {
|
||||||
if (attrs[i][0] === name) return attrs[i].length > 1 ? attrs[i][1] : null;
|
if (attrs[i][0] === name) return attrs[i].length > 1 ? attrs[i][1] : null;
|
||||||
}
|
}
|
||||||
return null;
|
return defaultValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
setAttr_(attrs, name, value) {
|
setAttr_(attrs, name, value) {
|
||||||
@ -182,11 +183,23 @@ class MdToHtml {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
urldecode_(str) {
|
||||||
|
try {
|
||||||
|
return decodeURIComponent((str+'').replace(/\+/g, '%20'));
|
||||||
|
} catch (error) {
|
||||||
|
// decodeURIComponent can throw if the string contains non-encoded data (for example "100%")
|
||||||
|
// so in this case just return the non encoded string.
|
||||||
|
return str;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
renderTokens_(markdownIt, tokens, options) {
|
renderTokens_(markdownIt, tokens, options) {
|
||||||
let output = [];
|
let output = [];
|
||||||
let previousToken = null;
|
let previousToken = null;
|
||||||
let anchorAttrs = [];
|
let anchorAttrs = [];
|
||||||
let extraCssBlocks = {};
|
let extraCssBlocks = {};
|
||||||
|
let anchorHrefs = [];
|
||||||
|
|
||||||
for (let i = 0; i < tokens.length; i++) {
|
for (let i = 0; i < tokens.length; i++) {
|
||||||
let t = tokens[i];
|
let t = tokens[i];
|
||||||
@ -202,6 +215,7 @@ class MdToHtml {
|
|||||||
const codeBlockLanguage = t && t.info ? t.info : null;
|
const codeBlockLanguage = t && t.info ? t.info : null;
|
||||||
let rendererPlugin = null;
|
let rendererPlugin = null;
|
||||||
let rendererPluginOptions = { tagType: 'inline' };
|
let rendererPluginOptions = { tagType: 'inline' };
|
||||||
|
let linkHref = null;
|
||||||
|
|
||||||
if (isCodeBlock) rendererPlugin = this.rendererPlugin_(codeBlockLanguage);
|
if (isCodeBlock) rendererPlugin = this.rendererPlugin_(codeBlockLanguage);
|
||||||
|
|
||||||
@ -233,6 +247,7 @@ class MdToHtml {
|
|||||||
if (openTag) {
|
if (openTag) {
|
||||||
if (openTag === 'a') {
|
if (openTag === 'a') {
|
||||||
anchorAttrs.push(attrs);
|
anchorAttrs.push(attrs);
|
||||||
|
anchorHrefs.push(this.getAttr_(attrs, 'href'));
|
||||||
output.push(this.renderOpenLink_(attrs, options));
|
output.push(this.renderOpenLink_(attrs, options));
|
||||||
} else {
|
} else {
|
||||||
const attrsHtml = this.renderAttrs_(attrs);
|
const attrsHtml = this.renderAttrs_(attrs);
|
||||||
@ -317,7 +332,28 @@ class MdToHtml {
|
|||||||
|
|
||||||
if (closeTag) {
|
if (closeTag) {
|
||||||
if (closeTag === 'a') {
|
if (closeTag === 'a') {
|
||||||
output.push(this.renderCloseLink_(anchorAttrs.pop(), options));
|
const currentAnchorAttrs = anchorAttrs.pop();
|
||||||
|
const previousContent = output.length ? output[output.length - 1].trim() : '';
|
||||||
|
const anchorHref = this.getAttr_(currentAnchorAttrs, 'href', '').trim();
|
||||||
|
|
||||||
|
// Optimisation: If the content of the anchor is the same as the URL, we replace the content
|
||||||
|
// by (Link). This is to shorten the text, which is important especially when the note comes
|
||||||
|
// from imported HTML, which can contain many such links and make the text unreadable. An example
|
||||||
|
// would be a movie review that has multiple links to allow a user to rate the film from 1 to 5 stars.
|
||||||
|
// In the original page, it might be rendered as stars, via CSS, but in the imported note it would look like this:
|
||||||
|
// http://example.com/rate/1 http://example.com/rate/2 http://example.com/rate/3
|
||||||
|
// http://example.com/rate/4 http://example.com/rate/5
|
||||||
|
// which would take a lot of screen space even though it doesn't matter since the user is unlikely
|
||||||
|
// to rate the film from the note. This is actually a nice example, still readable, but there is way
|
||||||
|
// worse than this in notes that come from web-clipped content.
|
||||||
|
// With this change, the links will still be preserved but displayed like
|
||||||
|
// (Link) (Link) (Link) (Link) (Link)
|
||||||
|
if (this.urldecode_(previousContent) === htmlentities(this.urldecode_(anchorHref))) {
|
||||||
|
output.pop();
|
||||||
|
output.push(_('(Link)'));
|
||||||
|
}
|
||||||
|
|
||||||
|
output.push(this.renderCloseLink_(currentAnchorAttrs, options));
|
||||||
} else {
|
} else {
|
||||||
output.push('</' + closeTag + '>');
|
output.push('</' + closeTag + '>');
|
||||||
}
|
}
|
||||||
|
@ -476,12 +476,16 @@ function enexXmlToMdArray(stream, resources) {
|
|||||||
// </note>
|
// </note>
|
||||||
// </en-export>
|
// </en-export>
|
||||||
|
|
||||||
|
// Note that there's also the case of resources with no ID where the ID is actually the MD5 of the content.
|
||||||
|
// This is handled in import-enex.js
|
||||||
|
|
||||||
let found = false;
|
let found = false;
|
||||||
for (let i = 0; i < remainingResources.length; i++) {
|
for (let i = 0; i < remainingResources.length; i++) {
|
||||||
let r = remainingResources[i];
|
let r = remainingResources[i];
|
||||||
if (!r.id) {
|
if (!r.id) {
|
||||||
r.id = hash;
|
resource = Object.assign({}, r);
|
||||||
remainingResources[i] = r;
|
resource.id = hash;
|
||||||
|
remainingResources.splice(i, 1);
|
||||||
found = true;
|
found = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -490,13 +494,13 @@ function enexXmlToMdArray(stream, resources) {
|
|||||||
if (!found) {
|
if (!found) {
|
||||||
console.warn('Hash with no associated resource: ' + hash);
|
console.warn('Hash with no associated resource: ' + hash);
|
||||||
}
|
}
|
||||||
} else {
|
}
|
||||||
// If the resource does not appear among the note's resources, it
|
|
||||||
// means it's an attachment. It will be appended along with the
|
// If the resource does not appear among the note's resources, it
|
||||||
// other remaining resources at the bottom of the markdown text.
|
// means it's an attachment. It will be appended along with the
|
||||||
if (!!resource.id) {
|
// other remaining resources at the bottom of the markdown text.
|
||||||
section.lines = addResourceTag(section.lines, resource, nodeAttributes.alt);
|
if (resource && !!resource.id) {
|
||||||
}
|
section.lines = addResourceTag(section.lines, resource, nodeAttributes.alt);
|
||||||
}
|
}
|
||||||
} else if (["span", "font", 'sup', 'cite', 'abbr', 'small', 'tt', 'sub', 'colgroup', 'col', 'ins', 'caption', 'var', 'map', 'area'].indexOf(n) >= 0) {
|
} else if (["span", "font", 'sup', 'cite', 'abbr', 'small', 'tt', 'sub', 'colgroup', 'col', 'ins', 'caption', 'var', 'map', 'area'].indexOf(n) >= 0) {
|
||||||
// Inline tags that can be ignored in Markdown
|
// Inline tags that can be ignored in Markdown
|
||||||
@ -545,10 +549,6 @@ function enexXmlToMdArray(stream, resources) {
|
|||||||
|
|
||||||
if (section.lines.length < 1) throw new Error('Invalid anchor tag closing'); // Sanity check, but normally not possible
|
if (section.lines.length < 1) throw new Error('Invalid anchor tag closing'); // Sanity check, but normally not possible
|
||||||
|
|
||||||
const pushEmptyAnchor = (url) => {
|
|
||||||
section.lines.push('[link](' + url + ')');
|
|
||||||
}
|
|
||||||
|
|
||||||
// When closing the anchor tag, check if there is any text content. If not
|
// When closing the anchor tag, check if there is any text content. If not
|
||||||
// put the URL as is (don't wrap it in [](url)). The markdown parser, using
|
// put the URL as is (don't wrap it in [](url)). The markdown parser, using
|
||||||
// GitHub flavour, will turn this URL into a link. This is to generate slightly
|
// GitHub flavour, will turn this URL into a link. This is to generate slightly
|
||||||
@ -556,11 +556,11 @@ function enexXmlToMdArray(stream, resources) {
|
|||||||
let previous = section.lines[section.lines.length - 1];
|
let previous = section.lines[section.lines.length - 1];
|
||||||
if (previous == '[') {
|
if (previous == '[') {
|
||||||
section.lines.pop();
|
section.lines.pop();
|
||||||
pushEmptyAnchor(url);
|
section.lines.push(url);
|
||||||
} else if (!previous || previous == url) {
|
} else if (!previous || previous == url) {
|
||||||
section.lines.pop();
|
section.lines.pop();
|
||||||
section.lines.pop();
|
section.lines.pop();
|
||||||
pushEmptyAnchor(url);
|
section.lines.push(url);
|
||||||
} else {
|
} else {
|
||||||
// Need to remove any new line character between the current ']' and the previous '['
|
// Need to remove any new line character between the current ']' and the previous '['
|
||||||
// otherwise it won't render properly.
|
// otherwise it won't render properly.
|
||||||
@ -583,8 +583,7 @@ function enexXmlToMdArray(stream, resources) {
|
|||||||
const c = section.lines.pop();
|
const c = section.lines.pop();
|
||||||
if (c === '[') break;
|
if (c === '[') break;
|
||||||
}
|
}
|
||||||
//section.lines.push(url);
|
section.lines.push(url);
|
||||||
pushEmptyAnchor(url);
|
|
||||||
} else {
|
} else {
|
||||||
section.lines.push('](' + url + ')');
|
section.lines.push('](' + url + ')');
|
||||||
}
|
}
|
||||||
@ -644,7 +643,6 @@ function drawTable(table) {
|
|||||||
// https://gist.github.com/IanWang/28965e13cdafdef4e11dc91f578d160d#tables
|
// https://gist.github.com/IanWang/28965e13cdafdef4e11dc91f578d160d#tables
|
||||||
|
|
||||||
const flatRender = tableHasSubTables(table); // Render the table as regular text
|
const flatRender = tableHasSubTables(table); // Render the table as regular text
|
||||||
const minColWidth = 3;
|
|
||||||
let lines = [];
|
let lines = [];
|
||||||
lines.push(BLOCK_OPEN);
|
lines.push(BLOCK_OPEN);
|
||||||
let headerDone = false;
|
let headerDone = false;
|
||||||
@ -687,9 +685,16 @@ function drawTable(table) {
|
|||||||
|
|
||||||
// A cell in a Markdown table cannot have actual new lines so replace
|
// A cell in a Markdown table cannot have actual new lines so replace
|
||||||
// them with <br>, which are supported by the markdown renderers.
|
// them with <br>, which are supported by the markdown renderers.
|
||||||
const cellText = processMdArrayNewLines(td.lines).replace(/\n+/g, "<br>");
|
let cellText = processMdArrayNewLines(td.lines).replace(/\n+/g, "<br>");
|
||||||
|
|
||||||
const width = Math.max(cellText.length, 3);
|
// Inside tables cells, "|" needs to be escaped
|
||||||
|
cellText = cellText.replace(/\|/g, "\\|");
|
||||||
|
|
||||||
|
// Previously the width of the cell was as big as the content since it looks nicer, however that often doesn't work
|
||||||
|
// since the content can be very long, resulting in unreadable markdown. So no solution is perfect but making it a
|
||||||
|
// width of 3 is a bit better. Note that 3 is the minimum width of a cell - below this, it won't be rendered by
|
||||||
|
// markdown parsers.
|
||||||
|
const width = 3;
|
||||||
line.push(stringPadding(cellText, width, ' ', stringPadding.RIGHT));
|
line.push(stringPadding(cellText, width, ' ', stringPadding.RIGHT));
|
||||||
|
|
||||||
if (!headerDone) {
|
if (!headerDone) {
|
||||||
|
@ -11,6 +11,7 @@ const { enexXmlToMd } = require('./import-enex-md-gen.js');
|
|||||||
const { time } = require('lib/time-utils.js');
|
const { time } = require('lib/time-utils.js');
|
||||||
const Levenshtein = require('levenshtein');
|
const Levenshtein = require('levenshtein');
|
||||||
const jsSHA = require("jssha");
|
const jsSHA = require("jssha");
|
||||||
|
const md5 = require('md5');
|
||||||
|
|
||||||
//const Promise = require('promise');
|
//const Promise = require('promise');
|
||||||
const fs = require('fs-extra');
|
const fs = require('fs-extra');
|
||||||
@ -30,8 +31,8 @@ function extractRecognitionObjId(recognitionXml) {
|
|||||||
return r && r.length >= 2 ? r[1] : null;
|
return r && r.length >= 2 ? r[1] : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
function filePutContents(filePath, content) {
|
async function filePutContents(filePath, content) {
|
||||||
return fs.writeFile(filePath, content);
|
await fs.writeFile(filePath, content);
|
||||||
}
|
}
|
||||||
|
|
||||||
function removeUndefinedProperties(note) {
|
function removeUndefinedProperties(note) {
|
||||||
@ -255,49 +256,6 @@ function importEnex(parentFolderId, filePath, importOptions = null) {
|
|||||||
stream.resume();
|
stream.resume();
|
||||||
processingNotes = false;
|
processingNotes = false;
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
// let chain = [];
|
|
||||||
// while (notes.length) {
|
|
||||||
// let note = notes.shift();
|
|
||||||
// const contentStream = stringToStream(note.bodyXml);
|
|
||||||
// chain.push(() => {
|
|
||||||
// return enexXmlToMd(contentStream, note.resources).then((body) => {
|
|
||||||
// delete note.bodyXml;
|
|
||||||
|
|
||||||
// // console.info('-----------------------------------------------------------');
|
|
||||||
// // console.info(body);
|
|
||||||
// // console.info('-----------------------------------------------------------');
|
|
||||||
|
|
||||||
// note.id = uuid.create();
|
|
||||||
// note.parent_id = parentFolderId;
|
|
||||||
// note.body = body;
|
|
||||||
|
|
||||||
// // Notes in enex files always have a created timestamp but not always an
|
|
||||||
// // updated timestamp (it the note has never been modified). For sync
|
|
||||||
// // we require an updated_time property, so set it to create_time in that case
|
|
||||||
// if (!note.updated_time) note.updated_time = note.created_time;
|
|
||||||
|
|
||||||
// return saveNoteToStorage(note, importOptions.fuzzyMatching);
|
|
||||||
// }).then((result) => {
|
|
||||||
// if (result.noteUpdated) {
|
|
||||||
// progressState.updated++;
|
|
||||||
// } else if (result.noteCreated) {
|
|
||||||
// progressState.created++;
|
|
||||||
// } else if (result.noteSkipped) {
|
|
||||||
// progressState.skipped++;
|
|
||||||
// }
|
|
||||||
// progressState.resourcesCreated += result.resourcesCreated;
|
|
||||||
// progressState.notesTagged += result.notesTagged;
|
|
||||||
// importOptions.onProgress(progressState);
|
|
||||||
// });
|
|
||||||
// });
|
|
||||||
// }
|
|
||||||
|
|
||||||
// return promiseChain(chain).then(() => {
|
|
||||||
// stream.resume();
|
|
||||||
// processingNotes = false;
|
|
||||||
// return true;
|
|
||||||
// });
|
|
||||||
}
|
}
|
||||||
|
|
||||||
saxStream.on('error', (error) => {
|
saxStream.on('error', (error) => {
|
||||||
@ -418,6 +376,7 @@ function importEnex(parentFolderId, filePath, importOptions = null) {
|
|||||||
noteAttributes = null;
|
noteAttributes = null;
|
||||||
} else if (n == 'resource') {
|
} else if (n == 'resource') {
|
||||||
let decodedData = null;
|
let decodedData = null;
|
||||||
|
let resourceId = noteResource.id;
|
||||||
if (noteResource.dataEncoding == 'base64') {
|
if (noteResource.dataEncoding == 'base64') {
|
||||||
try {
|
try {
|
||||||
decodedData = Buffer.from(noteResource.data, 'base64');
|
decodedData = Buffer.from(noteResource.data, 'base64');
|
||||||
@ -429,8 +388,14 @@ function importEnex(parentFolderId, filePath, importOptions = null) {
|
|||||||
decodedData = noteResource.data; // Just put the encoded data directly in the file so it can, potentially, be manually decoded later
|
decodedData = noteResource.data; // Just put the encoded data directly in the file so it can, potentially, be manually decoded later
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!resourceId && decodedData) {
|
||||||
|
// If no resource ID is present, the resource ID is actually the MD5 of the data.
|
||||||
|
// This ID will match the "hash" attribute of the corresponding <en-media> tag.
|
||||||
|
resourceId = md5(decodedData);
|
||||||
|
}
|
||||||
|
|
||||||
let r = {
|
let r = {
|
||||||
id: noteResource.id,
|
id: resourceId,
|
||||||
data: decodedData,
|
data: decodedData,
|
||||||
mime: noteResource.mime,
|
mime: noteResource.mime,
|
||||||
title: noteResource.filename ? noteResource.filename : '',
|
title: noteResource.filename ? noteResource.filename : '',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user