1
0
mirror of https://github.com/laurent22/joplin.git synced 2025-01-11 18:24:43 +02:00

Desktop: Improved GotoAnything speed and made it safer

Previously we'd use the remove-markdown package to create the note
preview however this function would freeze on certain notes, and was
probably unsafe as it used regex to parse Markdown. Replaced this in
favour of Markdown-it along with htmlparser2 to strip all markup from a
note.
This commit is contained in:
Laurent Cozic 2020-07-14 23:27:12 +01:00
parent 7f1c25793a
commit 657cebfda9
19 changed files with 147 additions and 79 deletions

View File

@ -5554,11 +5554,6 @@
"through2": "^2.0.3"
}
},
"remove-markdown": {
"version": "0.3.0",
"resolved": "https://registry.npmjs.org/remove-markdown/-/remove-markdown-0.3.0.tgz",
"integrity": "sha1-XktmdJOpNXlyjz1S7MHbnKUF3Jg="
},
"remove-trailing-separator": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/remove-trailing-separator/-/remove-trailing-separator-1.1.0.tgz",

View File

@ -91,7 +91,6 @@
"read-chunk": "^2.1.0",
"redux": "^3.7.2",
"relative": "^3.0.2",
"remove-markdown": "^0.3.0",
"request": "^2.88.0",
"sax": "^1.2.4",
"server-destroy": "^1.0.1",

View File

@ -0,0 +1,45 @@
require('app-module-path').addPath(__dirname);
const { asyncTest } = require('test-utils.js');
const MarkupToHtml = require('lib/joplin-renderer/MarkupToHtml');
describe('MarkupToHtml', function() {
it('should strip markup', asyncTest(async () => {
const service = new MarkupToHtml();
const testCases = {
[MarkupToHtml.MARKUP_LANGUAGE_MARKDOWN]: [
['', ''],
['## hello', 'hello'],
['## hello **hello!**', 'hello hello!'],
['*hi!*', 'hi!'],
['Some `code` here', 'Some code here'],
['Some <s>html</s> here', 'Some html here'],
['Some &amp; here', 'Some & here'],
['Some & here', 'Some & here'],
['[![image alt](:/fe9ea7fa727e4375b2e7d8a1b873314d)](https://example.com)', ''],
],
[MarkupToHtml.MARKUP_LANGUAGE_HTML]: [
['<h1>hello</h1>', 'hello'],
['Some <b>strong</b> text', 'Some strong text'],
['<b>M&amp;Ms</b>', 'M&Ms'],
['<style>BODY{margin:0;padding:0;background:#f0f0f0}</style>', ''],
],
};
for (const markup in testCases) {
for (const t of testCases[markup]) {
const input = t[0];
const expected = t[1];
const actual = service.stripMarkup(Number(markup), input);
expect(actual).toBe(expected, `Markup: ${markup}`);
}
}
expect(service.stripMarkup(1, 'one line\n\ntwo line', { collapseWhiteSpaces: true })).toBe('one line two line');
expect(service.stripMarkup(1, 'one line two line', { collapseWhiteSpaces: true })).toBe('one line two line');
expect(service.stripMarkup(1, 'one line\n two line', { collapseWhiteSpaces: true })).toBe('one line two line');
}));
});

View File

@ -100,41 +100,4 @@ describe('markdownUtils', function() {
}
}));
it('should remove Markdown syntax elements from the text', asyncTest(async () => {
const inputStrings = [
'', // Empty string
'This is some plain text', // Plain text
'## This is a header', // Header syntax
'This is a text with **bold** and *italicized* text', // Text with annotations
'This is a text with __bold__ and _italicized_ text', // Text with annotations alternate form
'[link to google](https://www.google.com/)', // Link
'> This is a blockquote\n And another line', // Blockquote
'* List item\n* List item', // Unordered list
'- List item\n- List item', // Unordered list
'1. List item\n2. List item', // Ordered list
'This is some `inline code`', // Inlined code
];
const expectedOutputStrings = [
'',
'This is some plain text',
'This is a header',
'This is a text with bold and italicized text',
'This is a text with bold and italicized text',
'link to google',
'This is a blockquote\n And another line',
'List item\nList item',
'List item\nList item',
'List item\nList item',
'This is some inline code',
];
expect(inputStrings.length).toBe(expectedOutputStrings.length);
for (let i = 0; i < inputStrings.length; i++) {
const outputString = markdownUtils.stripMarkdown(inputStrings[i]);
expect(outputString).toBe(expectedOutputStrings[i]);
}
}));
});

View File

@ -460,7 +460,7 @@ class MainScreenComponent extends React.Component {
<div style={style}>
<div style={modalLayerStyle}>{this.state.modalLayer.message}</div>
{noteContentPropertiesDialogOptions.visible && <NoteContentPropertiesDialog theme={this.props.theme} onClose={this.noteContentPropertiesDialog_close} text={noteContentPropertiesDialogOptions.text} lines={noteContentPropertiesDialogOptions.lines}/>}
{noteContentPropertiesDialogOptions.visible && <NoteContentPropertiesDialog theme={this.props.theme} onClose={this.noteContentPropertiesDialog_close} text={noteContentPropertiesDialogOptions.text}/>}
{notePropertiesDialogOptions.visible && <NotePropertiesDialog theme={this.props.theme} noteId={notePropertiesDialogOptions.noteId} onClose={this.notePropertiesDialog_close} onRevisionLinkClick={notePropertiesDialogOptions.onRevisionLinkClick} />}
{shareNoteDialogOptions.visible && <ShareNoteDialog theme={this.props.theme} noteIds={shareNoteDialogOptions.noteIds} onClose={this.shareNoteDialog_close} />}

View File

@ -16,6 +16,7 @@ export const runtime = (comp:any):CommandRuntime => {
noteContentPropertiesDialogOptions: {
visible: true,
text: note.body,
markupLanguage: note.markup_language,
},
});
}

View File

@ -3,12 +3,13 @@ import { useState, useEffect } from 'react';
const { _ } = require('lib/locale.js');
const { themeStyle } = require('lib/theme');
const DialogButtonRow = require('./DialogButtonRow.min');
const { stripMarkdown } = require('lib/markdownUtils');
const Countable = require('countable');
const markupLanguageUtils = require('lib/markupLanguageUtils');
interface NoteContentPropertiesDialogProps {
theme: number,
text: string,
markupLanguage: number,
onClose: Function,
}
@ -20,6 +21,13 @@ interface KeyToLabelMap {
[key: string]: string;
}
let markupToHtml_:any = null;
function markupToHtml() {
if (markupToHtml_) return markupToHtml_;
markupToHtml_ = markupLanguageUtils.newMarkupToHtml();
return markupToHtml_;
}
function countElements(text:string, wordSetter:Function, characterSetter:Function, characterNoSpaceSetter:Function, lineSetter:Function) {
Countable.count(text, (counter:any) => {
wordSetter(counter.words);
@ -60,7 +68,7 @@ export default function NoteContentPropertiesDialog(props:NoteContentPropertiesD
}, [props.text]);
useEffect(() => {
const strippedText: string = stripMarkdown(props.text);
const strippedText: string = markupToHtml().stripMarkup(props.markupLanguage, props.text);
countElements(strippedText, setStrippedWords, setStrippedCharacters, setStrippedCharactersNoSpace, setStrippedLines);
}, [props.text]);
@ -77,7 +85,11 @@ export default function NoteContentPropertiesDialog(props:NoteContentPropertiesD
};
const strippedTextProperties: TextPropertiesMap = {
lines: strippedLines,
// The function stripMarkup() currently removes all new lines so we can't use the
// strippedLines property. Instead we simply use the lines property which should
// be a good approximation anyway.
// Also dummy check to silence TypeScript warning
lines: strippedLines === -5000 ? strippedLines : lines,
words: strippedWords,
characters: strippedCharacters,
charactersNoSpace: strippedCharactersNoSpace,

View File

@ -10878,11 +10878,6 @@
"resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-2.0.1.tgz",
"integrity": "sha1-PFMZQukIwml8DsNEhYwobHygpgo="
},
"striptags": {
"version": "3.1.1",
"resolved": "https://registry.npmjs.org/striptags/-/striptags-3.1.1.tgz",
"integrity": "sha1-yMPn/db7S7OjKjt1LltePjgJPr0="
},
"stylis": {
"version": "3.5.4",
"resolved": "https://registry.npmjs.org/stylis/-/stylis-3.5.4.tgz",

View File

@ -169,7 +169,6 @@
"readability-node": "^0.1.0",
"redux": "^3.7.2",
"relative": "^3.0.2",
"remove-markdown": "^0.3.0",
"reselect": "^4.0.0",
"sax": "^1.2.4",
"server-destroy": "^1.0.1",
@ -178,7 +177,6 @@
"sqlite3": "^4.1.1",
"string-padding": "^1.0.2",
"string-to-stream": "^1.1.1",
"striptags": "^3.1.1",
"syswide-cas": "^5.1.0",
"taboverride": "^4.0.3",
"tar": "^4.4.4",

View File

@ -13,10 +13,7 @@ const HelpButton = require('../gui/HelpButton.min');
const { surroundKeywords, nextWhitespaceIndex, removeDiacritics } = require('lib/string-utils.js');
const { mergeOverlappingIntervals } = require('lib/ArrayUtils.js');
const PLUGIN_NAME = 'gotoAnything';
const { stripMarkdown } = require('lib/markdownUtils');
const striptags = require('striptags');
const Entities = require('html-entities').AllHtmlEntities;
const htmlEntitiesDecode = new Entities().decode;
const markupLanguageUtils = require('lib/markupLanguageUtils');
class GotoAnything {
@ -158,12 +155,12 @@ class Dialog extends React.PureComponent {
}
scheduleListUpdate() {
if (this.listUpdateIID_) return;
if (this.listUpdateIID_) clearTimeout(this.listUpdateIID_);
this.listUpdateIID_ = setTimeout(async () => {
await this.updateList();
this.listUpdateIID_ = null;
}, 10);
}, 100);
}
makeSearchQuery(query) {
@ -184,6 +181,12 @@ class Dialog extends React.PureComponent {
return SearchEngine.instance().allParsedQueryTerms(parsedQuery);
}
markupToHtml() {
if (this.markupToHtml_) return this.markupToHtml_;
this.markupToHtml_ = markupLanguageUtils.newMarkupToHtml();
return this.markupToHtml_;
}
async updateList() {
let resultsInBody = false;
@ -216,7 +219,7 @@ class Dialog extends React.PureComponent {
resultsInBody = !!results.find(row => row.fields.includes('body'));
if (!resultsInBody) {
if (!resultsInBody || this.state.query.length <= 1) {
for (let i = 0; i < results.length; i++) {
const row = results[i];
const path = Folder.folderPathString(this.props.folders, row.parent_id);
@ -225,8 +228,8 @@ class Dialog extends React.PureComponent {
} else {
const limit = 20;
const searchKeywords = this.keywords(searchQuery);
const notes = await Note.byIds(results.map(result => result.id).slice(0, limit), { fields: ['id', 'body'] });
const notesById = notes.reduce((obj, { id, body }) => ((obj[[id]] = body), obj), {});
const notes = await Note.byIds(results.map(result => result.id).slice(0, limit), { fields: ['id', 'body', 'markup_language'] });
const notesById = notes.reduce((obj, { id, body, markup_language }) => ((obj[[id]] = { id, body, markup_language }), obj), {});
for (let i = 0; i < results.length; i++) {
const row = results[i];
@ -237,7 +240,8 @@ class Dialog extends React.PureComponent {
if (i < limit) { // Display note fragments of search keyword matches
const indices = [];
const body = stripMarkdown(htmlEntitiesDecode(striptags(notesById[row.id])));
const note = notesById[row.id];
const body = this.markupToHtml().stripMarkup(note.markup_language, note.body, { collapseWhiteSpaces: true });
// Iterate over all matches in the body for each search keyword
for (let { valueRegex } of searchKeywords) {
@ -258,6 +262,7 @@ class Dialog extends React.PureComponent {
fragments = mergedIndices.map(f => body.slice(f[0], f[1])).join(' ... ');
// Add trailing ellipsis if the final fragment doesn't end where the note is ending
if (mergedIndices.length && mergedIndices[mergedIndices.length - 1][1] !== body.length) fragments += ' ...';
}
results[i] = Object.assign({}, row, { path, fragments });

View File

@ -443,4 +443,4 @@ SPEC CHECKSUMS:
PODFILE CHECKSUM: c8797581a23c0ae9c6a4f471c2b19dda3644a8b2
COCOAPODS: 1.8.4
COCOAPODS: 1.9.3

View File

@ -1,6 +1,7 @@
const urlUtils = require('lib/urlUtils.js');
const Entities = require('html-entities').AllHtmlEntities;
const htmlentities = new Entities().encode;
const htmlparser2 = require('htmlparser2');
// [\s\S] instead of . for multiline matching
// https://stackoverflow.com/a/16119722/561309
@ -107,6 +108,41 @@ class HtmlUtils {
return output.join(' ');
}
stripHtml(html) {
const output = [];
const tagStack = [];
const currentTag = () => {
if (!tagStack.length) return '';
return tagStack[tagStack.length - 1];
};
const disallowedTags = ['script', 'style', 'head', 'iframe', 'frameset', 'frame', 'object', 'base'];
const parser = new htmlparser2.Parser({
onopentag: (name) => {
tagStack.push(name.toLowerCase());
},
ontext: (decodedText) => {
if (disallowedTags.includes(currentTag())) return;
output.push(decodedText);
},
onclosetag: (name) => {
if (currentTag() === name.toLowerCase()) tagStack.pop();
},
}, { decodeEntities: true });
parser.write(html);
parser.end();
return output.join('').replace(/\s+/g, ' ');
}
}
const htmlUtils = new HtmlUtils();

View File

@ -1,5 +1,7 @@
const MdToHtml = require('./MdToHtml');
const HtmlToHtml = require('./HtmlToHtml');
const htmlUtils = require('lib/htmlUtils');
const MarkdownIt = require('markdown-it');
class MarkupToHtml {
constructor(options) {
@ -33,6 +35,34 @@ class MarkupToHtml {
return '';
}
stripMarkup(markupLanguage, markup, options = null) {
if (!markup) return '';
options = Object.assign({}, {
collapseWhiteSpaces: false,
}, options);
let output = markup;
if (markupLanguage === MarkupToHtml.MARKUP_LANGUAGE_MARKDOWN) {
if (!this.rawMarkdownIt_) {
// We enable HTML because we don't want it to be escaped, so
// that it can be stripped off in the stripHtml call below.
this.rawMarkdownIt_ = new MarkdownIt({ html: true });
}
output = this.rawMarkdownIt_.render(output);
}
output = htmlUtils.stripHtml(output).trim();
if (options.collapseWhiteSpaces) {
output = output.replace(/\n+/g, ' ');
output = output.replace(/\s+/g, ' ');
}
return output;
}
async render(markupLanguage, markup, theme, options) {
return this.renderer(markupLanguage).render(markup, theme, options);
}

View File

@ -16,7 +16,7 @@ function installRule(markdownIt, mdOptions, ruleOptions) {
const token = tokens[idx];
let href = utils.getAttr(token.attrs, 'href');
const resourceHrefInfo = urlUtils.parseResourceUrl(href);
const isResourceUrl = !!resourceHrefInfo;
const isResourceUrl = ruleOptions.resources && !!resourceHrefInfo;
let title = utils.getAttr(token.attrs, 'title', isResourceUrl ? '' : href);
let resourceIdAttr = '';

View File

@ -123,7 +123,7 @@ utils.resourceStatus = function(ResourceModel, resourceInfo) {
};
utils.imageReplacement = function(ResourceModel, src, resources, resourceBaseUrl) {
if (!ResourceModel) return null;
if (!ResourceModel || !resources) return null;
if (!ResourceModel.isResourceUrl(src)) return null;

View File

@ -2,7 +2,6 @@ const stringPadding = require('string-padding');
const urlUtils = require('lib/urlUtils');
const MarkdownIt = require('markdown-it');
const { setupLinkify } = require('lib/joplin-renderer');
const removeMarkdown = require('remove-markdown');
// Taken from codemirror/addon/edit/continuelist.js
const listRegex = /^(\s*)([*+-] \[[x ]\]\s|[*+-]\s|(\d+)([.)]\s))(\s*)/;
@ -118,10 +117,6 @@ const markdownUtils = {
const title = lines[0].trim();
return title.replace(filterRegex, '').replace(mdLinkRegex, '$1').replace(emptyMdLinkRegex, '$1').substring(0,80);
},
stripMarkdown(text) {
return removeMarkdown(text, { gfm: false });
},
};
module.exports = markdownUtils;

View File

@ -83,7 +83,7 @@ reg.scheduleSync = async (delay = null, syncOptions = null) => {
reg.scheduleSyncId_ = null;
}
reg.logger().info('Scheduling sync operation...', delay);
reg.logger().debug('Scheduling sync operation...', delay);
if (Setting.value('env') === 'dev' && delay !== 0) {
reg.logger().info('Schedule sync DISABLED!!!');

View File

@ -9362,11 +9362,6 @@
"through2": "^2.0.3"
}
},
"remove-markdown": {
"version": "0.3.0",
"resolved": "https://registry.npmjs.org/remove-markdown/-/remove-markdown-0.3.0.tgz",
"integrity": "sha1-XktmdJOpNXlyjz1S7MHbnKUF3Jg="
},
"remove-trailing-separator": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/remove-trailing-separator/-/remove-trailing-separator-1.1.0.tgz",

View File

@ -80,7 +80,6 @@
"react-native-webview": "^5.12.0",
"react-redux": "5.0.7",
"redux": "4.0.0",
"remove-markdown": "^0.3.0",
"reselect": "^4.0.0",
"rn-fetch-blob": "^0.12.0",
"stream": "0.0.2",