1
0
mirror of https://github.com/laurent22/joplin.git synced 2024-11-27 08:21:03 +02:00

Desktop, CLI: Fixed import of notes that contain links with hashes

This commit is contained in:
Laurent Cozic 2019-09-12 22:48:10 +01:00
parent 0379523eaf
commit 88561a6c3c
3 changed files with 68 additions and 33 deletions

View File

@ -35,7 +35,10 @@ describe('urlUtils', function() {
it('should detect resource URLs', asyncTest(async (done) => {
const testCases = [
[':/1234abcd1234abcd1234abcd1234abcd', { itemId: '1234abcd1234abcd1234abcd1234abcd', hash: '' }],
[':/1234abcd1234abcd1234abcd1234abcd "some text"', { itemId: '1234abcd1234abcd1234abcd1234abcd', hash: '' }],
[':/1234abcd1234abcd1234abcd1234abcd#hash', { itemId: '1234abcd1234abcd1234abcd1234abcd', hash: 'hash' }],
[':/1234abcd1234abcd1234abcd1234abcd#Книги-из-номер', { itemId: '1234abcd1234abcd1234abcd1234abcd', hash: 'Книги-из-номер' }],
[':/1234abcd1234abcd1234abcd1234abcd#hash "some text"', { itemId: '1234abcd1234abcd1234abcd1234abcd', hash: 'hash' }],
['joplin://1234abcd1234abcd1234abcd1234abcd', { itemId: '1234abcd1234abcd1234abcd1234abcd', hash: '' }],
['joplin://1234abcd1234abcd1234abcd1234abcd#hash', { itemId: '1234abcd1234abcd1234abcd1234abcd', hash: 'hash' }],
[':/1234abcd1234abcd1234abcd1234abc', null],
@ -48,11 +51,34 @@ describe('urlUtils', function() {
if (!expected) {
expect(!u).toBe(true);
} else {
if (!u) {
expect(!!u).toBe(true);
} else {
expect(u.itemId).toBe(expected.itemId);
expect(u.hash).toBe(expected.hash);
}
}
}
}));
// Checks that urlUtils.extractResourceUrls() finds the item IDs referenced by
// Markdown links (with and without a title) and by HTML <img>/<a> tags.
// The comparison is order-insensitive: both ID lists are sorted before being
// joined and compared.
it('should extract resource URLs', asyncTest(async (done) => {
	const testCases = [
		['Bla [](:/11111111111111111111111111111111) bla [](:/22222222222222222222222222222222) bla', ['11111111111111111111111111111111', '22222222222222222222222222222222']],
		['Bla [](:/11111111111111111111111111111111 "Some title") bla [](:/22222222222222222222222222222222 "something else") bla', ['11111111111111111111111111111111', '22222222222222222222222222222222']],
		['Bla <img src=":/fcca2938a96a22570e8eae2565bc6b0b"/> bla [](:/22222222222222222222222222222222) bla', ['fcca2938a96a22570e8eae2565bc6b0b', '22222222222222222222222222222222']],
		['Bla <img src=":/fcca2938a96a22570e8eae2565bc6b0b"/> bla <a href=":/33333333333333333333333333333333"/>Some note link</a> blu [](:/22222222222222222222222222222222) bla', ['fcca2938a96a22570e8eae2565bc6b0b', '33333333333333333333333333333333', '22222222222222222222222222222222']],
		['nothing here', []],
		['', []],
	];

	testCases.forEach(([inputText, expectedIds]) => {
		const extractedIds = urlUtils.extractResourceUrls(inputText).map(link => link.itemId);
		const actualKey = extractedIds.sort().join(',');
		const expectedKey = expectedIds.sort().join(',');
		expect(actualKey).toBe(expectedKey);
	});
}));
});

View File

@ -10,6 +10,7 @@ const { time } = require('lib/time-utils.js');
const { _ } = require('lib/locale.js');
const ArrayUtils = require('lib/ArrayUtils.js');
const lodash = require('lodash');
const urlUtils = require('lib/urlUtils.js');
class Note extends BaseItem {
static tableName() {
@ -114,27 +115,9 @@ class Note extends BaseItem {
// Returns the de-duplicated list of item IDs that a note body links to.
// A falsy body, or one of 32 characters or fewer, yields an empty array
// (item IDs matched below are themselves 32 characters long).
//
// NOTE(review): this block appears to interleave the pre-change and
// post-change lines of the commit shown here (the hunk header reports 27 old
// lines becoming 9). Presumably the regex-based section ending at the first
// `return` is the removed implementation and the urlUtils-based lines after
// it are the replacement - confirm against the repository.
static linkedItemIds(body) {
if (!body || body.length <= 32) return [];
// For example: ![](:/fcca2938a96a22570e8eae2565bc6b0b)
let matches = body.match(/\(:\/[a-zA-Z0-9]{32}\)/g);
if (!matches) matches = [];
// Each match looks like "(:/<id>)", so the ID starts at offset 3.
matches = matches.map(m => m.substr(3, 32));
// For example: ![](:/fcca2938a96a22570e8eae2565bc6b0b "Some title")
let matches2 = body.match(/\(:\/[a-zA-Z0-9]{32}\s(.*?)\)/g);
if (!matches2) matches2 = [];
matches2 = matches2.map(m => m.substr(3, 32));
matches = matches.concat(matches2);
// For example: <img src=":/fcca2938a96a22570e8eae2565bc6b0b"/>
const imgRegex = /<img[\s\S]*?src=["']:\/([a-zA-Z0-9]{32})["'][\s\S]*?>/gi;
const imgMatches = [];
// exec() on a global regex resumes after the previous match, so looping
// until it returns null visits every <img> occurrence.
while (true) {
const m = imgRegex.exec(body);
if (!m) break;
imgMatches.push(m[1]);
}
return ArrayUtils.unique(matches.concat(imgMatches));
// Delegates link discovery to urlUtils.extractResourceUrls() and keeps only
// the item IDs, dropping any hash information.
const links = urlUtils.extractResourceUrls(body);
const itemIds = links.map(l => l.itemId);
return ArrayUtils.unique(itemIds);
}
static async linkedItems(body) {

View File

@ -40,27 +40,53 @@ urlUtils.prependBaseUrl = function(url, baseUrl) {
}
};
// Matches a whole resource URL string. Capture groups:
//   1: the scheme prefix, either "joplin://" or ":/"
//   2: the 32-character alphanumeric item ID
//   3: empty, or "#" followed by any run of non-whitespace characters (the hash)
//   4: empty, or one whitespace character followed by a double-quoted title
const resourceRegex = /^(joplin:\/\/|:\/)([0-9a-zA-Z]{32})(|#[^\s]*)(|\s".*?")$/;
// Returns true when `url` has the shape of a resource URL: a "joplin://" or
// ":/" prefix followed by a 32-character alphanumeric ID and an optional
// "#hash" part.
//
// NOTE(review): the two consecutive `return` lines appear to be the
// pre-change line (inline regex) and the post-change line (shared
// resourceRegex, which additionally accepts a trailing quoted title) of the
// commit shown here - confirm against the repository which one is current.
urlUtils.isResourceUrl = function(url) {
return !!url.match(/^(joplin:\/\/|:\/)[0-9a-zA-Z]{32}(|#.*)$/);
return !!url.match(resourceRegex);
};
// Parses a resource URL into `{ itemId, hash }`. Returns null when
// urlUtils.isResourceUrl() rejects the URL. `hash` is an empty string when
// the URL carries no hash; otherwise it is the text after the leading "#",
// passed through urlDecode().
//
// NOTE(review): this block appears to interleave the pre-change lines (the
// `filename` / `splitted` / `output` split-based parsing) with the
// post-change lines (the `match`-based parsing) of the commit shown here -
// confirm against the repository before relying on either path.
urlUtils.parseResourceUrl = function(url) {
if (!urlUtils.isResourceUrl(url)) return null;
const filename = url.split('/').pop();
const splitted = filename.split('#');
const match = url.match(resourceRegex);
const output = {
itemId: '',
hash: '',
};
if (splitted.length) output.itemId = splitted[0];
// Group 2 of resourceRegex is the item ID; group 3 is either empty or the
// "#..." part.
const itemId = match[2];
let hash = match[3].trim();
// In general we want the hash to be decoded so that non-alphabetical languages
// appear as-is without being encoded with %.
// Fixes https://github.com/laurent22/joplin/issues/1870
if (splitted.length >= 2) output.hash = urlDecode(splitted[1]);
if (hash) hash = urlDecode(hash.substr(1)); // Remove the first #
return {
itemId: itemId,
hash: hash,
};
};
// Scans `text` for links to items and returns one `{ itemId, hash }` object
// per link found, in this order: Markdown links first, then HTML <img> tags,
// then HTML <a> tags. Duplicates are not removed.
//
// - Markdown: the content between "](" and the next ")" is handed to
//   urlUtils.parseResourceUrl(); anything it rejects (returns null for) is
//   skipped.
// - HTML: only src/href values of the exact form ":/<32-char id>" are matched,
//   and the reported hash is always an empty string.
urlUtils.extractResourceUrls = function(text) {
	const found = [];

	// A fresh global regex per call, so lastIndex always starts at 0.
	const markdownLinkPattern = /\]\((.*?)\)/g;
	for (let linkMatch = markdownLinkPattern.exec(text); linkMatch !== null; linkMatch = markdownLinkPattern.exec(text)) {
		const parsed = urlUtils.parseResourceUrl(linkMatch[1]);
		if (parsed) found.push(parsed);
	}

	const htmlTagPatterns = [
		/<img[\s\S]*?src=["']:\/([a-zA-Z0-9]{32})["'][\s\S]*?>/gi,
		/<a[\s\S]*?href=["']:\/([a-zA-Z0-9]{32})["'][\s\S]*?>/gi,
	];

	htmlTagPatterns.forEach(tagPattern => {
		// exec() on a global regex continues from the previous match until
		// it returns null.
		let tagMatch = tagPattern.exec(text);
		while (tagMatch) {
			found.push({ itemId: tagMatch[1], hash: '' });
			tagMatch = tagPattern.exec(text);
		}
	});

	return found;
};