Desktop, CLI: Fixed import of notes that contain resource links with a hash fragment (e.g. `:/<id>#anchor`)

2025-08-13 22:12:50 +02:00 · 2019-09-12 22:48:10 +01:00
parent 0379523eaf
commit 88561a6c3c
3 changed files with 68 additions and 33 deletions
--- a/CliClient/tests/urlUtils.js
+++ b/CliClient/tests/urlUtils.js
@@ -35,7 +35,10 @@ describe('urlUtils', function() {
 	it('should detect resource URLs', asyncTest(async (done) => {
 		const testCases = [
 			[':/1234abcd1234abcd1234abcd1234abcd', { itemId: '1234abcd1234abcd1234abcd1234abcd', hash: '' }],
+			[':/1234abcd1234abcd1234abcd1234abcd "some text"', { itemId: '1234abcd1234abcd1234abcd1234abcd', hash: '' }],
 			[':/1234abcd1234abcd1234abcd1234abcd#hash', { itemId: '1234abcd1234abcd1234abcd1234abcd', hash: 'hash' }],
+			[':/1234abcd1234abcd1234abcd1234abcd#Книги-из-номер', { itemId: '1234abcd1234abcd1234abcd1234abcd', hash: 'Книги-из-номер' }],
+			[':/1234abcd1234abcd1234abcd1234abcd#hash "some text"', { itemId: '1234abcd1234abcd1234abcd1234abcd', hash: 'hash' }],
 			['joplin://1234abcd1234abcd1234abcd1234abcd', { itemId: '1234abcd1234abcd1234abcd1234abcd', hash: '' }],
 			['joplin://1234abcd1234abcd1234abcd1234abcd#hash', { itemId: '1234abcd1234abcd1234abcd1234abcd', hash: 'hash' }],
 			[':/1234abcd1234abcd1234abcd1234abc', null],
@@ -49,10 +52,33 @@ describe('urlUtils', function() {
 			if (!expected) {
 				expect(!u).toBe(true);
 			} else {
-				expect(u.itemId).toBe(expected.itemId);
-				expect(u.hash).toBe(expected.hash);
+				if (!u) {
+					expect(!!u).toBe(true);
+				} else {
+					expect(u.itemId).toBe(expected.itemId);
+					expect(u.hash).toBe(expected.hash);
+				}
 			}
 		}
 	}));

+	it('should extract resource URLs', asyncTest(async (done) => {
+		const testCases = [
+			['Bla [](:/11111111111111111111111111111111) bla [](:/22222222222222222222222222222222) bla', ['11111111111111111111111111111111', '22222222222222222222222222222222']],
+			['Bla [](:/11111111111111111111111111111111 "Some title") bla [](:/22222222222222222222222222222222 "something else") bla', ['11111111111111111111111111111111', '22222222222222222222222222222222']],
+			['Bla <img src=":/fcca2938a96a22570e8eae2565bc6b0b"/> bla [](:/22222222222222222222222222222222) bla', ['fcca2938a96a22570e8eae2565bc6b0b', '22222222222222222222222222222222']],
+			['Bla <img src=":/fcca2938a96a22570e8eae2565bc6b0b"/> bla <a href=":/33333333333333333333333333333333"/>Some note link</a> blu [](:/22222222222222222222222222222222) bla', ['fcca2938a96a22570e8eae2565bc6b0b', '33333333333333333333333333333333', '22222222222222222222222222222222']],
+			['nothing here', []],
+			['', []],
+		];
+
+		for (const t of testCases) {
+			const result = urlUtils.extractResourceUrls(t[0]);
+			const expected = t[1];
+
+			const itemIds = result.map(r => r.itemId);
+			expect(itemIds.sort().join(',')).toBe(expected.sort().join(','));
+		}
+	}));
+
 });
--- a/ReactNativeClient/lib/models/Note.js
+++ b/ReactNativeClient/lib/models/Note.js
@@ -10,6 +10,7 @@ const { time } = require('lib/time-utils.js');
 const { _ } = require('lib/locale.js');
 const ArrayUtils = require('lib/ArrayUtils.js');
 const lodash = require('lodash');
+const urlUtils = require('lib/urlUtils.js');

 class Note extends BaseItem {
 	static tableName() {
@@ -114,27 +115,9 @@ class Note extends BaseItem {
 	static linkedItemIds(body) {
 		if (!body || body.length <= 32) return [];

-		// For example: ![](:/fcca2938a96a22570e8eae2565bc6b0b)
-		let matches = body.match(/\(:\/[a-zA-Z0-9]{32}\)/g);
-		if (!matches) matches = [];
-		matches = matches.map(m => m.substr(3, 32));
-
-		// For example: ![](:/fcca2938a96a22570e8eae2565bc6b0b "Some title")
-		let matches2 = body.match(/\(:\/[a-zA-Z0-9]{32}\s(.*?)\)/g);
-		if (!matches2) matches2 = [];
-		matches2 = matches2.map(m => m.substr(3, 32));
-		matches = matches.concat(matches2);
-
-		// For example: <img src=":/fcca2938a96a22570e8eae2565bc6b0b"/>
-		const imgRegex = /<img[\s\S]*?src=["']:\/([a-zA-Z0-9]{32})["'][\s\S]*?>/gi;
-		const imgMatches = [];
-		while (true) {
-			const m = imgRegex.exec(body);
-			if (!m) break;
-			imgMatches.push(m[1]);
-		}
-
-		return ArrayUtils.unique(matches.concat(imgMatches));
+		const links = urlUtils.extractResourceUrls(body);
+		const itemIds = links.map(l => l.itemId);
+		return ArrayUtils.unique(itemIds);
 	}

 	static async linkedItems(body) {
--- a/ReactNativeClient/lib/urlUtils.js
+++ b/ReactNativeClient/lib/urlUtils.js
@@ -40,27 +40,53 @@ urlUtils.prependBaseUrl = function(url, baseUrl) {
 	}
 };

+const resourceRegex = /^(joplin:\/\/|:\/)([0-9a-zA-Z]{32})(|#[^\s]*)(|\s".*?")$/;
+
 urlUtils.isResourceUrl = function(url) {
-	return !!url.match(/^(joplin:\/\/|:\/)[0-9a-zA-Z]{32}(|#.*)$/);
+	return !!url.match(resourceRegex);
 };

 urlUtils.parseResourceUrl = function(url) {
 	if (!urlUtils.isResourceUrl(url)) return null;

-	const filename = url.split('/').pop();
-	const splitted = filename.split('#');
+	const match = url.match(resourceRegex);

-	const output = {
-		itemId: '',
-		hash: '',
-	};
-
-	if (splitted.length) output.itemId = splitted[0];
+	const itemId = match[2];
+	let hash = match[3].trim();

 	// In general we want the hash to be decoded so that non-alphabetical languages
 	// appear as-is without being encoded with %.
 	// Fixes https://github.com/laurent22/joplin/issues/1870
-	if (splitted.length >= 2) output.hash = urlDecode(splitted[1]);
+	if (hash) hash = urlDecode(hash.substr(1)); // Remove the first #
+
+	return {
+		itemId: itemId,
+		hash: hash,
+	};
+};
+
+urlUtils.extractResourceUrls = function(text) {
+	const markdownLinksRE = /\]\((.*?)\)/g;
+	const output = [];
+	let result = null;
+
+	while ((result = markdownLinksRE.exec(text)) !== null) {
+		const resourceUrlInfo = urlUtils.parseResourceUrl(result[1]);
+		if (resourceUrlInfo) output.push(resourceUrlInfo);
+	}
+
+	const htmlRegexes = [
+		/<img[\s\S]*?src=["']:\/([a-zA-Z0-9]{32})["'][\s\S]*?>/gi,
+		/<a[\s\S]*?href=["']:\/([a-zA-Z0-9]{32})["'][\s\S]*?>/gi,
+	];
+
+	for (const htmlRegex of htmlRegexes) {
+		while (true) {
+			const m = htmlRegex.exec(text);
+			if (!m) break;
+			output.push({ itemId: m[1], hash: '' });
+		}
+	}

 	return output;
 };