From 09d088b2b5c3295271e1f673bc8135a01eecc62e Mon Sep 17 00:00:00 2001
From: Henry Heino <46334387+personalizedrefrigerator@users.noreply.github.com>
Date: Sun, 12 May 2024 02:01:12 -0700
Subject: [PATCH] Mobile,Desktop: Fix nonbreaking spaces and CRLF break search for adjacent words (#10417)

---
 packages/lib/services/search/SearchEngine.test.ts | 14 ++++++++++++++
 packages/lib/services/search/SearchEngine.ts      |  4 ++++
 2 files changed, 18 insertions(+)

diff --git a/packages/lib/services/search/SearchEngine.test.ts b/packages/lib/services/search/SearchEngine.test.ts
index 3bad73880..e5a587195 100644
--- a/packages/lib/services/search/SearchEngine.test.ts
+++ b/packages/lib/services/search/SearchEngine.test.ts
@@ -340,6 +340,20 @@ describe('services/SearchEngine', () => {
 		expect((await engine.search('testing')).length).toBe(1);
 	}));
 
+	it('should use nonbreaking spaces as separators', (async () => {
+		await Note.save({
+			title: 'Test',
+			body: 'This is\u00A0a\u00A0test\r\nof different\r\nspace separators.',
+		});
+
+		await engine.syncTables();
+
+		expect((await engine.search('test')).length).toBe(1);
+		expect((await engine.search('different')).length).toBe(1);
+		expect((await engine.search('space')).length).toBe(1);
+		expect((await engine.search('separators')).length).toBe(1);
+	}));
+
 	it('should supports various query types', (async () => {
 		let rows;
 
diff --git a/packages/lib/services/search/SearchEngine.ts b/packages/lib/services/search/SearchEngine.ts
index fd4c85ae5..09a1c08ad 100644
--- a/packages/lib/services/search/SearchEngine.ts
+++ b/packages/lib/services/search/SearchEngine.ts
@@ -629,6 +629,10 @@ export default class SearchEngine {
 		// https://github.com/laurent22/joplin/issues/9694
 		normalizedText = htmlentitiesDecode(normalizedText);
 
+		// The FTS tokenizer doesn't understand some types of space,
+		// including nonbreaking spaces and CRLF.
+		normalizedText = normalizedText.replace(/\s/g, ' ');
+
 		return removeDiacritics(normalizedText.toLowerCase());
 	}
 