From 3f75d770f707e3dd735dc4aee529ca7e8583ef57 Mon Sep 17 00:00:00 2001 From: pedr Date: Wed, 6 Aug 2025 06:37:20 -0300 Subject: [PATCH] Desktop: Resolves #12224: Add an option to enable or disable search in OCR text (#12578) --- .../lib/models/settings/builtInMetadata.ts | 10 ++++++ .../lib/services/search/SearchEngine.test.ts | 32 ++++++++++++++++++- packages/lib/services/search/SearchEngine.ts | 2 +- 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/packages/lib/models/settings/builtInMetadata.ts b/packages/lib/models/settings/builtInMetadata.ts index 46e3a5ba9f..16eba8f555 100644 --- a/packages/lib/models/settings/builtInMetadata.ts +++ b/packages/lib/models/settings/builtInMetadata.ts @@ -594,6 +594,16 @@ const builtInMetadata = (Setting: typeof SettingType) => { label: () => _('OCR: Clear cache and re-download language data files'), }, + 'ocr.searchInExtractedContent': { + value: true, + type: SettingItemType.Bool, + advanced: true, + public: true, + appTypes: [AppType.Desktop], + storage: SettingStorage.Database, + label: () => _('OCR: Search in extracted content'), + }, + theme: { value: Setting.THEME_LIGHT, type: SettingItemType.Int, diff --git a/packages/lib/services/search/SearchEngine.test.ts b/packages/lib/services/search/SearchEngine.test.ts index e5a587195b..e37b9419a7 100644 --- a/packages/lib/services/search/SearchEngine.test.ts +++ b/packages/lib/services/search/SearchEngine.test.ts @@ -1,8 +1,10 @@ -import { setupDatabaseAndSynchronizer, db, sleep, switchClient, msleep } from '../../testing/test-utils'; +import { setupDatabaseAndSynchronizer, db, sleep, switchClient, msleep, createNoteAndResource } from '../../testing/test-utils'; import SearchEngine from './SearchEngine'; import Note from '../../models/Note'; import ItemChange from '../../models/ItemChange'; import Setting from '../../models/Setting'; +import Resource from '../../models/Resource'; +import { ResourceOcrStatus } from '../database/types'; let engine: SearchEngine = null; @@ -583,4 +585,32 @@ describe('services/SearchEngine', () => { // expect(await engine.search(f1.id)).toEqual([]); // })); + it.each( + [ + ['find', 'enabled', true, 1], + ['not find', 'disabled', false, 0], + ], + )('should %s resources if searching in OCR content is %s', + async (_testName: string, _testName2: string, isSearchEnabled: boolean, resourcesFound: number) => { + const { resource } = await createNoteAndResource(); + await Resource.save({ + id: resource.id, + ocr_status: ResourceOcrStatus.Done, + ocr_text: 'héllô, hôw äre yoù ?', + }); + + await engine.syncTables(); + + const normalized = await db().selectAll('select * from items_fts'); + expect(normalized[0].body).toBe('hello, how are you ?'); + + Setting.setValue('ocr.searchInExtractedContent', isSearchEnabled); + + const rows = await engine.search('hello', { + searchType: SearchEngine.SEARCH_TYPE_FTS, + includeOrphanedResources: true, + }); + expect(rows.length).toBe(resourcesFound); + }); + }); diff --git a/packages/lib/services/search/SearchEngine.ts b/packages/lib/services/search/SearchEngine.ts index 16abf0acd8..cb4e4b7b92 100644 --- a/packages/lib/services/search/SearchEngine.ts +++ b/packages/lib/services/search/SearchEngine.ts @@ -815,7 +815,7 @@ export default class SearchEngine { }; }); - if (!queryHasFilters) { + if (!queryHasFilters && Setting.value('ocr.searchInExtractedContent')) { const toSearch = parsedQuery.allTerms.map(t => t.value).join(' '); let itemRows: ProcessResultsRow[] = [];