diff --git a/packages/lib/models/settings/builtInMetadata.ts b/packages/lib/models/settings/builtInMetadata.ts index 46e3a5ba9f..16eba8f555 100644 --- a/packages/lib/models/settings/builtInMetadata.ts +++ b/packages/lib/models/settings/builtInMetadata.ts @@ -594,6 +594,16 @@ const builtInMetadata = (Setting: typeof SettingType) => { label: () => _('OCR: Clear cache and re-download language data files'), }, + 'ocr.searchInExtractedContent': { + value: true, + type: SettingItemType.Bool, + advanced: true, + public: true, + appTypes: [AppType.Desktop], + storage: SettingStorage.Database, + label: () => _('OCR: Search in extracted content'), + }, + theme: { value: Setting.THEME_LIGHT, type: SettingItemType.Int, diff --git a/packages/lib/services/search/SearchEngine.test.ts b/packages/lib/services/search/SearchEngine.test.ts index e5a587195b..e37b9419a7 100644 --- a/packages/lib/services/search/SearchEngine.test.ts +++ b/packages/lib/services/search/SearchEngine.test.ts @@ -1,8 +1,10 @@ -import { setupDatabaseAndSynchronizer, db, sleep, switchClient, msleep } from '../../testing/test-utils'; +import { setupDatabaseAndSynchronizer, db, sleep, switchClient, msleep, createNoteAndResource } from '../../testing/test-utils'; import SearchEngine from './SearchEngine'; import Note from '../../models/Note'; import ItemChange from '../../models/ItemChange'; import Setting from '../../models/Setting'; +import Resource from '../../models/Resource'; +import { ResourceOcrStatus } from '../database/types'; let engine: SearchEngine = null; @@ -583,4 +585,32 @@ describe('services/SearchEngine', () => { // expect(await engine.search(f1.id)).toEqual([]); // })); + it.each( + [ + ['find', 'enabled', true, 1], + ['not find', 'disabled', false, 0], + ], + )('should %s resources if searching in OCR content is %s', + async (_testName: string, _testName2: string, isSearchEnabled: boolean, resourcesFound: number) => { + const { resource } = await createNoteAndResource(); + await Resource.save({ + id: resource.id, + ocr_status: ResourceOcrStatus.Done, + ocr_text: 'héllô, hôw äre yoù ?', + }); + + await engine.syncTables(); + + const normalized = await db().selectAll('select * from items_fts'); + expect(normalized[0].body).toBe('hello, how are you ?'); + + Setting.setValue('ocr.searchInExtractedContent', isSearchEnabled); + + const rows = await engine.search('hello', { + searchType: SearchEngine.SEARCH_TYPE_FTS, + includeOrphanedResources: true, + }); + expect(rows.length).toBe(resourcesFound); + }); + }); diff --git a/packages/lib/services/search/SearchEngine.ts b/packages/lib/services/search/SearchEngine.ts index 16abf0acd8..cb4e4b7b92 100644 --- a/packages/lib/services/search/SearchEngine.ts +++ b/packages/lib/services/search/SearchEngine.ts @@ -815,7 +815,7 @@ export default class SearchEngine { }; }); - if (!queryHasFilters) { + if (!queryHasFilters && Setting.value('ocr.searchInExtractedContent')) { const toSearch = parsedQuery.allTerms.map(t => t.value).join(' '); let itemRows: ProcessResultsRow[] = [];