1
0
mirror of https://github.com/laurent22/joplin.git synced 2025-07-16 00:14:34 +02:00

Chore: Added test for OCR search

This commit is contained in:
Laurent Cozic
2024-01-05 14:43:24 +00:00
parent 2bf4f39ca1
commit 73ad7e0572
4 changed files with 42 additions and 17 deletions

View File

@ -1,18 +1,10 @@
import { createNoteAndResource, ocrSampleDir, resourceFetcher, setupDatabaseAndSynchronizer, supportDir, switchClient, synchronizerStart } from '../../testing/test-utils'; import { createNoteAndResource, newOcrService, ocrSampleDir, resourceFetcher, setupDatabaseAndSynchronizer, supportDir, switchClient, synchronizerStart } from '../../testing/test-utils';
import OcrDriverTesseract from './drivers/OcrDriverTesseract';
import OcrService from './OcrService';
import { supportedMimeTypes } from './OcrService'; import { supportedMimeTypes } from './OcrService';
import { createWorker } from 'tesseract.js';
import Resource from '../../models/Resource'; import Resource from '../../models/Resource';
import { ResourceEntity, ResourceOcrStatus } from '../database/types'; import { ResourceEntity, ResourceOcrStatus } from '../database/types';
import { msleep } from '@joplin/utils/time'; import { msleep } from '@joplin/utils/time';
import Logger from '@joplin/utils/Logger'; import Logger from '@joplin/utils/Logger';
// Builds an OcrService backed by the Tesseract driver; the driver is given
// the createWorker function imported from tesseract.js.
const newService = () => new OcrService(new OcrDriverTesseract({ createWorker }));
describe('OcrService', () => { describe('OcrService', () => {
beforeEach(async () => { beforeEach(async () => {
@ -31,7 +23,7 @@ describe('OcrService', () => {
expect(await Resource.needOcrCount(supportedMimeTypes)).toBe(3); expect(await Resource.needOcrCount(supportedMimeTypes)).toBe(3);
const service = newService(); const service = newOcrService();
await service.processResources(); await service.processResources();
const expectedText = 'This is a lot of 12 point text to test the\n' + const expectedText = 'This is a lot of 12 point text to test the\n' +
@ -88,7 +80,7 @@ describe('OcrService', () => {
it('should process PDF resources', async () => { it('should process PDF resources', async () => {
const { resource } = await createNoteAndResource({ path: `${ocrSampleDir}/dummy.pdf` }); const { resource } = await createNoteAndResource({ path: `${ocrSampleDir}/dummy.pdf` });
const service = newService(); const service = newOcrService();
await service.processResources(); await service.processResources();
@ -111,7 +103,7 @@ describe('OcrService', () => {
await msleep(1); await msleep(1);
const service = newService(); const service = newOcrService();
await service.processResources(); await service.processResources();
@ -157,7 +149,7 @@ describe('OcrService', () => {
fetch_error: 'cannot be downloaded', fetch_error: 'cannot be downloaded',
}); });
const service = newService(); const service = newOcrService();
// The service will print a warning so we disable it in tests // The service will print a warning so we disable it in tests
Logger.globalLogger.enabled = false; Logger.globalLogger.enabled = false;
@ -196,7 +188,7 @@ describe('OcrService', () => {
it('should handle conflicts if two clients process the same resource then sync', async () => { it('should handle conflicts if two clients process the same resource then sync', async () => {
await createNoteAndResource({ path: `${ocrSampleDir}/dummy.pdf` }); await createNoteAndResource({ path: `${ocrSampleDir}/dummy.pdf` });
const service1 = newService(); const service1 = newOcrService();
await synchronizerStart(); await synchronizerStart();
await service1.processResources(); await service1.processResources();
@ -205,7 +197,7 @@ describe('OcrService', () => {
await synchronizerStart(); await synchronizerStart();
await msleep(1); await msleep(1);
await resourceFetcher().startAndWait(); await resourceFetcher().startAndWait();
const service2 = newService(); const service2 = newOcrService();
await service2.processResources(); await service2.processResources();
await synchronizerStart(); await synchronizerStart();
const expectedResouceUpatedTime = (await Resource.all())[0].updated_time; const expectedResouceUpatedTime = (await Resource.all())[0].updated_time;
@ -236,7 +228,7 @@ describe('OcrService', () => {
// it('should process resources 2', async () => { // it('should process resources 2', async () => {
// await createNoteAndResource({ path: `${require('os').homedir()}/Desktop/AllClients.png` }); // await createNoteAndResource({ path: `${require('os').homedir()}/Desktop/AllClients.png` });
// const service = newService(); // const service = newOcrService();
// await service.processResources(); // await service.processResources();
// console.info(await Resource.all()); // console.info(await Resource.all());

View File

@ -1,5 +1,8 @@
import { ModelType } from '../../BaseModel';
import Note from '../../models/Note';
import Resource from '../../models/Resource'; import Resource from '../../models/Resource';
import { db, msleep, setupDatabaseAndSynchronizer, switchClient } from '../../testing/test-utils'; import shim from '../../shim';
import { db, msleep, newOcrService, ocrSampleDir, resourceService, setupDatabaseAndSynchronizer, switchClient } from '../../testing/test-utils';
import { ResourceOcrStatus } from '../database/types'; import { ResourceOcrStatus } from '../database/types';
import SearchEngine from './SearchEngine'; import SearchEngine from './SearchEngine';
@ -35,6 +38,27 @@ describe('SearchEngine.resources', () => {
expect(normalized[0].body).toBe('hello, how are you ?'); expect(normalized[0].body).toBe('hello, how are you ?');
}); });
it('should return notes associated with indexed resources', async () => {
	// Two empty notes are saved, but only the first one gets the sample image
	// attached, so it is the only note expected in the search results.
	const noteWithImage = await Note.save({});
	await Note.save({});
	await shim.attachFileToNote(noteWithImage, `${ocrSampleDir}/testocr.png`);
	const [attachedResource] = await Resource.all();

	// Index the note/resource associations, then run the OCR service over the
	// resources before syncing the search engine tables.
	await resourceService().indexNoteResources();
	await newOcrService().processResources();

	const engine = newSearchEngine();
	await engine.syncTables();

	// NOTE(review): presumably "lazy fox" is text found in testocr.png - the
	// notes themselves were saved without any content.
	const matches = await engine.search('lazy fox');
	expect(matches.length).toBe(1);

	// The result must identify the note, while item_id and item_type must
	// point at the resource.
	const [match] = matches;
	expect(match.id).toBe(noteWithImage.id);
	expect(match.item_id).toBe(attachedResource.id);
	expect(match.item_type).toBe(ModelType.Resource);
});
it('should delete normalized data when a resource is deleted', async () => { it('should delete normalized data when a resource is deleted', async () => {
const engine = newSearchEngine(); const engine = newSearchEngine();

View File

@ -534,4 +534,5 @@ describe('services/SearchEngine', () => {
expect((await engine.search(n2.id))[0].id).toBe(n2.id); expect((await engine.search(n2.id))[0].id).toBe(n2.id);
expect(await engine.search(f1.id)).toEqual([]); expect(await engine.search(f1.id)).toEqual([]);
})); }));
}); });

View File

@ -63,6 +63,9 @@ const { Dirnames } = require('../services/synchronizer/utils/types');
import RSA from '../services/e2ee/RSA.node'; import RSA from '../services/e2ee/RSA.node';
import { State as ShareState } from '../services/share/reducer'; import { State as ShareState } from '../services/share/reducer';
import initLib from '../initLib'; import initLib from '../initLib';
import OcrDriverTesseract from '../services/ocr/drivers/OcrDriverTesseract';
import OcrService from '../services/ocr/OcrService';
import { createWorker } from 'tesseract.js';
// Each suite has its own separate data and temp directory so that multiple // Each suite has its own separate data and temp directory so that multiple
// suites can be run at the same time. suiteName is what is used to // suites can be run at the same time. suiteName is what is used to
@ -1027,4 +1030,9 @@ const simulateReadOnlyShareEnv = (shareId: string) => {
}; };
}; };
// Creates a new OcrService that uses an OcrDriverTesseract driver, itself
// constructed with the createWorker function imported from tesseract.js.
export const newOcrService = () => {
	return new OcrService(new OcrDriverTesseract({ createWorker }));
};
export { supportDir, createNoteAndResource, createTempFile, createTestShareData, simulateReadOnlyShareEnv, waitForFolderCount, afterAllCleanUp, exportDir, synchronizerStart, afterEachCleanUp, syncTargetName, setSyncTargetName, syncDir, createTempDir, isNetworkSyncTarget, kvStore, expectThrow, logger, expectNotThrow, resourceService, resourceFetcher, tempFilePath, allSyncTargetItemsEncrypted, msleep, setupDatabase, revisionService, setupDatabaseAndSynchronizer, db, synchronizer, fileApi, sleep, clearDatabase, switchClient, syncTargetId, objectsEqual, checkThrowAsync, checkThrow, encryptionService, loadEncryptionMasterKey, fileContentEqual, decryptionWorker, currentClientId, id, ids, sortedIds, at, createNTestNotes, createNTestFolders, createNTestTags, TestApp }; export { supportDir, createNoteAndResource, createTempFile, createTestShareData, simulateReadOnlyShareEnv, waitForFolderCount, afterAllCleanUp, exportDir, synchronizerStart, afterEachCleanUp, syncTargetName, setSyncTargetName, syncDir, createTempDir, isNetworkSyncTarget, kvStore, expectThrow, logger, expectNotThrow, resourceService, resourceFetcher, tempFilePath, allSyncTargetItemsEncrypted, msleep, setupDatabase, revisionService, setupDatabaseAndSynchronizer, db, synchronizer, fileApi, sleep, clearDatabase, switchClient, syncTargetId, objectsEqual, checkThrowAsync, checkThrow, encryptionService, loadEncryptionMasterKey, fileContentEqual, decryptionWorker, currentClientId, id, ids, sortedIds, at, createNTestNotes, createNTestFolders, createNTestTags, TestApp };