You've already forked joplin
mirror of
https://github.com/laurent22/joplin.git
synced 2025-07-16 00:14:34 +02:00
Chore: Added test for OCR search
This commit is contained in:
@ -1,18 +1,10 @@
|
||||
import { createNoteAndResource, ocrSampleDir, resourceFetcher, setupDatabaseAndSynchronizer, supportDir, switchClient, synchronizerStart } from '../../testing/test-utils';
|
||||
import OcrDriverTesseract from './drivers/OcrDriverTesseract';
|
||||
import OcrService from './OcrService';
|
||||
import { createNoteAndResource, newOcrService, ocrSampleDir, resourceFetcher, setupDatabaseAndSynchronizer, supportDir, switchClient, synchronizerStart } from '../../testing/test-utils';
|
||||
import { supportedMimeTypes } from './OcrService';
|
||||
import { createWorker } from 'tesseract.js';
|
||||
import Resource from '../../models/Resource';
|
||||
import { ResourceEntity, ResourceOcrStatus } from '../database/types';
|
||||
import { msleep } from '@joplin/utils/time';
|
||||
import Logger from '@joplin/utils/Logger';
|
||||
|
||||
// Test-local factory: builds an OcrService around an OcrDriverTesseract that
// is given tesseract.js's `createWorker`.
// NOTE(review): the body is identical to `newOcrService` imported from
// '../../testing/test-utils' in this same diff, so this looks like the
// pre-change (removed) side of the hunk - confirm before relying on it.
const newService = () => {
|
||||
const driver = new OcrDriverTesseract({ createWorker });
|
||||
return new OcrService(driver);
|
||||
};
|
||||
|
||||
describe('OcrService', () => {
|
||||
|
||||
beforeEach(async () => {
|
||||
@ -31,7 +23,7 @@ describe('OcrService', () => {
|
||||
|
||||
expect(await Resource.needOcrCount(supportedMimeTypes)).toBe(3);
|
||||
|
||||
const service = newService();
|
||||
const service = newOcrService();
|
||||
await service.processResources();
|
||||
|
||||
const expectedText = 'This is a lot of 12 point text to test the\n' +
|
||||
@ -88,7 +80,7 @@ describe('OcrService', () => {
|
||||
it('should process PDF resources', async () => {
|
||||
const { resource } = await createNoteAndResource({ path: `${ocrSampleDir}/dummy.pdf` });
|
||||
|
||||
const service = newService();
|
||||
const service = newOcrService();
|
||||
|
||||
await service.processResources();
|
||||
|
||||
@ -111,7 +103,7 @@ describe('OcrService', () => {
|
||||
|
||||
await msleep(1);
|
||||
|
||||
const service = newService();
|
||||
const service = newOcrService();
|
||||
|
||||
await service.processResources();
|
||||
|
||||
@ -157,7 +149,7 @@ describe('OcrService', () => {
|
||||
fetch_error: 'cannot be downloaded',
|
||||
});
|
||||
|
||||
const service = newService();
|
||||
const service = newOcrService();
|
||||
|
||||
// The service will print a warning so we disable it in tests
|
||||
Logger.globalLogger.enabled = false;
|
||||
@ -196,7 +188,7 @@ describe('OcrService', () => {
|
||||
it('should handle conflicts if two clients process the same resource then sync', async () => {
|
||||
await createNoteAndResource({ path: `${ocrSampleDir}/dummy.pdf` });
|
||||
|
||||
const service1 = newService();
|
||||
const service1 = newOcrService();
|
||||
await synchronizerStart();
|
||||
await service1.processResources();
|
||||
|
||||
@ -205,7 +197,7 @@ describe('OcrService', () => {
|
||||
await synchronizerStart();
|
||||
await msleep(1);
|
||||
await resourceFetcher().startAndWait();
|
||||
const service2 = newService();
|
||||
const service2 = newOcrService();
|
||||
await service2.processResources();
|
||||
await synchronizerStart();
|
||||
const expectedResouceUpatedTime = (await Resource.all())[0].updated_time;
|
||||
@ -236,7 +228,7 @@ describe('OcrService', () => {
|
||||
// it('should process resources 2', async () => {
|
||||
// await createNoteAndResource({ path: `${require('os').homedir()}/Desktop/AllClients.png` });
|
||||
|
||||
// const service = newService();
|
||||
// const service = newOcrService();
|
||||
// await service.processResources();
|
||||
|
||||
// console.info(await Resource.all());
|
||||
|
@ -1,5 +1,8 @@
|
||||
import { ModelType } from '../../BaseModel';
|
||||
import Note from '../../models/Note';
|
||||
import Resource from '../../models/Resource';
|
||||
import { db, msleep, setupDatabaseAndSynchronizer, switchClient } from '../../testing/test-utils';
|
||||
import shim from '../../shim';
|
||||
import { db, msleep, newOcrService, ocrSampleDir, resourceService, setupDatabaseAndSynchronizer, switchClient } from '../../testing/test-utils';
|
||||
import { ResourceOcrStatus } from '../database/types';
|
||||
import SearchEngine from './SearchEngine';
|
||||
|
||||
@ -35,6 +38,27 @@ describe('SearchEngine.resources', () => {
|
||||
expect(normalized[0].body).toBe('hello, how are you ?');
|
||||
});
|
||||
|
||||
// Checks that a search matching a resource attached to a note returns exactly
// one result that points at that note and identifies the resource as the
// matched item.
it('should return notes associated with indexed resources', (async () => {
|
||||
const note1 = await Note.save({});
|
||||
// A second note with no attachment; the search below must not return it.
await Note.save({});
|
||||
await shim.attachFileToNote(note1, `${ocrSampleDir}/testocr.png`);
|
||||
// Only one file was attached above, so the first resource is that attachment.
const resource = (await Resource.all())[0];
|
||||
|
||||
// presumably records the note <-> resource association used by search - TODO confirm
await resourceService().indexNoteResources();
|
||||
|
||||
// presumably runs OCR on the attached image so its text becomes searchable - TODO confirm
const ocrService = newOcrService();
|
||||
await ocrService.processResources();
|
||||
|
||||
const searchEngine = newSearchEngine();
|
||||
await searchEngine.syncTables();
|
||||
|
||||
// NOTE(review): 'lazy fox' is assumed to be text recognised in testocr.png - confirm
const results = await searchEngine.search('lazy fox');
|
||||
expect(results.length).toBe(1);
|
||||
// The result row carries the note ID in `id` and the matched resource in
// `item_id` / `item_type`.
expect(results[0].id).toBe(note1.id);
|
||||
expect(results[0].item_id).toBe(resource.id);
|
||||
expect(results[0].item_type).toBe(ModelType.Resource);
|
||||
}));
|
||||
|
||||
it('should delete normalized data when a resource is deleted', async () => {
|
||||
const engine = newSearchEngine();
|
||||
|
||||
|
@ -534,4 +534,5 @@ describe('services/SearchEngine', () => {
|
||||
expect((await engine.search(n2.id))[0].id).toBe(n2.id);
|
||||
expect(await engine.search(f1.id)).toEqual([]);
|
||||
}));
|
||||
|
||||
});
|
||||
|
@ -63,6 +63,9 @@ const { Dirnames } = require('../services/synchronizer/utils/types');
|
||||
import RSA from '../services/e2ee/RSA.node';
|
||||
import { State as ShareState } from '../services/share/reducer';
|
||||
import initLib from '../initLib';
|
||||
import OcrDriverTesseract from '../services/ocr/drivers/OcrDriverTesseract';
|
||||
import OcrService from '../services/ocr/OcrService';
|
||||
import { createWorker } from 'tesseract.js';
|
||||
|
||||
// Each suite has its own separate data and temp directory so that multiple
|
||||
// suites can be run at the same time. suiteName is what is used to
|
||||
@ -1027,4 +1030,9 @@ const simulateReadOnlyShareEnv = (shareId: string) => {
|
||||
};
|
||||
};
|
||||
|
||||
// Shared test helper: returns a new OcrService whose driver is an
// OcrDriverTesseract constructed with tesseract.js's `createWorker`.
// A fresh driver and service are created on every call.
export const newOcrService = () => {
|
||||
const driver = new OcrDriverTesseract({ createWorker });
|
||||
return new OcrService(driver);
|
||||
};
|
||||
|
||||
export { supportDir, createNoteAndResource, createTempFile, createTestShareData, simulateReadOnlyShareEnv, waitForFolderCount, afterAllCleanUp, exportDir, synchronizerStart, afterEachCleanUp, syncTargetName, setSyncTargetName, syncDir, createTempDir, isNetworkSyncTarget, kvStore, expectThrow, logger, expectNotThrow, resourceService, resourceFetcher, tempFilePath, allSyncTargetItemsEncrypted, msleep, setupDatabase, revisionService, setupDatabaseAndSynchronizer, db, synchronizer, fileApi, sleep, clearDatabase, switchClient, syncTargetId, objectsEqual, checkThrowAsync, checkThrow, encryptionService, loadEncryptionMasterKey, fileContentEqual, decryptionWorker, currentClientId, id, ids, sortedIds, at, createNTestNotes, createNTestFolders, createNTestTags, TestApp };
|
||||
|
Reference in New Issue
Block a user