You've already forked joplin
mirror of
https://github.com/laurent22/joplin.git
synced 2025-07-16 00:14:34 +02:00
Chore: Added test for OCR search
This commit is contained in:
@ -1,18 +1,10 @@
|
|||||||
import { createNoteAndResource, ocrSampleDir, resourceFetcher, setupDatabaseAndSynchronizer, supportDir, switchClient, synchronizerStart } from '../../testing/test-utils';
|
import { createNoteAndResource, newOcrService, ocrSampleDir, resourceFetcher, setupDatabaseAndSynchronizer, supportDir, switchClient, synchronizerStart } from '../../testing/test-utils';
|
||||||
import OcrDriverTesseract from './drivers/OcrDriverTesseract';
|
|
||||||
import OcrService from './OcrService';
|
|
||||||
import { supportedMimeTypes } from './OcrService';
|
import { supportedMimeTypes } from './OcrService';
|
||||||
import { createWorker } from 'tesseract.js';
|
|
||||||
import Resource from '../../models/Resource';
|
import Resource from '../../models/Resource';
|
||||||
import { ResourceEntity, ResourceOcrStatus } from '../database/types';
|
import { ResourceEntity, ResourceOcrStatus } from '../database/types';
|
||||||
import { msleep } from '@joplin/utils/time';
|
import { msleep } from '@joplin/utils/time';
|
||||||
import Logger from '@joplin/utils/Logger';
|
import Logger from '@joplin/utils/Logger';
|
||||||
|
|
||||||
// Test helper: builds an OcrService whose driver is an OcrDriverTesseract
// constructed with tesseract.js's `createWorker`.
const newService = () => {
|
|
||||||
const driver = new OcrDriverTesseract({ createWorker });
|
|
||||||
return new OcrService(driver);
|
|
||||||
};
|
|
||||||
|
|
||||||
describe('OcrService', () => {
|
describe('OcrService', () => {
|
||||||
|
|
||||||
beforeEach(async () => {
|
beforeEach(async () => {
|
||||||
@ -31,7 +23,7 @@ describe('OcrService', () => {
|
|||||||
|
|
||||||
expect(await Resource.needOcrCount(supportedMimeTypes)).toBe(3);
|
expect(await Resource.needOcrCount(supportedMimeTypes)).toBe(3);
|
||||||
|
|
||||||
const service = newService();
|
const service = newOcrService();
|
||||||
await service.processResources();
|
await service.processResources();
|
||||||
|
|
||||||
const expectedText = 'This is a lot of 12 point text to test the\n' +
|
const expectedText = 'This is a lot of 12 point text to test the\n' +
|
||||||
@ -88,7 +80,7 @@ describe('OcrService', () => {
|
|||||||
it('should process PDF resources', async () => {
|
it('should process PDF resources', async () => {
|
||||||
const { resource } = await createNoteAndResource({ path: `${ocrSampleDir}/dummy.pdf` });
|
const { resource } = await createNoteAndResource({ path: `${ocrSampleDir}/dummy.pdf` });
|
||||||
|
|
||||||
const service = newService();
|
const service = newOcrService();
|
||||||
|
|
||||||
await service.processResources();
|
await service.processResources();
|
||||||
|
|
||||||
@ -111,7 +103,7 @@ describe('OcrService', () => {
|
|||||||
|
|
||||||
await msleep(1);
|
await msleep(1);
|
||||||
|
|
||||||
const service = newService();
|
const service = newOcrService();
|
||||||
|
|
||||||
await service.processResources();
|
await service.processResources();
|
||||||
|
|
||||||
@ -157,7 +149,7 @@ describe('OcrService', () => {
|
|||||||
fetch_error: 'cannot be downloaded',
|
fetch_error: 'cannot be downloaded',
|
||||||
});
|
});
|
||||||
|
|
||||||
const service = newService();
|
const service = newOcrService();
|
||||||
|
|
||||||
// The service will print a warning, so we disable the logger in tests
|
// The service will print a warning, so we disable the logger in tests
|
||||||
Logger.globalLogger.enabled = false;
|
Logger.globalLogger.enabled = false;
|
||||||
@ -196,7 +188,7 @@ describe('OcrService', () => {
|
|||||||
it('should handle conflicts if two clients process the same resource then sync', async () => {
|
it('should handle conflicts if two clients process the same resource then sync', async () => {
|
||||||
await createNoteAndResource({ path: `${ocrSampleDir}/dummy.pdf` });
|
await createNoteAndResource({ path: `${ocrSampleDir}/dummy.pdf` });
|
||||||
|
|
||||||
const service1 = newService();
|
const service1 = newOcrService();
|
||||||
await synchronizerStart();
|
await synchronizerStart();
|
||||||
await service1.processResources();
|
await service1.processResources();
|
||||||
|
|
||||||
@ -205,7 +197,7 @@ describe('OcrService', () => {
|
|||||||
await synchronizerStart();
|
await synchronizerStart();
|
||||||
await msleep(1);
|
await msleep(1);
|
||||||
await resourceFetcher().startAndWait();
|
await resourceFetcher().startAndWait();
|
||||||
const service2 = newService();
|
const service2 = newOcrService();
|
||||||
await service2.processResources();
|
await service2.processResources();
|
||||||
await synchronizerStart();
|
await synchronizerStart();
|
||||||
const expectedResouceUpatedTime = (await Resource.all())[0].updated_time;
|
const expectedResouceUpatedTime = (await Resource.all())[0].updated_time;
|
||||||
@ -236,7 +228,7 @@ describe('OcrService', () => {
|
|||||||
// it('should process resources 2', async () => {
|
// it('should process resources 2', async () => {
|
||||||
// await createNoteAndResource({ path: `${require('os').homedir()}/Desktop/AllClients.png` });
|
// await createNoteAndResource({ path: `${require('os').homedir()}/Desktop/AllClients.png` });
|
||||||
|
|
||||||
// const service = newService();
|
// const service = newOcrService();
|
||||||
// await service.processResources();
|
// await service.processResources();
|
||||||
|
|
||||||
// console.info(await Resource.all());
|
// console.info(await Resource.all());
|
||||||
|
@ -1,5 +1,8 @@
|
|||||||
|
import { ModelType } from '../../BaseModel';
|
||||||
|
import Note from '../../models/Note';
|
||||||
import Resource from '../../models/Resource';
|
import Resource from '../../models/Resource';
|
||||||
import { db, msleep, setupDatabaseAndSynchronizer, switchClient } from '../../testing/test-utils';
|
import shim from '../../shim';
|
||||||
|
import { db, msleep, newOcrService, ocrSampleDir, resourceService, setupDatabaseAndSynchronizer, switchClient } from '../../testing/test-utils';
|
||||||
import { ResourceOcrStatus } from '../database/types';
|
import { ResourceOcrStatus } from '../database/types';
|
||||||
import SearchEngine from './SearchEngine';
|
import SearchEngine from './SearchEngine';
|
||||||
|
|
||||||
@ -35,6 +38,27 @@ describe('SearchEngine.resources', () => {
|
|||||||
expect(normalized[0].body).toBe('hello, how are you ?');
|
expect(normalized[0].body).toBe('hello, how are you ?');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Checks that a search for text found in an attached image ("lazy fox" in
// testocr.png) returns the note the image is attached to, and that the
// result points at the resource as the matched item.
it('should return notes associated with indexed resources', (async () => {
|
||||||
|
const note1 = await Note.save({});
|
||||||
|
// Second note with no attachment; the assertions below expect it not to
// appear in the results.
await Note.save({});
|
||||||
|
await shim.attachFileToNote(note1, `${ocrSampleDir}/testocr.png`);
|
||||||
|
// Only one resource has been created at this point, so it is the first entry.
const resource = (await Resource.all())[0];
|
||||||
|
|
||||||
|
await resourceService().indexNoteResources();
|
||||||
|
|
||||||
|
const ocrService = newOcrService();
|
||||||
|
await ocrService.processResources();
|
||||||
|
|
||||||
|
const searchEngine = newSearchEngine();
|
||||||
|
await searchEngine.syncTables();
|
||||||
|
|
||||||
|
const results = await searchEngine.search('lazy fox');
|
||||||
|
expect(results.length).toBe(1);
|
||||||
|
// The result row carries the note id in `id` and the matched resource in
// `item_id` / `item_type`.
expect(results[0].id).toBe(note1.id);
|
||||||
|
expect(results[0].item_id).toBe(resource.id);
|
||||||
|
expect(results[0].item_type).toBe(ModelType.Resource);
|
||||||
|
}));
|
||||||
|
|
||||||
it('should delete normalized data when a resource is deleted', async () => {
|
it('should delete normalized data when a resource is deleted', async () => {
|
||||||
const engine = newSearchEngine();
|
const engine = newSearchEngine();
|
||||||
|
|
||||||
|
@ -534,4 +534,5 @@ describe('services/SearchEngine', () => {
|
|||||||
expect((await engine.search(n2.id))[0].id).toBe(n2.id);
|
expect((await engine.search(n2.id))[0].id).toBe(n2.id);
|
||||||
expect(await engine.search(f1.id)).toEqual([]);
|
expect(await engine.search(f1.id)).toEqual([]);
|
||||||
}));
|
}));
|
||||||
|
|
||||||
});
|
});
|
||||||
|
@ -63,6 +63,9 @@ const { Dirnames } = require('../services/synchronizer/utils/types');
|
|||||||
import RSA from '../services/e2ee/RSA.node';
|
import RSA from '../services/e2ee/RSA.node';
|
||||||
import { State as ShareState } from '../services/share/reducer';
|
import { State as ShareState } from '../services/share/reducer';
|
||||||
import initLib from '../initLib';
|
import initLib from '../initLib';
|
||||||
|
import OcrDriverTesseract from '../services/ocr/drivers/OcrDriverTesseract';
|
||||||
|
import OcrService from '../services/ocr/OcrService';
|
||||||
|
import { createWorker } from 'tesseract.js';
|
||||||
|
|
||||||
// Each suite has its own separate data and temp directory so that multiple
|
// Each suite has its own separate data and temp directory so that multiple
|
||||||
// suites can be run at the same time. suiteName is what is used to
|
// suites can be run at the same time. suiteName is what is used to
|
||||||
@ -1027,4 +1030,9 @@ const simulateReadOnlyShareEnv = (shareId: string) => {
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Shared test helper: returns a new OcrService backed by an
// OcrDriverTesseract that is given tesseract.js's `createWorker`.
// A fresh driver and service are created on every call.
export const newOcrService = () => {
|
||||||
|
const driver = new OcrDriverTesseract({ createWorker });
|
||||||
|
return new OcrService(driver);
|
||||||
|
};
|
||||||
|
|
||||||
export { supportDir, createNoteAndResource, createTempFile, createTestShareData, simulateReadOnlyShareEnv, waitForFolderCount, afterAllCleanUp, exportDir, synchronizerStart, afterEachCleanUp, syncTargetName, setSyncTargetName, syncDir, createTempDir, isNetworkSyncTarget, kvStore, expectThrow, logger, expectNotThrow, resourceService, resourceFetcher, tempFilePath, allSyncTargetItemsEncrypted, msleep, setupDatabase, revisionService, setupDatabaseAndSynchronizer, db, synchronizer, fileApi, sleep, clearDatabase, switchClient, syncTargetId, objectsEqual, checkThrowAsync, checkThrow, encryptionService, loadEncryptionMasterKey, fileContentEqual, decryptionWorker, currentClientId, id, ids, sortedIds, at, createNTestNotes, createNTestFolders, createNTestTags, TestApp };
|
export { supportDir, createNoteAndResource, createTempFile, createTestShareData, simulateReadOnlyShareEnv, waitForFolderCount, afterAllCleanUp, exportDir, synchronizerStart, afterEachCleanUp, syncTargetName, setSyncTargetName, syncDir, createTempDir, isNetworkSyncTarget, kvStore, expectThrow, logger, expectNotThrow, resourceService, resourceFetcher, tempFilePath, allSyncTargetItemsEncrypted, msleep, setupDatabase, revisionService, setupDatabaseAndSynchronizer, db, synchronizer, fileApi, sleep, clearDatabase, switchClient, syncTargetId, objectsEqual, checkThrowAsync, checkThrow, encryptionService, loadEncryptionMasterKey, fileContentEqual, decryptionWorker, currentClientId, id, ids, sortedIds, at, createNTestNotes, createNTestFolders, createNTestTags, TestApp };
|
||||||
|
Reference in New Issue
Block a user