From cbdc98553a6dae62d033ed4ef0f33567e9bddc99 Mon Sep 17 00:00:00 2001 From: pedr Date: Thu, 31 Jul 2025 12:42:03 -0300 Subject: [PATCH] Desktop, Server: Add transcribe functionality to Desktop though Joplin Server (#12670) --- .eslintignore | 3 + .gitignore | 3 + packages/app-desktop/app.ts | 11 +- .../gui/NoteEditor/utils/contextMenu.ts | 39 +++- packages/lib/SyncTargetRegistry.ts | 8 + packages/lib/models/Resource.ts | 3 +- .../lib/models/settings/builtInMetadata.ts | 10 + .../lib/services/database/migrations/48.ts | 7 + .../lib/services/database/migrations/index.ts | 2 + packages/lib/services/database/types.ts | 11 +- packages/lib/services/ocr/OcrDriverBase.ts | 7 +- packages/lib/services/ocr/OcrService.ts | 75 +++++--- .../ocr/drivers/OcrDriverTesseract.ts | 13 +- .../ocr/drivers/OcrDriverTranscribe.test.ts | 107 +++++++++++ .../ocr/drivers/OcrDriverTranscribe.ts | 134 +++++++++++++ packages/lib/services/ocr/utils/types.ts | 14 +- packages/lib/testing/test-utils.ts | 2 +- packages/server/assets/tests/htr_example.png | Bin 0 -> 25377 bytes packages/server/src/env.ts | 12 ++ .../server/src/routes/api/transcribe.test.ts | 177 ++++++++++++++++++ packages/server/src/routes/api/transcribe.ts | 97 ++++++++++ packages/server/src/routes/routes.ts | 2 + packages/server/src/utils/errors.ts | 31 +++ readme/privacy.md | 1 + 24 files changed, 728 insertions(+), 41 deletions(-) create mode 100644 packages/lib/services/database/migrations/48.ts create mode 100644 packages/lib/services/ocr/drivers/OcrDriverTranscribe.test.ts create mode 100644 packages/lib/services/ocr/drivers/OcrDriverTranscribe.ts create mode 100644 packages/server/assets/tests/htr_example.png create mode 100644 packages/server/src/routes/api/transcribe.test.ts create mode 100644 packages/server/src/routes/api/transcribe.ts diff --git a/.eslintignore b/.eslintignore index e1a62ea017..025d324ce3 100644 --- a/.eslintignore +++ b/.eslintignore @@ -1344,6 +1344,7 @@ 
packages/lib/services/database/migrations/44.js packages/lib/services/database/migrations/45.js packages/lib/services/database/migrations/46.js packages/lib/services/database/migrations/47.js +packages/lib/services/database/migrations/48.js packages/lib/services/database/migrations/index.js packages/lib/services/database/sqlStringToLines.js packages/lib/services/database/types.js @@ -1412,6 +1413,8 @@ packages/lib/services/ocr/OcrDriverBase.js packages/lib/services/ocr/OcrService.test.js packages/lib/services/ocr/OcrService.js packages/lib/services/ocr/drivers/OcrDriverTesseract.js +packages/lib/services/ocr/drivers/OcrDriverTranscribe.test.js +packages/lib/services/ocr/drivers/OcrDriverTranscribe.js packages/lib/services/ocr/utils/filterOcrText.test.js packages/lib/services/ocr/utils/filterOcrText.js packages/lib/services/ocr/utils/types.js diff --git a/.gitignore b/.gitignore index 1a9ec7e911..a3dd43499a 100644 --- a/.gitignore +++ b/.gitignore @@ -1317,6 +1317,7 @@ packages/lib/services/database/migrations/44.js packages/lib/services/database/migrations/45.js packages/lib/services/database/migrations/46.js packages/lib/services/database/migrations/47.js +packages/lib/services/database/migrations/48.js packages/lib/services/database/migrations/index.js packages/lib/services/database/sqlStringToLines.js packages/lib/services/database/types.js @@ -1385,6 +1386,8 @@ packages/lib/services/ocr/OcrDriverBase.js packages/lib/services/ocr/OcrService.test.js packages/lib/services/ocr/OcrService.js packages/lib/services/ocr/drivers/OcrDriverTesseract.js +packages/lib/services/ocr/drivers/OcrDriverTranscribe.test.js +packages/lib/services/ocr/drivers/OcrDriverTranscribe.js packages/lib/services/ocr/utils/filterOcrText.test.js packages/lib/services/ocr/utils/filterOcrText.js packages/lib/services/ocr/utils/types.js diff --git a/packages/app-desktop/app.ts b/packages/app-desktop/app.ts index c3727b1639..61e6b4db23 100644 --- a/packages/app-desktop/app.ts +++ 
b/packages/app-desktop/app.ts @@ -55,11 +55,13 @@ import userFetcher, { initializeUserFetcher } from '@joplin/lib/utils/userFetche import { parseNotesParent } from '@joplin/lib/reducer'; import OcrService from '@joplin/lib/services/ocr/OcrService'; import OcrDriverTesseract from '@joplin/lib/services/ocr/drivers/OcrDriverTesseract'; +import OcrDriverTranscribe from '@joplin/lib/services/ocr/drivers/OcrDriverTranscribe'; import SearchEngine from '@joplin/lib/services/search/SearchEngine'; import { PackageInfo } from '@joplin/lib/versionInfo'; import { CustomProtocolHandler } from './utils/customProtocols/handleCustomProtocols'; import { refreshFolders } from '@joplin/lib/folders-screen-utils'; import initializeCommandService from './utils/initializeCommandService'; +import OcrDriverBase from '@joplin/lib/services/ocr/OcrDriverBase'; import PerformanceLogger from '@joplin/lib/PerformanceLogger'; const perfLogger = PerformanceLogger.create('app-desktop/app'); @@ -353,16 +355,19 @@ class Application extends BaseApplication { // eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied const Tesseract = (window as any).Tesseract; - const driver = new OcrDriverTesseract( + const drivers: OcrDriverBase[] = []; + drivers.push(new OcrDriverTesseract( { createWorker: Tesseract.createWorker }, { workerPath: `${bridge().buildDir()}/tesseract.js/worker.min.js`, corePath: `${bridge().buildDir()}/tesseract.js-core`, languageDataPath: Setting.value('ocr.languageDataPath') || null, }, - ); + )); - this.ocrService_ = new OcrService(driver); + drivers.push(new OcrDriverTranscribe()); + + this.ocrService_ = new OcrService(drivers); } void this.ocrService_.runInBackground(); diff --git a/packages/app-desktop/gui/NoteEditor/utils/contextMenu.ts b/packages/app-desktop/gui/NoteEditor/utils/contextMenu.ts index f1fe8559ff..cef133e96d 100644 --- a/packages/app-desktop/gui/NoteEditor/utils/contextMenu.ts +++ 
b/packages/app-desktop/gui/NoteEditor/utils/contextMenu.ts @@ -8,14 +8,15 @@ const MenuItem = bridge().MenuItem; import Resource, { resourceOcrStatusToString } from '@joplin/lib/models/Resource'; import BaseItem from '@joplin/lib/models/BaseItem'; import BaseModel, { ModelType } from '@joplin/lib/BaseModel'; -import { NoteEntity, ResourceEntity, ResourceOcrStatus } from '@joplin/lib/services/database/types'; +import { NoteEntity, ResourceEntity, ResourceOcrDriverId, ResourceOcrStatus } from '@joplin/lib/services/database/types'; import { TinyMceEditorEvents } from '../NoteBody/TinyMCE/utils/types'; import { itemIsReadOnlySync, ItemSlice } from '@joplin/lib/models/utils/readOnly'; import Setting from '@joplin/lib/models/Setting'; import ItemChange from '@joplin/lib/models/ItemChange'; -import shim from '@joplin/lib/shim'; +import shim, { MessageBoxType } from '@joplin/lib/shim'; import { openFileWithExternalEditor } from '@joplin/lib/services/ExternalEditWatcher/utils'; import CommandService from '@joplin/lib/services/CommandService'; +import SyncTargetRegistry from '@joplin/lib/SyncTargetRegistry'; const fs = require('fs-extra'); const { writeFile } = require('fs-extra'); const { clipboard } = require('electron'); @@ -137,6 +138,40 @@ export function menuItems(dispatch: Function): ContextMenuItems { }, isActive: (itemType: ContextMenuItemType, options: ContextMenuOptions) => !!options.textToCopy && itemType === ContextMenuItemType.Image && options.mime?.startsWith('image/svg'), }, + recognizeHandwrittenImage: { + label: _('Recognize handwritten image'), + onAction: async (options: ContextMenuOptions) => { + const syncTargetId = Setting.value('sync.target'); + if (!SyncTargetRegistry.isJoplinServerOrCloud(syncTargetId)) { + await shim.showMessageBox(_('This feature is only available on Joplin Cloud and Joplin Server.'), { type: MessageBoxType.Error }); + return; + } + + if (!Setting.value('ocr.handwrittenTextDriverEnabled')) { + await shim.showMessageBox(_('This 
feature is disabled by default, you need to manually enable it by turning on the option to \'Enable handwritten transcription\'.'), { type: MessageBoxType.Error }); + return; + } + + const { resource } = await resourceInfo(options); + + if (!['image/png', 'image/jpg', 'image/jpeg', 'image/bmp'].includes(resource.mime)) { + await shim.showMessageBox(_('This image type is not supported by the recognition system.'), { type: MessageBoxType.Error }); + return; + } + + await Resource.save({ + id: resource.id, + ocr_status: ResourceOcrStatus.Todo, + ocr_driver_id: ResourceOcrDriverId.HandwrittenText, + ocr_details: '', + ocr_error: '', + ocr_text: '', + }); + }, + isActive: (itemType: ContextMenuItemType, options: ContextMenuOptions) => { + return itemType === ContextMenuItemType.Resource || (itemType === ContextMenuItemType.Image && options.resourceId); + }, + }, revealInFolder: { label: _('Reveal file in folder'), onAction: async (options: ContextMenuOptions) => { diff --git a/packages/lib/SyncTargetRegistry.ts b/packages/lib/SyncTargetRegistry.ts index 043a72ae51..96b6e9ea3e 100644 --- a/packages/lib/SyncTargetRegistry.ts +++ b/packages/lib/SyncTargetRegistry.ts @@ -97,4 +97,12 @@ export default class SyncTargetRegistry { ]; } + public static isJoplinServerOrCloud(id: number) { + return [ + SyncTargetRegistry.nameToId('joplinServer'), + SyncTargetRegistry.nameToId('joplinCloud'), + SyncTargetRegistry.nameToId('joplinServerSaml'), + ].includes(id); + } + } diff --git a/packages/lib/models/Resource.ts b/packages/lib/models/Resource.ts index 43f5d634f9..ee02defe5c 100644 --- a/packages/lib/models/Resource.ts +++ b/packages/lib/models/Resource.ts @@ -518,12 +518,13 @@ export default class Resource extends BaseItem { SELECT ${selectSql} FROM resources WHERE - ocr_status = ? AND + (ocr_status = ? or ocr_status = ?) 
AND encryption_applied = 0 AND mime IN ('${supportedMimeTypes.join('\',\'')}') `, params: [ ResourceOcrStatus.Todo, + ResourceOcrStatus.Processing, ], }; } diff --git a/packages/lib/models/settings/builtInMetadata.ts b/packages/lib/models/settings/builtInMetadata.ts index de202ae01b..850e493086 100644 --- a/packages/lib/models/settings/builtInMetadata.ts +++ b/packages/lib/models/settings/builtInMetadata.ts @@ -556,6 +556,16 @@ const builtInMetadata = (Setting: typeof SettingType) => { isGlobal: true, }, + 'ocr.handwrittenTextDriverEnabled': { + value: true, + type: SettingItemType.Bool, + public: true, + appTypes: [AppType.Desktop], + label: () => _('Enable handwritten transcription'), + storage: SettingStorage.File, + isGlobal: true, + }, + 'ocr.languageDataPath': { value: '', type: SettingItemType.String, diff --git a/packages/lib/services/database/migrations/48.ts b/packages/lib/services/database/migrations/48.ts new file mode 100644 index 0000000000..31a3288b9a --- /dev/null +++ b/packages/lib/services/database/migrations/48.ts @@ -0,0 +1,7 @@ +import { SqlQuery } from '../types'; + +export default (): (SqlQuery|string)[] => { + return [ + 'ALTER TABLE `resources` ADD COLUMN `ocr_driver_id` INT NOT NULL DEFAULT "1"', + ]; +}; diff --git a/packages/lib/services/database/migrations/index.ts b/packages/lib/services/database/migrations/index.ts index 7f0a5f1704..0f21cccf66 100644 --- a/packages/lib/services/database/migrations/index.ts +++ b/packages/lib/services/database/migrations/index.ts @@ -5,6 +5,7 @@ import migration44 from './44'; import migration45 from './45'; import migration46 from './46'; import migration47 from './47'; +import migration48 from './48'; import { Migration } from '../types'; @@ -15,6 +16,7 @@ const index: Migration[] = [ migration45, migration46, migration47, + migration48, ]; export default index; diff --git a/packages/lib/services/database/types.ts b/packages/lib/services/database/types.ts index fe48d33c92..634bc557b1 100644 --- 
a/packages/lib/services/database/types.ts +++ b/packages/lib/services/database/types.ts @@ -76,6 +76,11 @@ interface DatabaseTables { [key: string]: DatabaseTable; } +export enum ResourceOcrDriverId { + PrintedText = 1, + HandwrittenText = 2, +} + // AUTO-GENERATED BY packages/tools/generate-database-types.js /* @@ -283,6 +288,7 @@ export interface ResourceEntity { 'master_key_id'?: string; 'mime'?: string; 'ocr_details'?: string; + 'ocr_driver_id'?: number; 'ocr_error'?: string; 'ocr_status'?: number; 'ocr_text'?: string; @@ -330,9 +336,9 @@ export interface SyncItemEntity { 'item_type'?: number; 'sync_disabled'?: number; 'sync_disabled_reason'?: string; - 'sync_warning_ignored'?: number; 'sync_target'?: number; 'sync_time'?: number; + 'sync_warning_ignored'?: number; 'type_'?: number; } export interface TableFieldEntity { @@ -435,9 +441,9 @@ export const databaseSchema: DatabaseTables = { item_type: { type: 'number' }, sync_disabled: { type: 'number' }, sync_disabled_reason: { type: 'string' }, - sync_warning_ignored: { type: 'number' }, sync_target: { type: 'number' }, sync_time: { type: 'number' }, + sync_warning_ignored: { type: 'number' }, type_: { type: 'number' }, }, version: { @@ -502,6 +508,7 @@ export const databaseSchema: DatabaseTables = { master_key_id: { type: 'string' }, mime: { type: 'string' }, ocr_details: { type: 'string' }, + ocr_driver_id: { type: 'number' }, ocr_error: { type: 'string' }, ocr_status: { type: 'number' }, ocr_text: { type: 'string' }, diff --git a/packages/lib/services/ocr/OcrDriverBase.ts b/packages/lib/services/ocr/OcrDriverBase.ts index 90c4418bc9..88cb905be6 100644 --- a/packages/lib/services/ocr/OcrDriverBase.ts +++ b/packages/lib/services/ocr/OcrDriverBase.ts @@ -1,11 +1,16 @@ +import { ResourceOcrDriverId } from '../database/types'; import { RecognizeResult } from './utils/types'; export default class OcrDriverBase { - public async recognize(_language: string, _filePath: string): Promise { + public async 
recognize(_language: string, _filePath: string, _id: string): Promise { throw new Error('Not implemented'); } public async dispose(): Promise {} + public get driverId() { + return ResourceOcrDriverId.PrintedText; + } + } diff --git a/packages/lib/services/ocr/OcrService.ts b/packages/lib/services/ocr/OcrService.ts index 5b21641600..620f53a906 100644 --- a/packages/lib/services/ocr/OcrService.ts +++ b/packages/lib/services/ocr/OcrService.ts @@ -2,12 +2,11 @@ import { toIso639Alpha3 } from '../../locale'; import Resource from '../../models/Resource'; import Setting from '../../models/Setting'; import shim from '../../shim'; -import { ResourceEntity, ResourceOcrStatus } from '../database/types'; +import { ResourceEntity, ResourceOcrDriverId, ResourceOcrStatus } from '../database/types'; import OcrDriverBase from './OcrDriverBase'; -import { RecognizeResult } from './utils/types'; +import { emptyRecognizeResult, RecognizeResult } from './utils/types'; import { Minute } from '@joplin/utils/time'; import Logger from '@joplin/utils/Logger'; -import filterOcrText from './utils/filterOcrText'; import TaskQueue from '../../TaskQueue'; import eventManager, { EventName } from '../../eventManager'; @@ -30,19 +29,24 @@ const resourceInfo = (resource: ResourceEntity) => { export default class OcrService { - private driver_: OcrDriverBase; + private drivers_: OcrDriverBase[]; private isRunningInBackground_ = false; // eslint-disable-next-line @typescript-eslint/no-explicit-any -- Old code before rule was applied private maintenanceTimer_: any = null; private pdfExtractDir_: string = null; private isProcessingResources_ = false; - private recognizeQueue_: TaskQueue = null; + private printedTextQueue_: TaskQueue = null; + private handwrittenTextQueue_: TaskQueue = null; - public constructor(driver: OcrDriverBase) { - this.driver_ = driver; - this.recognizeQueue_ = new TaskQueue('recognize', logger); - this.recognizeQueue_.setConcurrency(5); - this.recognizeQueue_.keepTaskResults = 
false; + public constructor(drivers: OcrDriverBase[]) { + this.drivers_ = drivers; + this.printedTextQueue_ = new TaskQueue('printed', logger); + this.printedTextQueue_.setConcurrency(5); + this.printedTextQueue_.keepTaskResults = false; + + this.handwrittenTextQueue_ = new TaskQueue('handwritten', logger); + this.handwrittenTextQueue_.setConcurrency(1); + this.handwrittenTextQueue_.keepTaskResults = false; } private async pdfExtractDir(): Promise { @@ -62,6 +66,9 @@ export default class OcrService { const resourceFilePath = Resource.fullPath(resource); + const driver = this.drivers_.find(d => d.driverId === resource.ocr_driver_id); + if (!driver) throw new Error(`Unknown driver ID: ${resource.ocr_driver_id}`); + if (resource.mime === 'application/pdf') { // OCR can be slow for large PDFs. // Skip it if the PDF already includes text. @@ -70,7 +77,9 @@ export default class OcrService { if (pagesWithText.length > 0) { return { - text: pageTexts.join('\n'), + ...emptyRecognizeResult(), + ocr_status: ResourceOcrStatus.Done, + ocr_text: pageTexts.join('\n'), }; } @@ -80,7 +89,7 @@ export default class OcrService { let pageIndex = 0; for (const imageFilePath of imageFilePaths) { logger.info(`Recognize: ${resourceInfo(resource)}: Processing PDF page ${pageIndex + 1} / ${imageFilePaths.length}...`); - results.push(await this.driver_.recognize(language, imageFilePath)); + results.push(await driver.recognize(language, imageFilePath, resource.id)); pageIndex++; } @@ -89,15 +98,19 @@ export default class OcrService { } return { - text: results.map(r => r.text).join('\n'), + ...emptyRecognizeResult(), + ocr_status: ResourceOcrStatus.Done, + ocr_text: results.map(r => r.ocr_text).join('\n'), }; } else { - return this.driver_.recognize(language, resourceFilePath); + return driver.recognize(language, resourceFilePath, resource.id); } } public async dispose() { - await this.driver_.dispose(); + for (const d of this.drivers_) { + await d.dispose(); + } } public async 
processResources() { @@ -115,7 +128,7 @@ export default class OcrService { return async () => { logger.info(`Processing resource ${totalProcessed + 1} / ${totalResourcesToProcess}: ${resourceInfo(resource)}...`); - const toSave: ResourceEntity = { + let toSave: ResourceEntity = { id: resource.id, }; @@ -132,11 +145,11 @@ export default class OcrService { return; } - const result = await this.recognize(language, resource); - toSave.ocr_status = ResourceOcrStatus.Done; - toSave.ocr_text = filterOcrText(result.text); - toSave.ocr_details = Resource.serializeOcrDetails(result.lines); - toSave.ocr_error = ''; + const recognizeResult = await this.recognize(language, resource); + toSave = { + ...toSave, + ...recognizeResult, + }; } catch (error) { const errorMessage = typeof error === 'string' ? error : error?.message; logger.warn(`Could not process resource ${resourceInfo(resource)}`, error); @@ -162,18 +175,29 @@ export default class OcrService { 'mime', 'file_extension', 'encryption_applied', + 'ocr_driver_id', ], }); if (!resources.length) break; - for (const resource of resources) { + const ocrResources = resources.filter(r => r.ocr_driver_id === ResourceOcrDriverId.PrintedText); + + for (const resource of ocrResources) { inProcessResourceIds.push(resource.id); - await this.recognizeQueue_.pushAsync(resource.id, makeQueueAction(totalProcessed++, language, resource)); + await this.printedTextQueue_.pushAsync(resource.id, makeQueueAction(totalProcessed++, language, resource)); + } + + const htrResources = resources.filter(r => r.ocr_driver_id === ResourceOcrDriverId.HandwrittenText); + + for (const resource of htrResources) { + inProcessResourceIds.push(resource.id); + await this.handwrittenTextQueue_.pushAsync(resource.id, makeQueueAction(totalProcessed++, language, resource)); } } - await this.recognizeQueue_.waitForAll(); + await this.printedTextQueue_.waitForAll(); + await this.handwrittenTextQueue_.waitForAll(); if (totalProcessed) { 
eventManager.emit(EventName.OcrServiceResourcesProcessed); @@ -212,7 +236,8 @@ export default class OcrService { if (this.maintenanceTimer_) shim.clearInterval(this.maintenanceTimer_); this.maintenanceTimer_ = null; this.isRunningInBackground_ = false; - await this.recognizeQueue_.stop(); + await this.printedTextQueue_.stop(); + await this.handwrittenTextQueue_.stop(); } } diff --git a/packages/lib/services/ocr/drivers/OcrDriverTesseract.ts b/packages/lib/services/ocr/drivers/OcrDriverTesseract.ts index e2792ee3fd..ceefc605fe 100644 --- a/packages/lib/services/ocr/drivers/OcrDriverTesseract.ts +++ b/packages/lib/services/ocr/drivers/OcrDriverTesseract.ts @@ -4,6 +4,9 @@ import OcrDriverBase from '../OcrDriverBase'; import { Minute } from '@joplin/utils/time'; import shim from '../../../shim'; import Logger from '@joplin/utils/Logger'; +import filterOcrText from '../utils/filterOcrText'; +import Resource from '../../../models/Resource'; +import { ResourceOcrDriverId, ResourceOcrStatus } from '../../database/types'; const logger = Logger.create('OcrDriverTesseract'); @@ -55,6 +58,10 @@ export default class OcrDriverTesseract extends OcrDriverBase { this.languageDataPath_ = languageDataPath; } + public get driverId() { + return ResourceOcrDriverId.PrintedText; + } + public static async clearLanguageDataCache() { if (typeof indexedDB === 'undefined') { throw new Error('Missing indexedDB access!'); @@ -224,8 +231,10 @@ export default class OcrDriverTesseract extends OcrDriverBase { // Note that Tesseract provides a `.text` property too, but it's the // concatenation of all lines, even those with a low confidence // score, so we recreate it here based on the good lines. 
- text: goodParagraphs.map(p => p.text).join('\n'), - lines: goodLines, + ocr_text: filterOcrText(goodParagraphs.map(p => p.text).join('\n')), + ocr_details: Resource.serializeOcrDetails(goodLines), + ocr_status: ResourceOcrStatus.Done, + ocr_error: '', }); }); } diff --git a/packages/lib/services/ocr/drivers/OcrDriverTranscribe.test.ts b/packages/lib/services/ocr/drivers/OcrDriverTranscribe.test.ts new file mode 100644 index 0000000000..75a0488aad --- /dev/null +++ b/packages/lib/services/ocr/drivers/OcrDriverTranscribe.test.ts @@ -0,0 +1,107 @@ +import Setting from '../../../models/Setting'; +import { createNoteAndResource, setupDatabaseAndSynchronizer, switchClient } from '../../../testing/test-utils'; +import { ResourceOcrStatus } from '../../database/types'; +import OcrDriverTranscribe from './OcrDriverTranscribe'; +import { reg } from '../../../registry'; + +type JobGenerated = { jobId: string }; +type GetResultPending = { state: string; jobId: string }; +type GetResultCompleted = { state: 'completed'; jobId: string; output: { result: string } }; +type GetResultFailed = { state: 'failed'; jobId: string; output: { stack: string; message: string } }; + +type Response = JobGenerated | GetResultPending | GetResultCompleted | GetResultFailed | Error; + +interface MockApi { + exec: jest.MockedFunction<( + method: string, + path: string, + query?: unknown, + body?: unknown, + headers?: Record, + options?: Record + )=> Promise>; +} + +describe('OcrDriverTranscribe', () => { + let mockApi: MockApi; + + beforeEach(async () => { + await setupDatabaseAndSynchronizer(1); + await switchClient(1); + + mockApi = { + exec: jest.fn(), + }; + + const mockApiMethod = jest.fn().mockResolvedValue(mockApi); + const mockDriver = { api: mockApiMethod }; + const mockFileApi = { driver: jest.fn().mockReturnValue(mockDriver) }; + const mockSyncTarget = { fileApi: jest.fn().mockResolvedValue(mockFileApi) }; + + reg.syncTarget = jest.fn().mockReturnValue(mockSyncTarget); + }); + + 
it('should return an error if synchronization target is not set', async () => { + const { resource } = await createNoteAndResource(); + const htr = new OcrDriverTranscribe(); + const response = await htr.recognize('', 'mock-path', resource.id); + + expect(response.ocr_status).toBe(ResourceOcrStatus.Error); + }); + + it('should return correct response when successful', async () => { + const { resource } = await createNoteAndResource(); + + mockApi.exec.mockResolvedValue(Promise.resolve({ jobId: 'not-a-real-job-id' })); + mockApi.exec.mockResolvedValue(Promise.resolve({ state: 'pending', jobId: 'not-a-real-job-id' })); + mockApi.exec.mockResolvedValue(Promise.resolve({ state: 'completed', jobId: 'not-a-real-job-id', output: { result: 'this is the final transcription' } })); + + const htr = new OcrDriverTranscribe([1]); + Setting.setValue('sync.target', 9); + + const response = await htr.recognize('', resource.filename, resource.id); + + expect(response.ocr_status).toBe(ResourceOcrStatus.Done); + expect(response.ocr_text).toBe('this is the final transcription'); + }); + + it('should return error when unsuccessful', async () => { + const { resource } = await createNoteAndResource(); + + mockApi.exec.mockResolvedValue(Promise.resolve({ jobId: 'not-a-real-job-id' })); + mockApi.exec.mockResolvedValue(Promise.resolve({ state: 'failed', jobId: 'not-a-real-job-id', output: { stack: '', message: 'Something went wrong' } })); + + const htr = new OcrDriverTranscribe([1]); + Setting.setValue('sync.target', 9); + + const response = await htr.recognize('', resource.filename, resource.id); + + expect(response.ocr_status).toBe(ResourceOcrStatus.Error); + expect(response.ocr_error).toEqual({ stack: '', message: 'Something went wrong' }); + }); + + it('should be able to retrieve jobId from database instead of creating a new job', async () => { + const { resource } = await createNoteAndResource(); + const jobId = 'jobIdThat should be reused latter'; + + 
mockApi.exec.mockResolvedValue(Promise.resolve({ jobId })); + mockApi.exec.mockImplementationOnce(() => { throw new Error('Network request failed'); }); + + const htr = new OcrDriverTranscribe([1]); + Setting.setValue('sync.target', 9); + + const response = await htr.recognize('', resource.filename, resource.id); + await htr.dispose(); + expect(response.ocr_status).toBe(ResourceOcrStatus.Todo); + expect(response.ocr_error).toBe(''); + + // Simulating closing/opening application + mockApi.exec.mockResolvedValue({ jobId, state: 'completed', output: { result: 'result' } }); + const htr2 = new OcrDriverTranscribe([1]); + + const response2 = await htr2.recognize('', resource.filename, resource.id); + expect(response2.ocr_status).toBe(ResourceOcrStatus.Done); + expect(response2.ocr_text).toBe('result'); + + }); +}); diff --git a/packages/lib/services/ocr/drivers/OcrDriverTranscribe.ts b/packages/lib/services/ocr/drivers/OcrDriverTranscribe.ts new file mode 100644 index 0000000000..412518811b --- /dev/null +++ b/packages/lib/services/ocr/drivers/OcrDriverTranscribe.ts @@ -0,0 +1,134 @@ +import { emptyRecognizeResult, RecognizeResult } from '../utils/types'; +import OcrDriverBase from '../OcrDriverBase'; +import Logger from '@joplin/utils/Logger'; +import { ResourceOcrDriverId, ResourceOcrStatus } from '../../database/types'; +import KvStore from '../../KvStore'; +import shim from '../../../shim'; +import { msleep } from '@joplin/utils/time'; +import Resource from '../../../models/Resource'; +import { reg } from '../../../registry'; + +const logger = Logger.create('OcrDriverTranscribe'); + +type CreateJobResult = { jobId: string }; + +export default class OcrDriverTranscribe extends OcrDriverBase { + + private retryIntervals_ = [10 * 1000, 15 * 1000, 30 * 1000, 60 * 1000]; + private jobIdKeyPrefix_ = 'OcrDriverTranscribe::JobId::'; + private disposed_ = false; + + public constructor(interval?: number[]) { + super(); + this.retryIntervals_ = interval ?? 
this.retryIntervals_; + } + + public get driverId() { + return ResourceOcrDriverId.HandwrittenText; + } + + public async recognize(_language: string, filePath: string, resourceId: string): Promise { + logger.info(`${resourceId}: Starting to recognize resource from ${filePath}`); + + const key = `${this.jobIdKeyPrefix_}${resourceId}`; + let jobId = await KvStore.instance().value(key); + + try { + if (!jobId) { + await Resource.save({ + id: resourceId, + ocr_status: ResourceOcrStatus.Processing, + }); + logger.info(`${resourceId}: Job does not exist yet, creating...`); + jobId = await this.queueJob(filePath, resourceId); + + logger.info(`${resourceId}: Job created, reference: ${jobId}`); + await KvStore.instance().setValue(key, jobId); + } + + const ocrResult = await this.checkJobIsFinished(jobId, resourceId); + await KvStore.instance().deleteValue(key); + + return { + ...emptyRecognizeResult(), + ...ocrResult, + }; + } catch (error) { + if (shim.fetchRequestCanBeRetried(error) || error.code === 503) { + return emptyRecognizeResult(); + } + await KvStore.instance().deleteValue(key); + return { + ...emptyRecognizeResult(), + ocr_status: ResourceOcrStatus.Error, + ocr_error: error.message, + }; + } + } + + private async queueJob(filePath: string, resourceId: string) { + const api = await this.api(); + + const result: CreateJobResult = await api.exec('POST', 'api/transcribe', null, null, { + 'Content-Type': 'application/octet-stream', + }, { path: filePath, source: 'file' }); + + logger.info(`${resourceId}: Job queued`); + return result.jobId; + } + + private async checkJobIsFinished(jobId: string, resourceId: string) { + logger.info(`${resourceId}: Checking if job is finished...`); + let i = 0; + while (true) { + if (this.disposed_) break; + + const api = await this.api(); + + const response = await api.exec('GET', `api/transcribe/${jobId}`); + + if (this.disposed_) break; + + if (response.state === 'completed') { + logger.info(`${resourceId}: Finished.`); + return { + 
ocr_status: ResourceOcrStatus.Done, + ocr_text: response.output.result, + }; + } else if (response.state === 'failed') { + logger.info(`${resourceId}: Failed.`); + return { + ocr_status: ResourceOcrStatus.Error, + ocr_error: response.output, + }; + } + + logger.info(`${resourceId}: Job not finished yet, waiting... ${this.getInterval(i)}`); + await msleep(this.getInterval(i)); + i += 1; + } + + return { + ocr_status: ResourceOcrStatus.Error, + ocr_error: 'OcrDriverTranscribe was stopped while waiting for a transcription', + }; + } + + private getInterval(index: number) { + if (index >= this.retryIntervals_.length) { + return this.retryIntervals_[this.retryIntervals_.length - 1]; + } + return this.retryIntervals_[index]; + } + + private async api() { + const fileApi = await reg.syncTarget().fileApi(); + return fileApi.driver().api(); + } + + public dispose() { + this.disposed_ = true; + return Promise.resolve(); + } + +} diff --git a/packages/lib/services/ocr/utils/types.ts b/packages/lib/services/ocr/utils/types.ts index 0cdfd1dc2d..83e6be0c1c 100644 --- a/packages/lib/services/ocr/utils/types.ts +++ b/packages/lib/services/ocr/utils/types.ts @@ -1,7 +1,11 @@ +import { ResourceOcrStatus } from '../../database/types'; + export const emptyRecognizeResult = (): RecognizeResult => { return { - text: '', - lines: [], + ocr_status: ResourceOcrStatus.Todo, + ocr_text: '', + ocr_details: '', + ocr_error: '', }; }; @@ -18,6 +22,8 @@ export interface RecognizeResultLine { } export interface RecognizeResult { - text: string; - lines?: RecognizeResultLine[]; // We do not store detailed data for PDFs + ocr_status: ResourceOcrStatus; + ocr_text: string; + ocr_details: string; + ocr_error: string; } diff --git a/packages/lib/testing/test-utils.ts b/packages/lib/testing/test-utils.ts index 24067e4c4b..dbd9cd1dc1 100644 --- a/packages/lib/testing/test-utils.ts +++ b/packages/lib/testing/test-utils.ts @@ -1116,7 +1116,7 @@ const simulateReadOnlyShareEnv = (shareIds: string[]|string, 
store?: Store) => { export const newOcrService = () => { const driver = new OcrDriverTesseract({ createWorker }, { workerPath: null, corePath: null, languageDataPath: null }); - return new OcrService(driver); + return new OcrService([driver]); }; export const mockMobilePlatform = (platform: string) => { diff --git a/packages/server/assets/tests/htr_example.png b/packages/server/assets/tests/htr_example.png new file mode 100644 index 0000000000000000000000000000000000000000..8c0335f5ebd4fe575f3d957cabc82f05c2288d73 GIT binary patch literal 25377 zcmeEtWmnW)*e@cDlr%^P$RLeKmnfYg-6`GONH+*bmmneCA>AF4Lw7ew=h<^V>p3sa zCpfcoxnu_Zn;qBns~!4TP8|!a|j&VGbR)y@RgS(6kFgQ1c#53 zN+{sviDDE22S*7fDI%=ons%`0;)n;GMLIEYR&AC>K}CIy053w=OvJTd%Tligp|xYl zJe;wUT}$Y)J6;5TW{^m^SZDHF#)Mz);a{{^wFnvEyso|)u}r+YNsn`QXEQ?kps7wcq%ni?`OXKsQ)ZOfggPkp{Bb`j-!y- z1KW8L&y^9Et_$F>d}@(P$ZDxegM*3e=6)Dy`P|w@NJXBtlfA^b@Cqv|EG)C}3km7V zETFgT)#SO>`&w~ck<~87?<318mmYf%$VW}Jr>o}hQAC91bcfJ`1CbW)uXXKe;WpQ5 z+_DtEr>cm^1J-}L*B8s@ZBJFWgOgWeA-qd-Guho>ihv{Recb=vN3Pw1t{)#C*zQ^I zF*<8K9*DiK)mZrN2N1$OkgYSxk!m-Lo~mCvm+l>PA4br0BAY>a92WB1$IuoGU^2XM zX~{P>rB*G0>kU)iN( zz48)WlCzQ>+vcVfE4jHhM=hdM+1{b;s5m$ZTx0}%N8UF1Cv1giFUYZj5=&D9o|agN z*~NO$DV{dx-nlGM;V86r!)En&s;Uq7H@^XbRYL=6zmvI0MI0RSkLCT}8d*ppDdX0o zC!fbN(G`Nhlz{2w`)@GbCbe^?w^+yl(F%Ag$wiehoRkNDTV=_~83Y7ob!K)FJZPx) zMv2nE;yxUObL9Ru-@z8#N^$G7HMne}eiU%{9B{HPg?Y9S5F!x-8}Zn&&QsP0R{=I& zgt`p&N~ncoMOH@{6VHstHVYNiEd>$;lp;9J?I}am3hs-&O59*7{;rw*njfxqwqGWU zoNt#xLf{eqTN1eAM=iQ0astb)++Hj@SFaC_At(D5sl;F~{jegH{S7)}-oADbwVeXj zc0XGX5{>5Us!fnK`TiYvH&!3ED$kCqrgBZ$=L4c|eyvO*z#)n7VQRts%$mx+YXJ|! zfISG!^|*}grOxO`NcZ(Q(W)WUN7;^jf+FoJFB}|164-$8{!Skkb!WuVt{NKU)57V! 
zqwm-^hQ09xHi>jUn_DZ@Tm|Td0;)`MBr+j=bdM5W!P9AaVao!qqL_-mY>7?Sk;%n3 zrGK|_BqN3xwsH~xrYq*ve*;j{En$cw=+L!~;Lj}zA6wz(t(a4qQC9xv07gLd-w4VD zAC=R_;{_NA*2jNw2x;fBvNlcvhzIpp_i!cib!@{5Z>#VKrA zV9&up%)wF#n@0{tB()onaeM9Q(oSpFTZ)+h<`R|dzqvH5DY!OpDmY=wmzNb&N^*4u z-}UFet&henWK|@h{3-Z^4LxcsaU2n@(%nUY3oNI>e^b1(t}7h?4WvJLr4gJmt~FNZXVhJv-L*<8I+MUPL$_{pFF)ZpUEn3zSZuHBK&@Oa(jzM)+G>b5ReaM9nSG$mvrALDE_9kG_8PKBzj zN&TA01Z}mkI+d&r4U>}?o5a%b_D^UC{IiLMkdOr#WnT;31y40ScsMI#Qft?*3}{6f zUXCW`XXBo$7xjI5Y5Z^d3%0Ef%TX=l3_ASN+;yn3KXia4?;EZqK&*iJWZ22$7147&ckfDxT%CuFWF$>HO>6 zHva;kvSNsdneQiex0ipYvNUDH6C9p9;Nl>n{r6!4!q;-ksPE27pz~m4rw?0FtA=u^ zyeOYucBXqr4#%z-8A;_#jLM4O(OsiTZWzL9^OCl|pOl>x$_3f*p~~t7C1fI~&&}pu zi85W-ezBla%RRE>W^Kht$^3`(sJsXsN)nv(2KAva>5-QYq2Oc5_1~dDGwQXIA>~;K zikP%d&hN_G98!DbuEq4)-@5tP#+TXD&{=<8EB^Mm2hw#U&Y*H z;UVnLD*Ns=JrN<<;l0Q;b0zPuP-55-sXhr`*(h@Q>iU?T!-@er^P?OEarAde zs32O=9^Yr3d1g-;XQc4eQ1AdZ+b_!o?bFS*FLg z=i_}>e%e}$F~>=7d~>^d+9p!z_qv4r`R48h_GHI#J8>xR5673Br+-nw~%48+clKBS7d>hlmRk0o95)TdYA@8?)&Db@GA|f((!_PgNwq0L@0T&QV6n$OS6h@F{zii{`W+q6Y!8i`(o-yrdK0c1cT!H`g zSC5*iGYMBN<4uvYEyr+xr-`b3N6kb>ug1Y_f{aNvphdHR0ZycV@ zs~$4ra7_{kh6rienEyx{>TTf0R#e_}V#0N^|0t4^JQ|IH$?5p&vK1dOxJFH2#*`q# zb&@mLZq$SC^s@3}PFHY;k&<3TfvI&}_PHKup*w%8OmKIUem_nMd1G_=v%y;BtWJa8 zfGjp{w=XXz>?8(^6YDtBdEPBayL>vT6@Qs#ggJis`0;3Ll3q~T#fhCB)I?W5oA-1? 
znImzaGnROgrv(I)d!q}iW0%yt`Lj7pS8VN6kg7_^WJVb6*+MwV)Gu)0op0Q;+N_yR zP5)t{qeV?ItS%9ssw^9^GnTsC)|z)DGktG={)`WNH}}jA_Y@6H-#5G6F>vtgA?gfg ztNi%?3iF&#`$LP&nko&h6$%uZ^B}l53df!q(^FJ8D}ULE2FZLSCB8p13tjE#Wn5J9 zH6h$4$C0oaD(PH2K6Jbxzs(kJoe3w^oehip#&evXEO4?QG_2Jg<==&tr`a`)mYQuyKv$bEyac+j~)8-rX{5iiD5%KNGXP_*S z6%|%4I$=9rc!v=>R_dZSX6|aA^PXehKfW8Y%&VqUhWn&QXrRor{h#!id-LU3S@(5s zqc3qe4^@#Q(V$|SVA0Pn_udH^M3`|g1({0*RYj(Uwy)CY(6Dj#3~XZ|f;`D7{qA6z z1w2X8@F?)|^}EFp4UYnRua7bvSjVIn<-|-Sc6mR~uAQWGn~N(q*Y~xys#pBdaOFKa z8V$TYuY1X|6yp;&MH4}OaJztb7U^ehXWL=mdTef1M{d`#s4WuGpUd06<;-v?Ietxx zwcQWeF1a9v+}Qvb%h@x%owkwl!j1aRo?cf9u8(t)M5PsKi|Xsa1O7i7jO{E2`pVn( zF2B~yg53XQozM+8_x?tiJ6yK>y(!UpsnbhIRy0uZDL0Pezy~Y|m$&jq5wT7DfW> zq9gsE_-YUPyTE8!Qf%78$*Bcn{G%VD0V(jMNp2l~iC$3Py&XC%wP9<_)fd)P{W3u- z!!8+$_38cg!MS81TJfSLgXfER6puvpvkcsq8wR#i#8muN2#NqRoS)4SJgr@9cAV42 zDYO?e_EwP_e|@8;q5_{XS4>x8_DMF5*trocTvqhEg0RSM)YaEwMwnszKkTbd3NSi6 zc-i{3G;E233~7%YX2NMKG^olU@_rf2@)Bovt1A9nbFhm8C0|DBqm*aTxm$LJOyBtD z50i99ljetB8n%=5jS)t|Xwf0x-MF{5Q_#!r(W1pOy)^%$CThk9#aqAeIzl4Q3 zVeoFzvoe;s%&HfF1hRbTD6+Lns3qYBg?KQsKER5*|N6EnMG#it#FP=e7u`NzC!ufW zxb0fsYr2(OZ-_|S8q^fjZ$G5F`iPEY;dSQBeCBtU5_u)y{7v&H?p*WFr+Y42gWCFx z$c3^_e+WcqJH&!kJYN*?;DZhomFl%ok&~+3uan!A0M=LRBhBH({E}IFGq1-|wy@q0 z=3yYO^!Jypi79b8bN>ehrvANaaut7+cDB#ntUk#ay2PA$Jzu6-vRGp42szwR5-p@X|G}bO zjS(Xgcs#zqeDwG-q`iN1zMC(kH&XsZ-?6mK01X84_B4tw<5@byCCc1mZkV&_vh$gV zO{7-2u?^FKpByp(b8OBinj#*W!J_xxhnB*gUg3!Im?1*FFqq5J3u%{>^Sze!lFGZ| zOAA4Qz^A7-RDaKNLR_Kp-0eYIy(l2RcDqzAole73sY6p#HhR9=$4*d22u&TzOdm*L zpg2()?hiX$Yp-I%5oQd6P}#@I`^7QEVnQ;KkaVY3xDQeOlBe}eG-z4K&O%}ErWRhZ zH)Zwy`w>+UPfU6mjR+z-;UCno*CPuAe@q*VIydW2LvU0Lb=@o z<{CPl9q!{L(~}{Edd3olw_=*S5yc$242!zADU}*-QqbKAB-nP&q!TBDQyRlj;wnJZ zzdqF!U!PFU1!OHa>4Y+klY_Xba-*cY{~^VwDjMFAwMghhWVjzolRgmIixJx1rk1Dr z5{GASKQqOKi2>WWhM1SNHh|Hw}WYq3T@~XkBhOUqRN>~xdiW6heM38py z)~rKO_fXuh2(g{^QyB#NRH2>Q(7bY9S5u{nJnL6y!t7S^^p6pDXFF!{JA-BY#-qTg z5}=#llRr+l-k;XLCighxh+Pp;swWFE@+~@0in@>cy@UYw+>ydOaS~+@Lrzdvo2n_} 
z6=7Kaft-ZM#>+AUhydZ=C<6U$3XU0S2-Z>BUa6F`BtFYgtoOAvo6bEamN^URr$V&* zf1TRgR#NZQe!X%LcTiFOgp5X0WtbPa;u&{j^wy{HobpV*QiDo6$c=>X^qWOCueg!7Z970>8IiVH3m?N$)F|Hdff^T#zi$@*pefhASD+oVXU+)@*t zWWIPtcRFGkGqp3(@~~Y0B7-Dt)i?Vb5U`Ke?Wp?WzJ9(v0N=f*!z8TYY(z`d-dxLz z1}6@I_<26&@N%`ZU)*Oh2n5Cx42wjslN?Mc;=$vKz~ennq$~1zWr;}q75snk+hd(o zGaoJ^sau6v5>BZDLgC|X{@i=^ZRzZ;sL_o2Ov&* zN!maXHzyYv*37FPo0vb@x;& z$5BmtkGC5tj=Ap;udQ5h_`&~#4VT?u;j!VtS2(uv4e6xBqqxcmv-BO+XiVM9=}VmJ z?!7Ooqhm%&wwFVA+E-#CBF+C?ZS7}6KLtXlsJ~F1mI+0i?g&&@OXoENn+frRhCA}_ z&KfCwhFVVFYq`I#4sl3tlnz1bQB;-d5fje-t9GjW%yOpnt9~Y#yM!2Nc4H_|2z-1 z+zPZbM!KQc9sL1;2vVm?=jKv+cRunSFjx=>Zh7W80ud)tRRdL*)r3_HBdSAl5KTd2 zB1kXJPhB0g{-7&MN4;ynnOT;%VV6c??##O)5AUBz{@RS)QTj{I_2kY9#{7ZRC&FLC z?M?lXu16`74IWqfHFiGfC$|wXr{rD_EfHqg57CSzgB>efCPt)+8cTOVYU(si|2*e*dV~&57akOF4=A} zo6Vn1wPYVm+8ec%+1RL&qICtI8PZ&BG4Ef72D8W++OB;s-u6f4Q-}-h(?G1ETyoJO z`#w8q+#QRt+0i?c8DChmPl$sP>c+%<`X+qJ3pWjrZaqCql)^o}SAu!v1MCIb4iFlH zLqms(qIw*_ePi@#i7?RKA&a_iT)0=gDI-Tl=p@7sgg~e&6`S<1GTmeDZaxWne)Zx* zE7VX>nhMMIFEKT(_}Mb=)8KvVCxaA0{wgq(xPsgL04@h(XSpiGh(oI~b1|DEcbpbN zNRgTc(;E9K+EC)HtA^u9^{t;&2mxM1pdd)~p?mvcSR$4X?@0ncrl|mPGglkznHhc z+f@^gi& zU(0!6Iink#n@aEA++u6pP`drECVNi>=xwpy-(A!Wt_@^?D5&oLE5{65%-;`m;|S?! 
z;U7_QW?qZa&o4UgIwu1@x5{IgvCo(1*Q4GIq}Z09`lRoE^m`!@g4w>)u0Rw^ME=>I z=BpF>5id_cu}!bQ@nIL1CK)tiS09LudSBU8EQLY{%ThNgk|kP<;yjNd{o3q@Ge|(2 zcJ8vWQm0xQCASZ+Kb(b!jCjc~rE zvTp<%VIgUH>J8OofXfpvnZ@NK5DSq45QuH>uSN2oqZY|_hc9of#*AK(Uml0S1Dz3- zD?KLEl1pB@b+=XXv@=1fPB5Pw$Yu*qtIU9Lt=|7ax;qeTg0cOwSkwuV;gi-tInb91 zgD@A)mrK-G%mV)A<}QY9Cv2?MP++b+$x%e@%aLteK|Yp#Pfx3V*Vt29Sijwz?rM?D z-Q1@PS_IdF3xl65QW~4pcX8dpRLfXngeW;clc)8&FZXoHoc`?WRDa{Wc|&!U@XggE z9={7YIV}_U9RoxACfV^cCTXdwGqv~b*VDqnwyRVIm^wL>{yn5MjRRhhU0c2t?Ancq zwwy*2g6+$@6(i96s*}kGr-KlPSsi}(ynk7)c6e>1jG9o)(tG>h1lBr@SKWer{^UB_ z>)P4@QLx@Bs2amXHC!O6P&=?V>C3E>PFxGoF_B+Gi7+@Q}2e!Zx6x0bM1AgPWdGZu_~y2Ly;QR$5wp zxVG}C4~xw%2kWkxn;b{Nwvr*Baz~A>?|UqWhIT?26X#OT7&P>1=WvRvF7sK`n{>2- z*=_7AbzhX;n`;x3-y2~`na?d;rtzVXRsf3UYP!mu$?0%HqhJf~m=OVB3b|%A)WTOX zF^B+N=aHEr`rfQbRTrw5!vSf&m*L!+*5AZDE$Hu$i~TP6*CA3qWJjVgjMQ$OK@+P) z7uX2s(ek}sgwbQ^8S;-MPo+qW6;fx_7X)366aJY8V_tq0#_||)PuuhhXLFpu&q{OG z_L=LBLSw&q|KU7n!TvO0w`lvO_Nd2L)ovNN(e^=FbAcquaW+LadVYDx?(W|$`m9ID zQbXLuo%!j4f!(mbP9-s)_Vm$MLJO2o?af=K<|%Z6%WlC{C>@&Zzu9EwMM9iihw`xq z#OP8!OT#I1McDDVhKe_Ox|(dDKC`#aetJGYnqXDbF}S?RcTvOH7$)z{&My@+dnS!CCxu>FI`R|t znto<|_hum6#euuAaIGP+c7;?jkwf*6&aw*tbnm#9ExGY**Yv=HXqGwT#vrW*AlxIQ zz5ozkPt6`72qJE9@ZFR~1eyBXWWde|B@hR((xr|7K3xyLM14A9*q?V=EE1~POT%t8 z@Zj*CeBoj)y-vpheD{uy{ywk6nHnH+v^bsi+Z)~Upz(><8eEOuV__b8Xx zXmrS6(W8!4+>`Gz(q>BbaOp3L)MR@|!{}-5KatOJ#A@cs`0fgw;(xOMkIvo+;u&`n z%TmDdoX6U?JwI2)alI;=Scp|@Eh{DlzNKAx)^i}=iv%X??Ih5?B*h8n`-}^?y{W0) zy+G}(a<%h}W6g4j`OXJW0)*|dm?=zzuKUUJpX0_Uj7`zI?B<1elcWL4UhNYtWieMS zH}3gC;oB?#1cr}vT>;4Y*FRB+5O5xW+S+vc@XT_c=#!^Wxvr&@tA=Vxw8T|4voa$2 zqh;C3SUM=p8(xiJuky1st>`ZkIB%SKrN<$ZfNYiKG9aFg+?r{|=SiOJKJQepFPFW3 ztqHa4$;?~|$t33%Q@u(8UPAd=yWvUufWZNil|2#NrgF8Oi`}ozZQ%0dHO8r#et%Xa z2GM&gR*KGIVz|fmfor^d` z#OSClkG$JYYmoPW$nKBZ*`GZQQ0cc3k{fiP;v%p8DXI!Av#6Lmow~JedhGP=oD+g{ z2avmGEahchC1&|?HqtNLvk}L4%2^VYFm+#kv`ansgNsDP*V(L%8!92kNyo_?n@{^7 zsRy=+&XpiH&kawnU3+deT$ZrdiwKM{yf(NGlCq>3kM2T8e@v@O?MClZ>f>f$^<9mF5}f}z8EHKl%m2zc;wENWmF5H1ls9jO2#oI;vD< 
z`59RYtlq6RFrvbTQDN+qQK>iAR{-gtg85L;8l#{-DAo}wbh>!z2cyK z)*CG^BIS83WCJkpu3{Xl`Fb{1G(oj*wwx_m4~7V+Zk~QYxMwx#F;~17zJz9}G6!bE ztp|1eTr}wP9UvIqb<)0C+d2J(d=-u5=EipaYThcK%gNE1&TJ zoEEzckjTo$;=xVsgibFpsQDVT=rSrj8;0V`m%)I{rtuX{3 zY1biPs#;jN#%0rv&?TK8BqgpI`P{YNAA%Z|DufSf^`*OiXpOr*D7<>I#NrHTeG(98 zKnhQYRTMDDvUAN};;346H2u`!el?ESiv##W?Bgk(WzjcxnI#yMDXag6Ad5!q3yY#m((Q;3VNd zB|_2a?&w{n(tgwbwVsB~4r+sJNyK(p{}GoEg*sc)?v-LsV#3xhjY{?N-t{0oQz2p* z$;m`AK4zTWDCzSHtM9>xVzyK_<;q^Rj#Tqgn&Z+}InGsN4m~g0QUo}fy-xtD!Or@% z9;d<=*40==MsS6|axuJCs?XGX(8YIFW3-c%pz^a2jsJ90kpCjyRim0OAb8*jujoxa z$Wi!xQNY0Y`{#4>(WHJ#CKeokw4;ObR7N2 zAd2$QdQSq3{L8(4_mqSA-*o9#aUOZ0|0zPlu?_rZQ5}0K?`t$-V(_9+FpiVG6oAfQ zkNJK@PZ4<>DFDD6&(>L16q*|lDo~vc@Q+M~tb*K)NDxWPE7CB#UA9*wZ^wk58az&V ztC^cW&B9UAE(8FK@j^PP?Xi;Dr8$}`ap!VG6`6E+hE@Xdju^iV|L;Hx#C>bbasbyQ zu;KCjY3R2?cpo1Z(j*?i>KX5Rkb$CWp9Ru`}bSKjidu5o3AP z)%r8zT1)t}I}CoC`W<2X2ieTVT-R9?7d7T3GZd8;!&+N1sAan4qtLqz6J^^LQ2C$P)3EyC_B-k3IPn-R zA3qEI#bqdeQ2&^w_{MVyOc9F1#OF_DBtcPg30s_}R5oj@r~Ul^RY=0xuglxa(7lT8^A$%oD3uyOuq|{+m^ocwttZFBzpOVr;4JOQLIXDm6z(x7pR;;oKbD$ zbZvy_R7w5Kl-2m3SK6ghZX^?`0SJHcDO)<%{`@HYPWhZUEiPzZo;2t+iACJ)yV*P? 
zAQ@+tz2qV##wLp>+WuO9TIvq{Yapyvr=NT-1OpW9ofcTq%@$nqCm`Ga9FA}-o7B>x zxL-0Lx}sak)LptpOCuJ%l9v3oA^4TWS5@Vv)@-6t<`GeKt7E1a7ksX@N)yht#=C9X zy2a6wW!3ONd8-d`Mh^-K(5S)8#gOi+p{)<9$l!RNcE-h(Q*INUHy@rOCcmiPWW35J zzppQma`iGdPMg<}p4OKh?GHULv92jVasK=0WMhU47#4#UM%>u9f@HSqm0IGC?%hTq z!qw8cI=&|F$s~l;V7b)Upkq!|wByZe9{=2{`Sjwrm;=1L{ZALm6vxv|^F9L`G()Ki ztzUv0`s5gEkRUPS63YD~uL39XF*`c`>=OOOQrt_+&25|=Hi4C^}hAI}&2GF5$_II9s&Q%x;wR{=9@bz4vEfSm`kcb9^ zS1Uge9J&HY5YV8=4MLTmq zza=ZzkvyUJJq0@`0h6KQQ-nk$7M}sR>`z;#2{VwNU=|Qu6yTch*H*UM-{-mBsEh4< z^aAS#aH0O#+&j2*pLxUaY&Hu=c;l#u-i@J|Z&GZ4)0MNSre!zUi*sb?#s;U33_0%B z0IlYv7cxz8s$&5xp#8(cq*&tpT6GeqVPma}dt+H`?#2z(YF7bnGa#IT>7*y+{XFmJ zO4z=`iu(cy@p<+qRySA651F@{s?~a{6LZhw03PhB$@FCe(6p7G$4=4q-Mcq0<+XXn zsQo9r7o9%WGg!Z;OA_F1k1V6R6W&3Q`$*F+8H`~U)2w@EmFS5ykh5BGy)wOK1xM?pU zrfTovLErfoO%hSxNBO=+oiBJs{U8JrtD}4aC5_eYXX&Df0H9AxQ^|tw$Wg5q;-|*7 z#f$nr+nqqr8V2^~0(CtWu0`&zo@gyfPrrLb8=*t#B@>y&KjC;Ns>}za>MjN;H+Bon zKY3P)|8oe7kO}Sbu}Siq3}|VVh>bynxRdAwAd!+GL7bJl-KrF$X9P_2)^w`GAuBlt zG|>W2(V8Ce6O(7^N@`PuX0ChE%UkQY`w+MDC*;4xA3Q4y5dxK&OigRDfl307xm~y9 zrx?8bIY0O29t}<)$oL4(M%LM6Y)$S1P}0jq+D%4YwAY=9t^sVdwI`jWY(7?C5_j6^ zaC6Km%!H8Y%#K@hlEoC>Vro4Oi7__&$bI0L0l2vP9vLd38N+9PG=#^stnh?G)E;Wo zt9>D?0qy%CabMnTN7fDsQvG$(dz#+BTg$e;4b5vGso$E%1Ol;CheWyD&-_j(|6&~Ox8%}#=M|1Xw}+$(aNa_R}%J6YFfbj2-)uBUE93SU}~&m^JS%# zGB==TFCbfEO=l0!^!11=-|1abZp(i?C=7$P2z~1%IE-@+-hT^XqGe0mvn2&90_w%i zO($058Mo%G6UTQzfnxDVjLt7RXx(jGgn6)As&zbMNol-qW)01LJqQB|&YdRvi%fZb z2pxr3x+o;T&lIvvnUBqVk@ZFWS)w+B18VKzc4WszJ_dTy<9$>LgX37rG$I_RTLFAU zS3c$FRaGzf8`MyZ(4*u!iTdxViuh;aOBzA*Zdv{k8Xj$@%T@=V%4Y73tF>flIYZ2> zxwx4usm@acT<_<4(v3+-p4}zb}fh57~TnRe4 zL9gu7;@tj0tkTLp-!R`5y@+3-Qp}EG% zfIUxOd#IlNl6CDaZ@Zg^y6?$dk|67=LM60Eq`bP6R^MkFX>>Fmrrp87%iJFpJq6uzE5JOLnF~Fxjw1gOe5%#5`oMY%4Z)g&T zA>Lv1%b?vIKkqYnvS8b_s_gy&d7K%4*%{bKHWf*xRxRd<_RKNw>mj{49y5{XvmoI! 
z)vOwC%DN_E5Z`+#XbcYDIM)7{nL9{oZd@Os96lEzo(1I#vihyS;rjDy$NPP(mLi3HEOIUGks>F4h|H@HuBUhSARdT2 z(#T4TpC;tN>Wt52ATJrWb(>p|Z0EXrD``E?>bl+wKV0HPrruES>PnYiW8z9(ftS!t zBe=;K+&=gQ11cXe{LM%;7&{P|;nLW&TYM)|qiKZY69azh@t^coa1Z+XN&@b>q@xnD z-P%7zhLTSkpbu1futN>%HrO%XVD=2^C&MOtc%_Mr>npa_O2iT}m0@j;3+!eVxA1Jr~2#n-!{gYPC1+| zI$VBi);m24!>vMId&F{)q}U#6ieyIL&O=vh$?lY;!?ld)`(j4FH+On*`);@5IMSpq zILND|LB#z)kXhS0CssbvjiS+?i|e4DW<8?ykCyq2J_YF*`r zpAyb0%0P|->WNTdRJUk!>BM{NbN_fzkaApa2^ZftXOc}HNr~~prFHjVvpc94c_I0t zH}vX0%o+LE3^-UG1QBAEBG|x41kN0Y=z8x6lo$O~QTUd$tl0f7mWtIm z3xXf8OvY|8gDOZ;t)wBV{Q5ku^%DZrGN*jZPFWLUDp1n_9sG+2^cCAdi+NT)hIT^^ z3T>*7GTb3c>{4K8;9Ou`dHnpBVfw&sCGtvMW}$7nT@rR}!kSUsTNIsSo;RPw1T(A) z2ylxD)GV?SW-i`Y`-KehBw;Mj1hd;;#~>2k&8~ag%dTqTanQ2ne$aEpuWf$$xzt1K z3xZs7TEHv?oNnB;iJkE|)@n%ex#NRn=Y|KQ z+Q2WoR#4UjG5KJ!a{Z7X1$@-5nR6m7=a0O3(Ri5y7cLr_o;@N6tGYfXmiu}!0ER_K zhIef7_ma&4q4YaA+_Z+JApQ%iJUf~)bFe zuXefoteTjx5W+)I)Jk8xA+)l7)|M$J(YwriVhQewV0qW7{-YdZ{uD=b(x1+b3EH=t zct^&PwoX6^^W%%J%!0JC_0DtZlRKC0r6<_GSeYt%!fV7_LbLHAj`h@&08x&9n52L@IE9jT|pW#s)+T zr1~daW}6tr!w?|&Ac~u|zS`jer&iO5ROVIITm=(|3**>NCR(Ef4~#%s6SF42^omj* z4U5g-%(i#298lE_<@&m>bz?tKxNyP$R@3OmdraE<<8+S_$vLJuQWwWV#cHb-X10=V zQ3XnQd7cxJl{|bJU!0h21TsSoANkm7#xP0*+Ww2rMu_VgAoNk1%q8RAwft4XIIWy7 zyzrxIF+*$&UX~e~QN>(^!no|F4WZE=r(P}#TfCOrUdv(J1Z8~Ji@WFbn>DXwop2tK z52|HquYdfxDaKJdznUc~G`w{JQdInWv}M)o@*z0;Ab=a!?K_gb0`%m|aCNpbjQ7iF zN3D;BzUQ#GlI>*#p7t;TI6^Fz%jh3SxcTjF5OQck_KRQ;l>k{-?o{~3Z1O;dU{_<3 zgHK11F3gXxW%%Dt(l-#&09vN;FsV4K-DW;|y*O$$is+0S{Z9GdJGa$ztE2hQ5LcWb zFVCBzQaSphx!f1?=pC0NZ!QN)F1S)Mbw2Y&IHO4AG`4@e)S1#Qv6q=N8YoPCG2lL)W=AXmC+Ec7YbcYj*_4GpKQA zAo+tjQ?RKFvI`ZlSCmY&Skc|lPrM{oUge71ITd_NR%LgoCW#wn8b4+&3&nYFxIZO; zymIERsXQqzZ$p`bY#VgmYqQlPDxI8_7U>Y!WstLJ$a(9d8s56!%DmJ zV4?tL6~>lzzH&v3Ft@iS)f%W!#V3Irq1W8>e|5mnE?q!{r(G=mVCudJWGTE5DRnw*j>n8I-2@ z{eUrwGs_}s{l|s&VgT$=oqFe&uz3xi{_(o5>E^R|0+6^~W`DB%a`^!mMoHom4S;P8 z5wm}&UtbLkw6g(1c-iuML|7pWW{U3viEKA*K1)t~XJt{tRM@!-KqSTi;j9yE>K@Ec 
za2=Obra!{L9d3M)&{!Y%Np&^JnD2;B4yI=N5^^G(`pdA78-|9$pd1A{(IEWOYZ?<#pq8wl7(QZoFweCXSd+V6jqK@-=P!h0!S@10CajhXz;hU;g3 zvHl}{$MJBhY+g0o2_zSiC(0bT;}nEJ?#_(#4qSL>a__{`E|@5tTm~-ubDc)Juxi9s z_8g38t(}A2K8}Aut0i9pF(;S~3&TL8P&qDb2XS1Oz0CdeTWP^oEU%PO@cRu7_iVYY z)=v)Z*Duz`4a?uOlIP1imG-*2bjEgT2VyGK{_HdmdXGJrh?iUSsSlp)txksR(;GwE z5g(N+cN+Khw05~;Iv$JLKG!m53X*h!NG|J?FwI-<45|Ok0_ZGno%L&&eCj#5eaXLlgkm4k8KNg)UlW{v3O>1cpDC4&Fq?#5GB$U_X!Py>?X+POO8Aea`WqZwSA%-T{o5X;GX{2#6;OjmbPUAERM4@dTd-)_vvzy*#m)u;;xVPDjom^*Spkgc_V_jJ`l(52-Uy{vA6R7Dy~lr6k0ir%=|<~{|QB6BxpLX))nJxpkjIm5$bOp z4%)R-Tb!Pjbz2|PPU~K8Y~(1P4c=_y2Cdm_N84LG-ol{f%K9Q^S?ix-?0k8LhaY)D zlJYOEb{OMZC})h?5FL=#{g(4FK;|Q5juBix%lb3ym&duU-)IPu+!C@V4sI@{-;KU! zaQ##5&F@`l^ciU{DcgQ!PI0jFzM!GvIk@lq@1{SKO8^AQ8U4}^PkDok_CS`4-2q5k zekru<<|#u&?wvNVv2|~e0~Fh~EFWo7Y-UT(HFNsKc!bC7K7aMVLxav9Fyi5a!6r~_widJ^afPeJ{0?)hreT~XLhLiHe zuZe6nFC+daa=w+nKA`wI4^~|8uq(dG;{aXJr6D|`D!bGFWG_ff8@mbIKE3nKUR{wPTe<;j|LWn?DZ#yuKOJNt3^ zP;ecuR9j7a+|B0+Jwt9pE;di@(sKB5aKyhujKJL_`eJM3 z=GVz&JTA@wa)CeG$x4Tv98~}4N6yXgEBvyzRV)9da)UX}%Zuniu0E_tcP2tQbKx6E z6J{&)qW;dN>$u{V&$Egmdl~su&uv->?%gKxw`4eUx}V6}u4VatrJ`2M4&aH?Zj2In z)U$=vGy1y_uEnk$?E9;re!;;wA&{L-<5Sd9q)MS#TsGbPnbxsjP}XqQ!I<`=q-3h0 zSi6L4;bBQG*y^HD4*CA8-MsdwV(;L*i#BxN|0z%i$xnd}4Lll>7xT$eRbvP7UNv1Y4 zu3X4TzPmy-`D`bA>ef;*Hq|l|1A~R;!{d%0l(`x5!l&}6>WbCXaCdh{2g0KzJAol= zyK7SoAVdv|WrBCK=$*Fyh%Q1rnWeP zW{Q;o0w>Z-DC}Fq0K)}}oZUqQ!ex`Xb*ZZB{p;;+=DBV!>-S?9LtD?$I73xAKx z7kxLiQ>;_TH<=h+}i?Rb-EnJr0?1 zaAanOB+1B(V`MvI#IYUczRu_V{sZ^#_YWQ(&f~bw`&zH{d_G?%l0xW1YuD?xwx%bH z1$CKDVuZO`Mn>8p_-<-wobBnqsJ%n%s~2H0X8?as^5Rl;|FE86kjk7HPUMKd+87o< zKByf~NllM9ek215-Suthp}1Etom)`>4JD7H@s=@13ZR2tTyA=4#bKGkO}Yn6&6W9LhYn|Ak} z*3mZSeD|Nsk zD(T+RL?#v38&y~k1qgq(?Bzf)=}0~5l^G9$J7-^Bn3&7QPowj$5p8{r z01To8O-H(Yt94jpy2sbMJbYp=oVP#MTy2~J@Lbb@8&TlrQf~E=3H+_ij`+uJ+auGU z8bnO)i#>&fjBDzs0Q|9D84Ta31kk%Tn;;h%V4cB(_=4x(|54a4S}EE0uX?5vEH=-* z<4uH{8iaH!Kf=6O6-hu@kYV_#SnBYUVPCeum0L41=ZNZI%;gdG2frhMOki#^=BcE< zkIJTyk&a%=lPjM;V}xQnvL|^H0Bic=ubFbebwBfWJ0=@BsS}-6S3osnAyxAWAjW;U 
zg)~YymwaqW<2`P=KRCJ@ekqg|I&?5k$LXyS{AyVC!vI3%*h4!$s@50VR(Gk7S}}oz^EW)JDpCN5CRg^h!~4H24G1`Xb=0-q>h+Rbra* z)25%Q3rqU7DA47iLo~5xKX20yYJM1M1unRyyv(4n78(|2z1)qGWtLmcWnXEX*Z}Uk z43r*o8>XW{NLR|#_wgefW}#Ru9Qk*{a{)~JIiMMq&6GRvgz~Hrs&Ko^v<9uhCtGo*Vv0_ zof)f++F~cv{V=Ro)t~;HBWzEU-K6A?8Q0RAKJJeT-pFfjuh0&$v>wqOg_gBzUwm|f z+1aPI#`IHhf$~8!sf)PfyxcUUo;MLi1{|JZFH95Q&V{ghjnU@4{?``>v;1GP5Z%o4 zz;21q%X?#;-cduK5Jdu5*jh$Q0Y5A%$=59W6X|6&e@6b2z6Brob2p;I5UK=Z3^YND zO|5oC7pfN0b(G=lUWixK6QdTP zbkciV(L*=Bh&ycXXDlpi0nOr9Sah2u^5*;_B^^B@-ia=@J9(|_0M+`0{q;L>MXZpx zn?THV^>)0hGY}qWK1+`?Z0+p^Wq&G-i=A&^G~cL;?HUUU%Y{I&_<41;-N)bf!|=d6 zzzwiNV*`s6D|@z2Vv+p>bHL33w6+(z7A*JS7;Ub*cQs-!yNS(7fj%|>LJRXatA6VD zc!(1)Wmy<7AI{r^#YnC^1m&Hay6BY-vG?&pK!p@@9>GX{-!xa{Bp2%u{#<72%kL}A z0eEY(ov(g9(wc_2uQX!ff9S2Nj!oS1Nh3$pu|*x6#Y3j9@tuniwuT4 z*IW5&&{IXfHm)a3!Hd*EqYlt_#YEB(98j^*^nM3j%ZK;1a+&{CU!;CSvDGBHGQb*p zy{*r~ryRYb?3$RtcJ_v+bYG2UNJ;r}c}j=OojYriAkDrM`d+^Y?z+@7X_qh730hmo z4YK^my()Y)&bRk+<+~e|>cAuR1__VuQaE~z=F87rhpQC!F)G9a!HDS& zuwyJpuLgg!$B*>{DrD8oa)kUnNoQJp?>h%MWp=WhZC>~m_UDyqx3Z>sqb$KCfjL5& zQysJ{65!%Jj=UiytuHFkO4C%DSNh>ehWiQy%-%gUM@&y5pr*JDDMSiv=nMS>Ttfrx zHMhJ&W`S76 zk6(T5y=%j(w3Z;PF36OaO02ZK;o*3)< z{=IOY3by~HmM3Vvr4sj8 zv#;FkJ)rqTWZXsf-}u|0BL$Pk`qa!Gx7p6qi78@(i@Y=I<&Qp$)IYAqOz{Co<1bJR ze2ryHm<}zI!{jh2r^QmRHaU_yOR>{t?yh}8mt8W*qyT*(d?K$Zf4Z#{PpRmrAOIk; z1~auO8&J)$=1~08mqYQ^bS5_GP{7RHF=JW&HDPw9r5mO{_(ePMI2-XsFN1>>aWA9S ztfu1TXLTX>;h7_2tXgH58_Ch(-Vb`luLm`j59BcYtohaBXWdofd>+6BEZyAb&FwC!G|f8qmbh>V)&G~{Ij20!lt)4HFf>($yrQ_ z+O5*yrXo4nGY?T3{g`DPoHXn+)Cw>B zwzf0BS^hI@ZM)}b;D+dO+$B!ze%H|sLYqsRj@Ba)_jlR@7YFODv4rUxjW(F|d&E%{ zFP;da1oTz;do~R?d74b!_87D6aqBAs`+W?bDI2oFTmKC#crGRV{3vX+L6gUzv260- zg`u9v$J5mt^w!?ye3jf~`?zBaX^T4`s$7w-rqER>yKI$mM zz4mn-mDi@e#{X`y}L z^xn&yd||pT`ErJ2+LF;k;=Suo>T;~+aIb6{YGW&VY_oIkhtke!KJCzB3&Dnk>n?~j z{hi|R_>VPZCaQL}z3X0gXBKok;7u0O=1vW@YvB#-2<%tfTY~h(*~Neyu9Xwdfj-*N z!KtErkvQ|ZlRChuU}8vK{PQte0nJ-W*3mritp)Q?K}UDe#OlJH~5`}TW&yM z5ABM&=H9I)@pV6Gv{Wgxr<4N94X!#i@H=@mY-uPuS+i1*g5TbAKL7r+J2$U*j$11 
zmBBi1_}85@}3n+2VKJzDle@- z66gX4%7Z((I0M)J&M%G$ncEm{(9)c*!rzK8Tldz()+n=gZAQ9@;}$Zx944~nEaT4k zA5L87Uf&dHs@WIWz1D6nNQE6!^gZs#o3TH=g!BGI!|k@5t)-cLX)9#Dt1LdL5=9AJ zKo+Gu@-@3`Q~Y)9rTVtY+gbdyb8THMT-7(ue&R@rqpN#$+*@t5tYaiHHeywkb$;wz z4*xHkt{W!0CLcuik!xdf2eBLpn@u)}eEda@0$0>VI-^`7eZ`_&hc%3;VUI{c&d?Pf z^b?vrwF+(YClMbl5Y*GAM|XKq(W_F#8x&Yi2N&ZEC1>sw8%H}~e>&D4V)VT#HI`uO zh&44-hRNi}@1Y`}1XaG`4+V#eOLkrG<1{+I5)y_yR*i3$RadJl4#|;03u(5nZA?)} z*HBdZ&?N|GYL{`^Na$%X!Mnfd7e<(;qxgNSm>bt8KOJ`@`<8vDA$3lDqPr8AkvLmD za}ScC+jD&gOat}=uEJ9jU|F$KJJ%vqgW89 zmN0C`H}I?5mdd7<4Dw@Jn_FaG&{3N`s6UU(ul2|EN*0CV%NT;2u&nc6KQpG7lH}f! zz$}he-D;@_<-U1*I*~4eW=OIIF#dx+9+i9!X(;UW+8oF5$|&qxp~Eopy*%2uzE;Qd zAbV)!SC6GM3RH4&_{H0smEXuAh3*BW`^Um*i?-k3|7sraA~On=8)WL7N#w#5q=8l2 zd6Tc!sm#U}NTBh@tppR4j0Q>`-ZOrne2*bw7tE%nIs!@@ZRhTH1Gc)sy3765gB;w~ET&}X?7xeq@w8}0G*E6X(GUzy8 zeQPAGHMp$k-lE|9`4_^0YE>0!NgsH)Y#d&yEc!W z69lBM$bMZP0`~TH0yPvVgT|aL{GDeo^x#hOUX^icG;X5F%y|LACS-YsvbIK^Egcy@ zFY~(BnkYZ3){7Y5(Q?WS8NFi7>3YS!^7fNVCE`%PoHt4HL0@Fb>g;RHYpNa!ccEVY zyiCb#idDALb=A>2vxj18-d%W|TVL!q>nY zer>@@BFQ75x#n^3gy%?=YUTLGNZXli5C`D~6e5#^-OVugD`7VMyJWRzD$jE`JdS_s zcg1i|YlxmAm(^}}6KHAXJ*pO~5$6b@CDMoRC3Xz2c;fE2hp_EF1$r886B zDvIJP!2<(4D#Pe!KK!fFnZ0n&qMq5gGGO|>Cmc*uIY11Q!vDRtnNNor4FbrKREFI? 
zMhIue*|2Q09DFo&rm>=78?T^Sj61wI^k}t#b6i_mV>2@ccz8pkWsabm~tcn z0pH_bZariyKbxeYR*l;jU4bCKXHOGh3*nhA1JlROD?49O=`#&COf-RGm$B&u;p8)u ztm3JkhZZ*^!Bua& z%K>U=P`ZwqJ5l()kOhChLB0tHA>)+ReU+jRk|H3qOejk$ifDVNZrroHw@m`!2V!5- z4GV?s#RcIH-up|o1T}Q6^)!m|bRwCcT>n@(wrj9n|7UD$ETztatuO36KR0TRB6v$n z;Ra7UGH+&_mb&Z^gQxuiCMVx%4SH)DZE!x9847v0r%y8lAvuCKUOH2q$Fafua*N+U z;SY_YZA!GEcg_ex)^FN#$}q_19?vE;NPes?Mgo0Nu{Tb3{_Gut6%lsG66 z9c_GjQYwwP`A(b^dd*Jso=K7^v5x}V8fwj%dL>dWTKFl~VWsUdPmlXz$UCxcLSIOM zff2}kkdCxz@Y@+u#H-rU6&TIz4#sR@)!xhgJtFzvKy(q7tNogEX%@SG$5dEkCVtP= z4wwu6z#%8A%l&YLp>rBE(DT=>q!6PkAAi4A5x>vGp+RudrbRWX6Rv7^SvP#(g~TQr z8LSEJ+|_h;C|9WC67>9wn*fR%)|g&A2>X7Pt{P=2R|4 z*zGf1ynB1h#&19~$Jfwl11E4HMCrf9ECZHWM%+7W3E?eWwGi;)8*n+(!of+{y%U}R zhtBx>!*B@t@ZVc&T+E-nG-*&xbXYUR)kf6uY@b z$eNwRk0l{_=mRkNzV70ow~YSPpRaMVLwsGn2(b}2KWb}QOukd+g*w`vh77j{t6QII zW6|&gTAGv`culjz^l`NjDPqyd7}+{=(K|A+E%Ezg={{*|@SCWrCO%>wOw$?(5h4$s!F5 z%Lj&q__hnXj*iTXJGIA(8t8z#IF^q|YB>HX( zjVaDhN|IpVTpTgp?FA{5)`&H>yE^TB12f_|k2AbBJ zT^-L~r&-&;*Or#JXqzO$QW*-@8^t@9+})SSpboIG;Hw#)Q)Y?(-`~VJK~SCC|6cz8 efBrwO#x@~N!Ig~w literal 0 HcmV?d00001 diff --git a/packages/server/src/env.ts b/packages/server/src/env.ts index f796766524..ccf35206e7 100644 --- a/packages/server/src/env.ts +++ b/packages/server/src/env.ts @@ -161,6 +161,14 @@ const defaultEnvValues: EnvVariables = { SAML_IDP_CONFIG_FILE: '', // Config file for the Identity Provider. Should point to an XML file generated by the Identity Provider. SAML_SP_CONFIG_FILE: '', // Config file for the Service Provider (Joplin, in this case). Should point to an XML file generated by the Identity Provider. SAML_ORGANIZATION_DISPLAY_NAME: '', // The name of the organization to display on the login screen. Optional. 
+ + // ================================================== + // Transcribe Server + // ================================================== + + TRANSCRIBE_ENABLED: false, + TRANSCRIBE_API_KEY: '', + TRANSCRIBE_BASE_URL: '', }; export interface EnvVariables { @@ -260,6 +268,10 @@ export interface EnvVariables { SAML_ORGANIZATION_DISPLAY_NAME: string; LOCAL_AUTH_ENABLED: boolean; + + TRANSCRIBE_ENABLED: boolean; + TRANSCRIBE_API_KEY: string; + TRANSCRIBE_BASE_URL: string; } const parseBoolean = (s: string): boolean => { diff --git a/packages/server/src/routes/api/transcribe.test.ts b/packages/server/src/routes/api/transcribe.test.ts new file mode 100644 index 0000000000..d73bbb2ab4 --- /dev/null +++ b/packages/server/src/routes/api/transcribe.test.ts @@ -0,0 +1,177 @@ +import { readFile } from 'fs-extra'; +import { ApiError } from '../../utils/errors'; +import { getApi, postApi } from '../../utils/testing/apiUtils'; +import { beforeAllDb, afterAllTests, beforeEachDb, createUserAndSession, testAssetDir, checkThrowAsync, expectThrow, makeTempFileWithContent } from '../../utils/testing/testUtils'; + +export type TranscribeJob = { + jobId: number; +}; + +type OutputError = { stack: string; message: string }; +type OutputSuccess = { result: string }; +type Output = OutputError | OutputSuccess; + +type JobWithResult = { + id: string; + completedOn?: Date; + result?: Output; + state: string; +}; + + +describe('api_transcribe', () => { + + beforeAll(async () => { + await beforeAllDb('api_transcribe', { + envValues: { + TRANSCRIBE_ENABLED: 'true', + TRANSCRIBE_API_KEY: 'something', + TRANSCRIBE_SERVER_ADDRESS: 'something', + }, + }); + }); + + afterAll(async () => { + await afterAllTests(); + }); + + beforeEach(async () => { + await beforeEachDb(); + }); + + test('should create job', async () => { + const { session } = await createUserAndSession(1); + + jest.spyOn(global, 'fetch').mockImplementation( + jest.fn(() => Promise.resolve( + { + json: () => Promise.resolve( + { jobId: 
'608626f1-cad9-4b07-a02e-ec427c47147f' }, + ), + status: 200, + })) as jest.Mock, + ); + const fileContent = await readFile(`${testAssetDir}/htr_example.png`); + const tempFilePath = await makeTempFileWithContent(fileContent); + const response = await postApi(session.id, 'transcribe', {}, + { + filePath: tempFilePath, + }, + ); + + expect(response.jobId).toBe('608626f1-cad9-4b07-a02e-ec427c47147f'); + }); + + test('should create job and return response eventually', async () => { + const { session } = await createUserAndSession(1); + + jest.spyOn(global, 'fetch').mockImplementation( + jest.fn(() => Promise.resolve( + { + json: () => Promise.resolve( + { jobId: '608626f1-cad9-4b07-a02e-ec427c47147f' }, + ), + status: 200, + })) as jest.Mock, + ); + + const fileContent = await readFile(`${testAssetDir}/htr_example.png`); + const tempFilePath = await makeTempFileWithContent(fileContent); + const postResponse = await postApi(session.id, 'transcribe', {}, + { + filePath: tempFilePath, + }, + ); + + expect(postResponse.jobId).not.toBe(undefined); + + jest.spyOn(global, 'fetch').mockImplementation( + jest.fn(() => Promise.resolve( + { + json: (): Promise => Promise.resolve( + { + id: '608626f1-cad9-4b07-a02e-ec427c47147f', + state: 'completed', + result: { result: 'transcription' }, + }, + ), + status: 200, + })) as jest.Mock, + ); + + const getResponse = await getApi(session.id, `transcribe/${postResponse.jobId}`, {}); + expect(getResponse.id).toBe(postResponse.jobId); + expect(getResponse.state).toBe('completed'); + expect((getResponse.result as OutputSuccess).result).toBe('transcription'); + }); + + test('should throw a error if API returns error 400', async () => { + const { session } = await createUserAndSession(1); + + jest.spyOn(global, 'fetch').mockImplementation( + jest.fn(() => Promise.resolve( + { + json: () => Promise.resolve(''), + status: 400, + })) as jest.Mock, + ); + + const fileContent = await readFile(`${testAssetDir}/htr_example.png`); + const 
tempFilePath = await makeTempFileWithContent(fileContent); + const error = await checkThrowAsync(() => + postApi(session.id, 'transcribe', {}, + { + filePath: tempFilePath, + }, + )); + + expect(error instanceof ApiError).toBe(true); + }); + + test('should throw error if API returns error 500', async () => { + const { session } = await createUserAndSession(1); + + jest.spyOn(global, 'fetch').mockImplementation( + jest.fn(() => Promise.resolve( + { + json: () => Promise.resolve(''), + status: 500, + })) as jest.Mock, + ); + + const fileContent = await readFile(`${testAssetDir}/htr_example.png`); + const tempFilePath = await makeTempFileWithContent(fileContent); + const error = await checkThrowAsync(() => + postApi(session.id, 'transcribe', {}, + { + filePath: tempFilePath, + }, + )); + + expect(error instanceof ApiError).toBe(true); + }); + test('should throw 500 error is something unexpected', async () => { + const { session } = await createUserAndSession(1); + + jest.spyOn(global, 'fetch').mockImplementation( + jest.fn(() => Promise.resolve( + { + json: () => Promise.reject(new Error('Something went wrong')), + status: 200, + })) as jest.Mock, + ); + + const fileContent = await readFile(`${testAssetDir}/htr_example.png`); + const tempFilePath = await makeTempFileWithContent(fileContent); + const error = await expectThrow(() => + postApi(session.id, 'transcribe', {}, + { + filePath: tempFilePath, + }, + )); + + expect(error.httpCode).toBe(500); + expect(error.message.startsWith('POST /api/transcribe {"status":500,"body":{"error":"Something went wrong"')).toBe(true); + }); + +}); diff --git a/packages/server/src/routes/api/transcribe.ts b/packages/server/src/routes/api/transcribe.ts new file mode 100644 index 0000000000..56df2d6a17 --- /dev/null +++ b/packages/server/src/routes/api/transcribe.ts @@ -0,0 +1,97 @@ +import { readFile } from 'fs-extra'; +import { ErrorBadGateway, ErrorBadRequest, ErrorNotImplemented, ErrorServiceUnavailable } from '../../utils/errors'; 
+import { formParse } from '../../utils/requestUtils'; +import Router from '../../utils/Router'; +import { SubPath } from '../../utils/routeUtils'; +import { AppContext, RouteType } from '../../utils/types'; +import Logger from '@joplin/utils/Logger'; +import shim from '@joplin/lib/shim'; +import config from '../../config'; +import { safeRemove } from '../../utils/fileUtils'; + +const logger = Logger.create('api/transcribe'); + +const router = new Router(RouteType.Api); + +const isHtrSupported = () => { + return config().TRANSCRIBE_ENABLED; +}; + +router.get('api/transcribe/:id', async (path: SubPath, _ctx: AppContext) => { + if (!isHtrSupported()) { + throw new ErrorNotImplemented('HTR feature is not enabled in this server'); + } + + try { + logger.info(`Checking Transcribe for Job: ${path.id}`); + const response = await fetch(`${config().TRANSCRIBE_BASE_URL}/transcribe/${path.id}`, + { + headers: { + 'Authorization': config().TRANSCRIBE_API_KEY, + }, + }, + ); + + if (response.status >= 400 && response.status < 500) { + const responseJson = await response.json(); + throw new ErrorBadRequest(responseJson.error); + } else if (response.status >= 500) { + const responseJson = await response.json(); + throw new ErrorBadGateway(responseJson.error); + } + + const responseJson = await response.json(); + return responseJson; + } catch (error) { + if (shim.fetchRequestCanBeRetried(error) || shim.fetchRequestCanBeRetried(error.cause)) { + throw new ErrorServiceUnavailable('Transcribe Server not available right now.', error); + } + throw error; + } +}); + +router.post('api/transcribe', async (_path: SubPath, ctx: AppContext) => { + if (!isHtrSupported()) { + throw new ErrorNotImplemented('HTR feature is not enabled in this server'); + } + + const request = await formParse(ctx.req); + if (!request.files.file) throw new ErrorBadRequest('No file provided. 
Use a multipart/form request with a \'file\' property.'); + + const form = new FormData(); + const file = await readFile(request.files.file.filepath); + const blob = new Blob([file]); + form.append('file', blob, 'file'); + + try { + logger.info('Sending file to Transcribe Server'); + const response = await fetch(`${config().TRANSCRIBE_BASE_URL}/transcribe`, { + method: 'POST', + body: form, + headers: { + 'Authorization': config().TRANSCRIBE_API_KEY, + }, + }); + + if (response.status >= 400 && response.status < 500) { + const responseJson = await response.json(); + throw new ErrorBadRequest(responseJson.error); + } else if (response.status >= 500) { + const responseJson = await response.json(); + throw new ErrorBadGateway(responseJson.error); + } + + const responseJson = await response.json(); + logger.info(`Job created successfully: ${responseJson.jobId}`); + return responseJson; + } catch (error) { + if (shim.fetchRequestCanBeRetried(error) || shim.fetchRequestCanBeRetried(error.cause)) { + throw new ErrorServiceUnavailable('Transcribe Server not available right now.', error); + } + throw error; + } finally { + await safeRemove(request.files.file.filepath); + } +}); + +export default router; diff --git a/packages/server/src/routes/routes.ts b/packages/server/src/routes/routes.ts index 39d6f305cc..0399ba0def 100644 --- a/packages/server/src/routes/routes.ts +++ b/packages/server/src/routes/routes.ts @@ -12,6 +12,7 @@ import apiShares from './api/shares'; import apiShareUsers from './api/share_users'; import apiUsers from './api/users'; import apiLogin from './api/login'; +import apiTranscribe from './api/transcribe'; import adminDashboard from './admin/dashboard'; import adminEmails from './admin/emails'; @@ -52,6 +53,7 @@ const routes: Routers = { 'api/share_users': apiShareUsers, 'api/shares': apiShares, 'api/users': apiUsers, + 'api/transcribe': apiTranscribe, 'admin/dashboard': adminDashboard, 'admin/emails': adminEmails, diff --git 
a/packages/server/src/utils/errors.ts b/packages/server/src/utils/errors.ts index b47539aa9d..a455c281e4 100644 --- a/packages/server/src/utils/errors.ts +++ b/packages/server/src/utils/errors.ts @@ -142,6 +142,37 @@ export class ErrorTooManyRequests extends ApiError { } } +export class ErrorNotImplemented extends ApiError { + public static httpCode = 501; + public retryAfterMs = 0; + + public constructor(message = 'Not Implemented', options: ErrorOptions = null) { + super(message, ErrorNotImplemented.httpCode, options); + Object.setPrototypeOf(this, ErrorNotImplemented.prototype); + } +} + +export class ErrorBadGateway extends ApiError { + public static httpCode = 502; + public retryAfterMs = 0; + + public constructor(message = 'Bad Gateway', options: ErrorOptions = null) { + super(message, ErrorBadGateway.httpCode, options); + Object.setPrototypeOf(this, ErrorBadGateway.prototype); + } +} + +export class ErrorServiceUnavailable extends ApiError { + public static httpCode = 503; + public retryAfterMs = 0; + + public constructor(message = 'Service Unavailable', options: ErrorOptions = null) { + super(message, ErrorServiceUnavailable.httpCode, options); + Object.setPrototypeOf(this, ErrorServiceUnavailable.prototype); + } +} + + export function errorToString(error: Error): string { // const msg: string[] = []; // msg.push(error.message ? error.message : 'Unknown error'); diff --git a/readme/privacy.md b/readme/privacy.md index de45172d3f..cbb91ea1a3 100644 --- a/readme/privacy.md +++ b/readme/privacy.md @@ -15,6 +15,7 @@ In order to provide certain features, Joplin may need to connect to third-party | Voice typing | If you use the voice typing feature on Android, the application will download the language files from https://github.com/joplin/voice-typing-models/ or https://alphacephei.com/vosk/models. 
| Disabled | Yes | OCR | If you have enabled optical character recognition on desktop, the application will download the language files from https://cdn.jsdelivr.net/npm/@tesseract.js-data/. | Disabled | Yes | Crash reports | If you have enabled crash auto-upload, the application will upload the report to Sentry when a crash happens. When Sentry is initialised it will also connect to `sentry.io`. | Disabled | Yes +| Handwriting recognition | This option allows the user to send images to Joplin Server/Cloud to be transcribed. Only images selected with the 'Recognize handwritten image' option are affected. | Enabled | Yes (1) https://github.com/laurent22/joplin/issues/5705
(2) If the spellchecker is disabled, [it will not download the dictionary](https://discourse.joplinapp.org/t/new-version-of-joplin-contacting-google-servers-on-startup/23000/40?u=laurent).