1
0
mirror of https://github.com/laurent22/joplin.git synced 2025-07-06 23:56:13 +02:00

Desktop: Add support for OCR (#8975)

This commit is contained in:
Laurent Cozic
2023-12-13 19:24:58 +00:00
committed by GitHub
parent 0e847685ff
commit bce94f1775
79 changed files with 2381 additions and 445 deletions

View File

@ -0,0 +1,74 @@
import sqlStringToLines from '../sqlStringToLines';
import { SqlQuery } from '../types';
/**
 * Builds the list of SQL statements that add the OCR columns to `resources`
 * and create the `items_normalized` table, its indexes, the `items_fts` FTS4
 * virtual table and the triggers that keep the two in sync.
 *
 * @returns The statements, in the order in which they are pushed below.
 */
export default (): (SqlQuery|string)[] => {
const queries: (SqlQuery|string)[] = [];
// New OCR-related columns on `resources`. All are NOT NULL with a default
// value; `ocr_status` defaults to 0.
queries.push('ALTER TABLE `resources` ADD COLUMN `ocr_text` TEXT NOT NULL DEFAULT ""');
queries.push('ALTER TABLE `resources` ADD COLUMN `ocr_details` TEXT NOT NULL DEFAULT ""');
queries.push('ALTER TABLE `resources` ADD COLUMN `ocr_status` INT NOT NULL DEFAULT 0');
queries.push('ALTER TABLE `resources` ADD COLUMN `ocr_error` TEXT NOT NULL DEFAULT ""');
// Table referenced by `items_fts` through its `content` option (see below).
// The `reservedN` columns are nullable INT columns; nothing in this function
// writes to them.
const itemsNormalized = `
CREATE TABLE items_normalized (
id INTEGER PRIMARY KEY AUTOINCREMENT,
title TEXT NOT NULL DEFAULT "",
body TEXT NOT NULL DEFAULT "",
item_id TEXT NOT NULL,
item_type INT NOT NULL,
user_updated_time INT NOT NULL DEFAULT 0,
reserved1 INT NULL,
reserved2 INT NULL,
reserved3 INT NULL,
reserved4 INT NULL,
reserved5 INT NULL,
reserved6 INT NULL
);
`;
// The template holds a single statement, so only the first entry returned by
// sqlStringToLines is used.
queries.push(sqlStringToLines(itemsNormalized)[0]);
// NOTE(review): `id` is already the INTEGER PRIMARY KEY, so the first index
// below is possibly redundant - confirm before relying on it.
queries.push('CREATE INDEX items_normalized_id ON items_normalized (id)');
queries.push('CREATE INDEX items_normalized_item_id ON items_normalized (item_id)');
queries.push('CREATE INDEX items_normalized_item_type ON items_normalized (item_type)');
// Column list shared by the FTS table definition and the insert triggers.
const tableFields = 'id, title, body, item_id, item_type, user_updated_time, reserved1, reserved2, reserved3, reserved4, reserved5, reserved6';
// FTS4 virtual table declared with content="items_normalized". Every column
// except `title` and `body` is listed as `notindexed`.
const newVirtualTableSql = `
CREATE VIRTUAL TABLE items_fts USING fts4(
content="items_normalized",
notindexed="id",
notindexed="item_id",
notindexed="item_type",
notindexed="user_updated_time",
notindexed="reserved1",
notindexed="reserved2",
notindexed="reserved3",
notindexed="reserved4",
notindexed="reserved5",
notindexed="reserved6",
${tableFields}
);`
;
queries.push(sqlStringToLines(newVirtualTableSql)[0]);
// Triggers on `items_normalized`: the matching `items_fts` row is deleted
// before an update or a delete, and a row is (re)inserted after an update or
// an insert. These statements are pushed as raw multi-line strings, without
// going through sqlStringToLines.
// NOTE(review): the two "after" triggers are named `items_after_*` while the
// "before" triggers are named `items_fts_before_*`.
queries.push(`
CREATE TRIGGER items_fts_before_update BEFORE UPDATE ON items_normalized BEGIN
DELETE FROM items_fts WHERE docid=old.rowid;
END;`);
queries.push(`
CREATE TRIGGER items_fts_before_delete BEFORE DELETE ON items_normalized BEGIN
DELETE FROM items_fts WHERE docid=old.rowid;
END;`);
queries.push(`
CREATE TRIGGER items_after_update AFTER UPDATE ON items_normalized BEGIN
INSERT INTO items_fts(docid, ${tableFields}) SELECT rowid, ${tableFields} FROM items_normalized WHERE new.rowid = items_normalized.rowid;
END;`);
queries.push(`
CREATE TRIGGER items_after_insert AFTER INSERT ON items_normalized BEGIN
INSERT INTO items_fts(docid, ${tableFields}) SELECT rowid, ${tableFields} FROM items_normalized WHERE new.rowid = items_normalized.rowid;
END;`);
return queries;
};

View File

@ -0,0 +1,17 @@
/**
 * Collapses a multi-line SQL string into an array of single-line statements.
 *
 * Each line is trimmed before being inspected, so indentation, trailing
 * whitespace and Windows (`\r\n`) line endings do not affect the result.
 * Blank lines and lines starting with `--` are ignored. The remaining lines
 * are concatenated without a separator, except that a single space is added
 * after a line ending with `,`. A line ending with `;` terminates the current
 * statement.
 *
 * Trailing text that is not terminated by `;` is not returned.
 *
 * @param sql - One or more SQL statements, possibly spanning several lines.
 * @returns One string per `;`-terminated statement, in input order.
 */
const sqlStringToLines = (sql: string): string[] => {
	const output: string[] = [];
	let statement = '';

	for (const rawLine of sql.split('\n')) {
		// All checks are made on the trimmed line. Checking the raw line would
		// miss indented comments and any `;` or `,` followed by whitespace or
		// by the `\r` of a CRLF line ending.
		const line = rawLine.trim();
		if (line === '') continue;
		if (line.startsWith('--')) continue;

		statement += line;
		if (line.endsWith(',')) statement += ' ';
		if (line.endsWith(';')) {
			output.push(statement);
			statement = '';
		}
	}

	return output;
};

export default sqlStringToLines;

View File

@ -55,6 +55,13 @@ export interface UserDataValue {
d?: Number; // deleted - 0 or 1 (default = 0)
}
/**
 * OCR processing state of a resource, as a numeric enum.
 *
 * NOTE(review): presumably the value held in `ResourceEntity.ocr_status`
 * (declared as a number) - confirm against the code that reads and writes it.
 */
export enum ResourceOcrStatus {
Todo = 0,
Processing = 1,
Done = 2,
Error = 3,
}
// Two-level, string-keyed dictionary whose leaf values are UserDataValue
// objects. NOTE(review): the meaning of each key level is not visible here.
export type UserData = Record<string, Record<string, UserDataValue>>;
interface DatabaseTableColumn {
@ -112,6 +119,10 @@ interface DatabaseTables {
@ -164,6 +175,60 @@ export interface ItemChangeEntity {
'type'?: number;
'type_'?: number;
}
/**
 * Shape of a row of the `items_fts` table. Every property is optional, and
 * all columns except `type_` are typed as `any | null`.
 *
 * The property names match the `items_fts` entry of `databaseSchema`.
 */
export interface ItemsFtEntity {
'body'?: any | null;
'id'?: any | null;
'item_id'?: any | null;
'item_type'?: any | null;
'reserved1'?: any | null;
'reserved2'?: any | null;
'reserved3'?: any | null;
'reserved4'?: any | null;
'reserved5'?: any | null;
'reserved6'?: any | null;
'title'?: any | null;
'user_updated_time'?: any | null;
'type_'?: number;
}
/**
 * Shape of a row of the `items_fts_docsize` table; every property is optional.
 *
 * NOTE(review): presumably one of the shadow tables SQLite maintains for the
 * `items_fts` FTS4 virtual table - confirm.
 */
export interface ItemsFtsDocsizeEntity {
'docid'?: number | null;
'size'?: any | null;
'type_'?: number;
}
/**
 * Shape of a row of the `items_fts_segdir` table; every property is optional.
 *
 * NOTE(review): presumably one of the shadow tables SQLite maintains for the
 * `items_fts` FTS4 virtual table - confirm.
 */
export interface ItemsFtsSegdirEntity {
'end_block'?: number | null;
'idx'?: number | null;
'leaves_end_block'?: number | null;
'level'?: number | null;
'root'?: any | null;
'start_block'?: number | null;
'type_'?: number;
}
/**
 * Shape of a row of the `items_fts_segments` table; every property is
 * optional.
 *
 * NOTE(review): presumably one of the shadow tables SQLite maintains for the
 * `items_fts` FTS4 virtual table - confirm.
 */
export interface ItemsFtsSegmentEntity {
'block'?: any | null;
'blockid'?: number | null;
'type_'?: number;
}
/**
 * Shape of a row of the `items_fts_stat` table; every property is optional.
 *
 * NOTE(review): presumably one of the shadow tables SQLite maintains for the
 * `items_fts` FTS4 virtual table - confirm.
 */
export interface ItemsFtsStatEntity {
'id'?: number | null;
'value'?: any | null;
'type_'?: number;
}
/**
 * Shape of a row of the `items_normalized` table; every property is optional.
 *
 * `title` and `body` are strings, `item_id` is a string and `item_type` a
 * number. The `reservedN` properties are nullable numbers.
 * The property names match the `items_normalized` entry of `databaseSchema`.
 */
export interface ItemsNormalizedEntity {
'body'?: string;
'id'?: number | null;
'item_id'?: string;
'item_type'?: number;
'reserved1'?: number | null;
'reserved2'?: number | null;
'reserved3'?: number | null;
'reserved4'?: number | null;
'reserved5'?: number | null;
'reserved6'?: number | null;
'title'?: string;
'user_updated_time'?: number;
'type_'?: number;
}
export interface KeyValueEntity {
'id'?: number | null;
'key'?: string;
@ -267,6 +332,10 @@ export interface ResourceEntity {
'is_shared'?: number;
'master_key_id'?: string;
'mime'?: string;
'ocr_details'?: string;
'ocr_error'?: string;
'ocr_status'?: number;
'ocr_text'?: string;
'share_id'?: string;
'size'?: number;
'title'?: string;
@ -479,6 +548,10 @@ export const databaseSchema: DatabaseTables = {
is_shared: { type: 'number' },
master_key_id: { type: 'string' },
mime: { type: 'string' },
ocr_details: { type: 'string' },
ocr_error: { type: 'string' },
ocr_status: { type: 'number' },
ocr_text: { type: 'string' },
share_id: { type: 'string' },
size: { type: 'number' },
title: { type: 'string' },
@ -582,4 +655,58 @@ export const databaseSchema: DatabaseTables = {
updated_time: { type: 'number' },
type_: { type: 'number' },
},
items_normalized: {
body: { type: 'string' },
id: { type: 'number' },
item_id: { type: 'string' },
item_type: { type: 'number' },
reserved1: { type: 'number' },
reserved2: { type: 'number' },
reserved3: { type: 'number' },
reserved4: { type: 'number' },
reserved5: { type: 'number' },
reserved6: { type: 'number' },
title: { type: 'string' },
user_updated_time: { type: 'number' },
type_: { type: 'number' },
},
items_fts: {
body: { type: 'any' },
id: { type: 'any' },
item_id: { type: 'any' },
item_type: { type: 'any' },
reserved1: { type: 'any' },
reserved2: { type: 'any' },
reserved3: { type: 'any' },
reserved4: { type: 'any' },
reserved5: { type: 'any' },
reserved6: { type: 'any' },
title: { type: 'any' },
user_updated_time: { type: 'any' },
type_: { type: 'number' },
},
items_fts_segments: {
block: { type: 'any' },
blockid: { type: 'number' },
type_: { type: 'number' },
},
items_fts_segdir: {
end_block: { type: 'number' },
idx: { type: 'number' },
leaves_end_block: { type: 'number' },
level: { type: 'number' },
root: { type: 'any' },
start_block: { type: 'number' },
type_: { type: 'number' },
},
items_fts_docsize: {
docid: { type: 'number' },
size: { type: 'any' },
type_: { type: 'number' },
},
items_fts_stat: {
id: { type: 'number' },
value: { type: 'any' },
type_: { type: 'number' },
},
};