1
0
mirror of https://github.com/laurent22/joplin.git synced 2025-12-02 22:49:09 +02:00

Desktop: Add support for OCR (#8975)

This commit is contained in:
Laurent Cozic
2023-12-13 19:24:58 +00:00
committed by GitHub
parent 0e847685ff
commit bce94f1775
79 changed files with 2381 additions and 445 deletions

View File

@@ -0,0 +1,28 @@
import filterOcrText from './filterOcrText';
// Table of test cases. Each entry is an [input, expected] pair: the string
// passed to filterOcrText() and the exact string it is expected to return.
const testData: string[][] = [
// Only punctuation and symbols, no letter: expected to be cleared
['— !',
'',
],
// Same kind of input, but ending with a line break
[
`- = = — ‘ =
`,
'',
],
// Empty input: expected to stay empty
['', ''],
// Input with letters: expected to be kept, minus the surrounding whitespace
[' testing ', 'testing'],
];
describe('filterOcrText', () => {
	// All the [input, expected] pairs of testData are verified inside this
	// single test case.
	it('should filter text', () => {
		testData.forEach(([input, expected]) => {
			const actual = filterOcrText(input);
			expect(actual).toBe(expected);
		});
	});
});

View File

@@ -0,0 +1,8 @@
/**
 * Cleans up a string produced by OCR.
 *
 * When the text does not contain a single Unicode letter, it is most likely an
 * invalid detection (stray dashes, quotes, punctuation...), so an empty string
 * is returned. Otherwise the text is returned with its leading and trailing
 * whitespace removed; the characters inside the text are left untouched.
 *
 * @param text - Raw text to filter.
 * @returns The trimmed text, or `''` when the text contains no letter.
 */
const filterOcrText = (text: string): string => {
	// Looking for one letter gives the same answer as removing all non-letter
	// characters and checking whether something is left, without having to
	// build a filtered copy of the string.
	// NOTE(review): digits are not letters, so digit-only text such as "2023"
	// is cleared too - confirm that this is intended.
	if (!/\p{Letter}/u.test(text)) return '';
	return text.trim();
};

export default filterOcrText;

View File

@@ -0,0 +1,23 @@
/**
 * Creates a recognition result that holds nothing: an empty text and an empty
 * list of lines. A new object is built on each call.
 */
export const emptyRecognizeResult = (): RecognizeResult => ({ text: '', lines: [] });
/**
 * Four numbers, in this order: x0, y0, x1, y1. The same tuple type is used
 * below for both the bounding box and the baseline of a word.
 */
export type RecognizeResultBoundingBox = [number, number, number, number]; // x0, y0, x1, y1
/**
 * A single word of a recognised line. The property names are abbreviated.
 */
export interface RecognizeResultWord {
// Presumably the text of the word - TODO confirm against the code that fills it
t: string;
bb: RecognizeResultBoundingBox; // Bounding box
bl?: RecognizeResultBoundingBox; // Baseline (optional)
}
/**
 * A line of the recognition result, made of a list of words.
 */
export interface RecognizeResultLine {
words: RecognizeResultWord[];
}
/**
 * Result of a recognition: the text as a whole and, optionally, the detailed
 * data for each line.
 */
export interface RecognizeResult {
text: string;
lines?: RecognizeResultLine[]; // We do not store detailed data for PDFs
}