You've already forked joplin
mirror of
https://github.com/laurent22/joplin.git
synced 2025-12-02 22:49:09 +02:00
Desktop: Add support for OCR (#8975)
This commit is contained in:
28
packages/lib/services/ocr/utils/filterOcrText.test.ts
Normal file
28
packages/lib/services/ocr/utils/filterOcrText.test.ts
Normal file
@@ -0,0 +1,28 @@
|
||||
import filterOcrText from './filterOcrText';
|
||||
|
||||
// Table of [input, expected] pairs fed to filterOcrText by the test below.
// The multi-line input is written with an explicit `\n` so that the value does
// not depend on how the source file happens to be indented or wrapped.
const testData: Array<[string, string]> = [
	// Punctuation only: no letters, so the result is cleared.
	['— !', ''],

	// Punctuation spread over two lines: still no letters.
	['- = = — ‘ =\n—', ''],

	// Empty input stays empty.
	['', ''],

	// Text containing letters is kept, with surrounding whitespace trimmed.
	[' testing ', 'testing'],
];
|
||||
|
||||
describe('filterOcrText', () => {

	// One generated test per [input, expected] pair, so that a failure reports
	// which input was mishandled instead of aborting a shared loop.
	it.each(testData)('should filter text: %j', (input, expected) => {
		expect(filterOcrText(input)).toBe(expected);
	});

});
|
||||
8
packages/lib/services/ocr/utils/filterOcrText.ts
Normal file
8
packages/lib/services/ocr/utils/filterOcrText.ts
Normal file
@@ -0,0 +1,8 @@
|
||||
/**
 * Cleans up a piece of text produced by OCR.
 *
 * If the text contains no Unicode letter at all it is most likely an invalid
 * detection, so an empty string is returned. Otherwise the text is returned
 * with leading and trailing whitespace removed; characters inside the text
 * (digits, punctuation, inner whitespace) are left untouched.
 *
 * @param text - Raw text to filter.
 * @returns The trimmed text, or `''` when it contains no letter.
 */
const filterOcrText = (text: string): string => {
	const trimmed = text.trim();

	// Letters are never whitespace, so testing the trimmed string is equivalent
	// to testing the original. A single match is enough - there is no need to
	// build a letters-only copy of the whole string.
	return /\p{Letter}/u.test(trimmed) ? trimmed : '';
};

export default filterOcrText;
|
||||
23
packages/lib/services/ocr/utils/types.ts
Normal file
23
packages/lib/services/ocr/utils/types.ts
Normal file
@@ -0,0 +1,23 @@
|
||||
/**
 * Creates a new, empty recognition result: empty text and no lines.
 *
 * A fresh object (and a fresh `lines` array) is created on every call, so
 * callers may mutate the returned value freely.
 */
export const emptyRecognizeResult = (): RecognizeResult => {
	return {
		text: '',
		lines: [],
	};
};

/** Rectangle described by two corners, in the order x0, y0, x1, y1. */
export type RecognizeResultBoundingBox = [x0: number, y0: number, x1: number, y1: number];

/** A single word of a recognised line. Property names are kept short. */
export interface RecognizeResultWord {
	/** Presumably the recognised text of the word - confirm against the OCR driver. */
	t: string;
	/** Bounding box */
	bb: RecognizeResultBoundingBox;
	/** Baseline (optional) */
	bl?: RecognizeResultBoundingBox;
}

/** A line of a recognition result, made of its words. */
export interface RecognizeResultLine {
	words: RecognizeResultWord[];
}

/** Result of a recognition pass. */
export interface RecognizeResult {
	/** Plain text of the result. */
	text: string;
	/** Detailed per-line data. We do not store detailed data for PDFs. */
	lines?: RecognizeResultLine[];
}
|
||||
Reference in New Issue
Block a user