You've already forked joplin
mirror of
https://github.com/laurent22/joplin.git
synced 2025-11-23 22:36:32 +02:00
This commit is contained in:
@@ -9,6 +9,7 @@ import * as fs from 'fs-extra';
|
||||
import * as pdfJsNamespace from 'pdfjs-dist';
|
||||
import { writeFile } from 'fs/promises';
|
||||
import { ResourceEntity } from './services/database/types';
|
||||
import { TextItem } from 'pdfjs-dist/types/src/display/api';
|
||||
|
||||
const { FileApiDriverLocal } = require('./file-api-driver-local');
|
||||
const mimeUtils = require('./mime-utils.js').mime;
|
||||
@@ -734,6 +735,26 @@ function shimInit(options: ShimInitOptions = null) {
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Extracts the text embedded in a PDF file, one string per page.
 *
 * Pages are read in order, starting from page 1. Within a page, the `str`
 * value of every text-content item is joined with a newline; items that
 * have no `str` property contribute an empty string.
 *
 * @param pdfPath - Location of the PDF, passed as-is to `pdfJs.getDocument`.
 * @returns An array with one entry per page of the document.
 */
shim.pdfExtractEmbeddedText = async (pdfPath: string): Promise<string[]> => {
	const loadingTask = pdfJs.getDocument(pdfPath);
	const doc = await loadingTask.promise;

	const textByPage: string[] = [];

	try {
		for (let pageNum = 1; pageNum <= doc.numPages; pageNum++) {
			const page = await doc.getPage(pageNum);
			const textContent = await page.getTextContent();

			const strings = textContent.items.map(item => {
				// Not every item carries a `str` property, hence the fallback.
				const text = (item as TextItem).str ?? '';
				return text;
			}).join('\n');

			textByPage.push(strings);
		}
	} finally {
		// Always release the document, including when a page fails to
		// load, so that repeated calls do not accumulate PDF.js resources.
		await doc.destroy();
	}

	return textByPage;
};
|
||||
|
||||
shim.pdfToImages = async (pdfPath: string, outputDirectoryPath: string): Promise<string[]> => {
|
||||
// We handle both the Electron app and testing framework. Potentially
|
||||
// the same code could be used to support the CLI app.
|
||||
|
||||
Reference in New Issue
Block a user