1
0
mirror of https://github.com/laurent22/joplin.git synced 2026-03-12 10:00:05 +02:00

Compare commits

...

3 Commits

Author SHA1 Message Date
Laurent Cozic
a747996d8c update 2026-02-18 19:11:03 +00:00
Laurent Cozic
0877d6e9cd update 2026-02-18 17:56:09 +00:00
Laurent Cozic
66aa47a5ca update 2026-02-18 17:07:42 +00:00
12 changed files with 268 additions and 16 deletions

View File

@@ -214,6 +214,7 @@ module.exports = {
'packages/tools/**',
'packages/app-mobile/tools/**',
'packages/app-desktop/tools/**',
'packages/transcribe/src/tools/**',
],
'rules': {
'no-console': 'off',

View File

@@ -250,4 +250,8 @@ mrjo
codegen
analyzed
Perfetto
appmodules
appmodules
cuda
CUDA
mtmd
gguf

View File

@@ -5,4 +5,5 @@ images/*
models/
*.sqlite3
*.sqlite-journal
.env
.env
htr-metal/

View File

@@ -0,0 +1,24 @@
FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04
RUN apt-get update && apt-get install -y \
wget \
unzip \
libgomp1 \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /app
RUN wget -q https://github.com/ggml-org/llama.cpp/releases/download/b5449/llama-b5449-bin-ubuntu-x64-cuda-12.zip
RUN mkdir /models/
RUN wget -q -O /models/Model-7.6B-Q4_K_M.gguf https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf/resolve/main/Model-7.6B-Q4_K_M.gguf
RUN wget -q -O /models/mmproj-model-f16.gguf https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf/resolve/main/mmproj-model-f16.gguf
WORKDIR /app
RUN unzip llama-b5449-bin-ubuntu-x64-cuda-12.zip
WORKDIR /app/build/bin
# Create an entrypoint script
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]

View File

@@ -15,6 +15,84 @@ docker run --env-file .env-transcribe -p 4567:4567 \
transcribe
```
## GPU Acceleration
By default the server runs inference on CPU. Set `HTR_CLI_GPU_TYPE` in your `.env` to enable GPU acceleration.
| Value | Hardware | Requires |
|-------|----------|---------|
| `none` | CPU (default) | Nothing extra |
| `cuda` | NVIDIA GPU | NVIDIA Docker runtime (`nvidia-container-toolkit`) |
| `metal` | Apple Silicon | Native binary (no Docker for inference) |
### NVIDIA CUDA
1. Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) on the host.
2. Build the GPU Docker image:
```shell
docker build -f packages/transcribe/Dockerfile.htr-cli-gpu -t joplin/htr-cli-gpu:latest .
```
3. Add these variables to your `.env`:
```env
HTR_CLI_GPU_TYPE=cuda
HTR_CLI_DOCKER_IMAGE=joplin/htr-cli-gpu:latest
```
4. Start the transcribe container with `--gpus all`:
```shell
docker run --env-file .env-transcribe -p 4567:4567 \
--gpus all \
-v /var/run/docker.sock:/var/run/docker.sock \
-v ./packages/transcribe/images:/app/packages/transcribe/images \
transcribe
```
### Apple Silicon (Metal)
Metal GPU access is not available inside Docker containers on macOS, so the inference binary runs natively on the host instead of in a container.
1. Run the setup script to download the native binary and model files (from `packages/transcribe`):
```shell
yarn setupMetal
```
An optional `--install-dir` argument controls where files are downloaded (default: `./htr-metal`):
```shell
yarn setupMetal --install-dir /opt/htr-metal
```
The script prints the exact `.env` lines to add, for example:
```env
HTR_CLI_GPU_TYPE=metal
HTR_CLI_BINARY_PATH=/path/to/htr-metal/bin/llama-mtmd-cli
HTR_CLI_MODELS_FOLDER=/path/to/htr-metal/models
```
2. Add those lines to your `.env`.
3. Start the server normally — `HTR_CLI_IMAGES_FOLDER` still needs to be set and accessible to the native binary.
Here's a sample `.env` file for testing:
```ini
# Copy these lines from the output of `yarn setupMetal`
HTR_CLI_GPU_TYPE=metal
HTR_CLI_BINARY_PATH=
HTR_CLI_MODELS_FOLDER=
HTR_CLI_IMAGES_FOLDER=/path/to/images
API_KEY=test-key
QUEUE_DRIVER=sqlite
QUEUE_DATABASE_NAME=./queue.sqlite3
```
## Using Docker Compose
The minimal configuration is provided in `.env-sample` and `docker-compose.server.yml`.

View File

@@ -6,6 +6,7 @@
"rebuild": "yarn clean && yarn build && yarn tsc",
"build": "gulp build",
"start": "node dist/src/api/app.js",
"setupMetal": "node dist/src/tools/setupMetal.js",
"tsc": "tsc --project tsconfig.json",
"test": "jest --verbose=false",
"test-all": "TRANSCRIBE_RUN_ALL=1 jest --verbose=false",

View File

@@ -34,7 +34,13 @@ const init = async (logger: LoggerWrapper) => {
app.context.queue = queue;
app.context.storage = fileStorage;
const htrCli = new HtrCli(envVariables.HTR_CLI_DOCKER_IMAGE, envVariables.HTR_CLI_IMAGES_FOLDER);
const htrCli = new HtrCli({
htrCliDockerImage: envVariables.HTR_CLI_DOCKER_IMAGE,
htrCliImagesFolder: envVariables.HTR_CLI_IMAGES_FOLDER,
gpuType: envVariables.HTR_CLI_GPU_TYPE as import('../core/HtrCli').GpuType,
binaryPath: envVariables.HTR_CLI_BINARY_PATH,
modelsFolder: envVariables.HTR_CLI_MODELS_FOLDER,
});
const jobProcessor = new JobProcessor(queue, htrCli, fileStorage);
@@ -46,6 +52,10 @@ const init = async (logger: LoggerWrapper) => {
const checkServerConfigurations = (envVariables: EnvVariables) => {
if (!envVariables.API_KEY) throw Error('API_KEY environment variable not set.');
if (!envVariables.HTR_CLI_IMAGES_FOLDER) throw Error('HTR_CLI_IMAGES_FOLDER environment variable not set. This should point to a folder where images will be stored.');
if (envVariables.HTR_CLI_GPU_TYPE === 'metal') {
if (!envVariables.HTR_CLI_BINARY_PATH) throw Error('HTR_CLI_BINARY_PATH environment variable not set. This should point to the native llama-mtmd-cli binary for Metal GPU mode.');
if (!envVariables.HTR_CLI_MODELS_FOLDER) throw Error('HTR_CLI_MODELS_FOLDER environment variable not set. This should point to the folder containing the model files for Metal GPU mode.');
}
};
const main = async () => {

View File

@@ -2,7 +2,7 @@ import { readFile } from 'fs-extra';
import HtrCli from './HtrCli';
describe('HtrCli', () => {
const dt = new HtrCli('', '');
const dt = new HtrCli({ htrCliDockerImage: '', htrCliImagesFolder: '', gpuType: 'none' });
it('should parse multiline result', async () => {
const testCase = await readFile('./test-cases/1.txt');
const result = dt.cleanUpResult(testCase.toString());

View File

@@ -4,26 +4,48 @@ import { WorkHandler } from '../types';
const logger = Logger.create('HtrCli');
export type GpuType = 'none' | 'cuda' | 'metal';
const systemPrompt = 'SYSTEM: you are an agent of a OCR system. Your job is to be concise and correct. You should NEVER deviate from the content of the image. You should NEVER add any context or new information. Your only job should be to transcribe the text presented in the image as text without anything new information. The output for it should be inside triple backticks like: ```{{example}}```. If you find no text, output ``````.. Your turn:';
export interface HtrCliOptions {
htrCliDockerImage: string;
htrCliImagesFolder: string;
gpuType: GpuType;
// Required when gpuType is 'metal'
binaryPath?: string;
modelsFolder?: string;
}
export default class HtrCli implements WorkHandler {
private htrCliDockerImage: string;
private htrCliImagesFolder: string;
private options: HtrCliOptions;
public constructor(htrCliDockerImage: string, htrCliImagesFolder: string) {
this.htrCliDockerImage = htrCliDockerImage;
this.htrCliImagesFolder = htrCliImagesFolder;
public constructor(options: HtrCliOptions) {
this.options = options;
}
public async init() {
if (this.options.gpuType === 'metal') {
logger.info('Metal GPU mode: skipping Docker image pull (native binary)');
return;
}
logger.info('Loading');
const result = await execCommand(['docker', 'pull', this.htrCliDockerImage], { quiet: true });
const result = await execCommand(['docker', 'pull', this.options.htrCliDockerImage], { quiet: true });
logger.info('Finished loading: ', result);
}
public async run(imageName: string) {
const command = ['docker', 'run', '--rm', '-t', '-v', `${this.htrCliImagesFolder}:/images`, this.htrCliDockerImage, imageName];
logger.info('Running transcription...');
let command: string[];
if (this.options.gpuType === 'metal') {
command = this.buildMetalCommand(imageName);
} else {
command = this.buildDockerCommand(imageName);
}
logger.info(`Command: ${commandToString(command[0], command.slice(1))}`);
const result = await execCommand(command, { quiet: true });
@@ -31,6 +53,27 @@ export default class HtrCli implements WorkHandler {
return this.cleanUpResult(result);
}
// Assembles the `docker run` argv for CPU or CUDA mode; CUDA only differs by
// passing `--gpus all` through to the container runtime.
private buildDockerCommand(imageName: string): string[] {
	const { gpuType, htrCliImagesFolder, htrCliDockerImage } = this.options;
	const command = ['docker', 'run', '--rm', '-t'];
	if (gpuType === 'cuda') command.push('--gpus', 'all');
	command.push('-v', `${htrCliImagesFolder}:/images`, htrCliDockerImage, imageName);
	return command;
}
// Assembles the argv for running the native llama-mtmd-cli binary directly on
// the host (Apple Silicon / Metal mode, where the GPU is not reachable from
// inside Docker). binaryPath/modelsFolder default to '' so a misconfiguration
// fails at spawn time rather than here.
private buildMetalCommand(imageName: string): string[] {
	const { binaryPath = '', modelsFolder = '', htrCliImagesFolder } = this.options;
	return [
		binaryPath,
		// Model weights and multimodal projector as laid out by setupMetal.
		'-m', `${modelsFolder}/Model-7.6B-Q4_K_M.gguf`,
		'--mmproj', `${modelsFolder}/mmproj-model-f16.gguf`,
		// Context size plus sampling parameters; the low temperature biases the
		// model toward literal transcription. NOTE(review): these constants are
		// presumably mirrored from the htr-cli Docker image's invocation —
		// confirm against that image's entrypoint.
		'-c', '4096',
		'--temp', '0.05',
		'--top-p', '0.8',
		'--top-k', '100',
		'--repeat-penalty', '1.05',
		'--image', `${htrCliImagesFolder}/${imageName}`,
		'-p', systemPrompt,
	];
}
public cleanUpResult(transcriptionAndLogs: string) {
const s1 = transcriptionAndLogs.split(/image decoded.*/);
// Before the last `image decoded` line it is all logs generated by the transcription tool

View File

@@ -8,6 +8,9 @@ export const defaultEnvValues: EnvVariables = {
QUEUE_MAINTENANCE_INTERVAL: 60 * Second,
HTR_CLI_DOCKER_IMAGE: 'joplin/htr-cli:latest',
HTR_CLI_IMAGES_FOLDER: '',
HTR_CLI_GPU_TYPE: 'none', // 'none' | 'cuda' | 'metal'
HTR_CLI_BINARY_PATH: '', // Path to native llama-mtmd-cli binary (required for metal)
HTR_CLI_MODELS_FOLDER: '', // Path to models directory (required for metal)
QUEUE_DRIVER: 'pg', // 'sqlite'
QUEUE_DATABASE_PASSWORD: '',
QUEUE_DATABASE_NAME: '',
@@ -27,6 +30,9 @@ export interface EnvVariables {
QUEUE_MAINTENANCE_INTERVAL: number;
HTR_CLI_DOCKER_IMAGE: string;
HTR_CLI_IMAGES_FOLDER: string;
HTR_CLI_GPU_TYPE: string;
HTR_CLI_BINARY_PATH: string;
HTR_CLI_MODELS_FOLDER: string;
QUEUE_DRIVER: string;
QUEUE_DATABASE_PASSWORD: string;
QUEUE_DATABASE_NAME: string;

View File

@@ -0,0 +1,84 @@
// Downloads the native llama.cpp binary and model files required for Apple Silicon (Metal) GPU mode.
// Run once to set up the Metal environment, then configure .env accordingly.
//
// Usage:
// yarn setupMetal [--install-dir ./htr-metal]
import { fetchWithRetry } from '@joplin/utils/net';
import { execCommand } from '@joplin/utils';
import * as fs from 'fs-extra';
import { join, resolve } from 'path';
import { createWriteStream } from 'fs';
import { pipeline } from 'stream/promises';
// llama.cpp release tag — pinned to the same b5449 build the Docker images use.
const LLAMA_RELEASE = 'b5449';
// Prebuilt macOS ARM64 (Apple Silicon) archive for that release.
const LLAMA_ZIP = `llama-${LLAMA_RELEASE}-bin-macos-arm64.zip`;
const LLAMA_URL = `https://github.com/ggml-org/llama.cpp/releases/download/${LLAMA_RELEASE}/${LLAMA_ZIP}`;
// MiniCPM-o 2.6 GGUF model files hosted on Hugging Face.
const MODEL_BASE_URL = 'https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf/resolve/main';
// Streams the response body for `url` into `destPath`, retrying transient
// network failures via fetchWithRetry. Throws on a missing or non-2xx response.
const downloadFile = async (url: string, destPath: string) => {
	console.info(`Downloading ${url} ...`);
	const response = await fetchWithRetry(url, { retry: 3, pause: 2000 });
	if (!(response && response.ok)) {
		throw new Error(`Failed to download ${url}: ${response?.status} ${response?.statusText}`);
	}
	const out = createWriteStream(destPath);
	await pipeline(response.body, out);
};
// Depth-first search of `dir` for a file named exactly `name`.
// Returns the full path of the first match in directory order, or '' if absent.
// Errors while descending into a subdirectory are treated as "not found there".
const findBinary = async (dir: string, name: string): Promise<string> => {
	const entries = await fs.readdir(dir, { withFileTypes: true });
	for (const entry of entries) {
		const fullPath = join(dir, entry.name);
		if (!entry.isDirectory()) {
			if (entry.name === name) return fullPath;
			continue;
		}
		const nested = await findBinary(fullPath, name).catch(() => '');
		if (nested) return nested;
	}
	return '';
};
// Entry point: downloads the llama.cpp macOS ARM64 build plus the MiniCPM-o
// model files into --install-dir (default ./htr-metal), then prints the .env
// variables needed for Metal GPU mode.
const main = async () => {
	const args = process.argv.slice(2);
	const installDirArg = args.indexOf('--install-dir');
	// Guard: `--install-dir` given as the last argument would make the value
	// undefined and `resolve(undefined)` throw an opaque TypeError.
	if (installDirArg >= 0 && !args[installDirArg + 1]) {
		throw new Error('--install-dir requires a path argument, e.g. --install-dir ./htr-metal');
	}
	const installDir = resolve(installDirArg >= 0 ? args[installDirArg + 1] : './htr-metal');
	const binDir = join(installDir, 'bin');
	const modelsDir = join(installDir, 'models');

	await fs.mkdirp(binDir);
	await fs.mkdirp(modelsDir);

	// Download and extract the llama.cpp macOS ARM binary; the zip is removed
	// once extracted.
	const zipPath = join(installDir, LLAMA_ZIP);
	console.info(`\nDownloading llama.cpp macOS ARM binary (${LLAMA_RELEASE})...`);
	await downloadFile(LLAMA_URL, zipPath);
	console.info('Extracting...');
	await execCommand(['unzip', '-o', zipPath, '-d', binDir]);
	await fs.remove(zipPath);

	// Download the model weights and the multimodal projector.
	console.info('\nDownloading model files...');
	await downloadFile(
		`${MODEL_BASE_URL}/Model-7.6B-Q4_K_M.gguf`,
		join(modelsDir, 'Model-7.6B-Q4_K_M.gguf'),
	);
	await downloadFile(
		`${MODEL_BASE_URL}/mmproj-model-f16.gguf`,
		join(modelsDir, 'mmproj-model-f16.gguf'),
	);

	// Locate the extracted binary (the archive may nest it in subdirectories)
	// and make sure it is executable.
	const binaryPath = await findBinary(binDir, 'llama-mtmd-cli');
	if (!binaryPath) throw new Error('llama-mtmd-cli binary not found after extraction.');
	await fs.chmod(binaryPath, 0o755);

	console.info('\nSetup complete. Add these variables to your .env file:\n');
	console.info('HTR_CLI_GPU_TYPE=metal');
	console.info(`HTR_CLI_BINARY_PATH=${resolve(binaryPath)}`);
	console.info(`HTR_CLI_MODELS_FOLDER=${resolve(modelsDir)}`);
};

main().catch(error => {
	console.error('Fatal error:', error);
	process.exit(1);
});

View File

@@ -36,7 +36,7 @@ describe('JobProcessor', () => {
skipByDefault('should execute work on job in the queue', async () => {
jest.useRealTimers();
const tw = new JobProcessor(queue, new HtrCli('joplin/htr-cli:latest', join(process.cwd(), 'images')), new FileStorage(), 1000);
const tw = new JobProcessor(queue, new HtrCli({ htrCliDockerImage: 'joplin/htr-cli:latest', htrCliImagesFolder: join(process.cwd(), 'images'), gpuType: 'none' }), new FileStorage(), 1000);
await tw.init();
await copy(join('images', 'htr_sample.png'), join('images', 'htr_sample_copy.png'));
@@ -59,7 +59,7 @@ describe('JobProcessor', () => {
skipByDefault('should execute work on job in the queue even if one fails', async () => {
jest.useRealTimers();
const tw = new JobProcessor(queue, new HtrCli('joplin/htr-cli:latest', join(process.cwd(), 'images')), new FileStorage(), 1000);
const tw = new JobProcessor(queue, new HtrCli({ htrCliDockerImage: 'joplin/htr-cli:latest', htrCliImagesFolder: join(process.cwd(), 'images'), gpuType: 'none' }), new FileStorage(), 1000);
await tw.init();
await copy(join('images', 'htr_sample.png'), join('images', 'htr_sample_copy_2.png'));
@@ -84,7 +84,7 @@ describe('JobProcessor', () => {
skipByDefault('should remove file sent to queue if job is completed', async () => {
jest.useRealTimers();
const tw = new JobProcessor(queue, new HtrCli('joplin/htr-cli:latest', join(process.cwd(), 'images')), new FileStorage(), 1000);
const tw = new JobProcessor(queue, new HtrCli({ htrCliDockerImage: 'joplin/htr-cli:latest', htrCliImagesFolder: join(process.cwd(), 'images'), gpuType: 'none' }), new FileStorage(), 1000);
await tw.init();
const imagePath = join('images', 'htr_sample_copy_3.png');
await copy(join('images', 'htr_sample.png'), imagePath);
@@ -112,7 +112,7 @@ describe('JobProcessor', () => {
const fileStorage = new FileStorage();
const mockedFileStorageRemove = jest.fn();
fileStorage.remove = mockedFileStorageRemove;
const tw = new JobProcessor(queue, new HtrCli('joplin/htr-cli:latest', join(process.cwd(), 'images')), fileStorage, 1000);
const tw = new JobProcessor(queue, new HtrCli({ htrCliDockerImage: 'joplin/htr-cli:latest', htrCliImagesFolder: join(process.cwd(), 'images'), gpuType: 'none' }), fileStorage, 1000);
await tw.init();
// file doesn't exist to force a fail, but the call to remove the file should still exist