# joplin/Dockerfile.transcribe.gpu

# GPU-enabled variant of the Transcribe server image (NVIDIA / CUDA).
#
# This Dockerfile is the same as Dockerfile.transcribe, but:
#   - The base image is nvidia/cuda (so the CUDA runtime libs are present).
#   - Node.js 24 is installed on top of the CUDA base image.
#   - The CUDA build of llama-mtmd-cli is copied from the official llama.cpp
#     CUDA image (ghcr.io/ggml-org/llama.cpp:full-cuda-b5449). We use a
#     multi-stage COPY here because llama.cpp does not publish a prebuilt
#     Linux/CUDA zip on its release page — only Windows CUDA zips and the
#     official Docker images.
#
# To run with GPU:
#   docker run --gpus all -e HTR_CLI_GPU_LAYERS=9999 \
#       --rm --env-file .env-transcribe -p 4567:4567 \
#       -v ./data:/data joplin/transcribe:gpu-latest
#
# Requires the NVIDIA Container Toolkit on the host.
# Apple Silicon / Metal is not supported through Docker (Docker Desktop on
# macOS cannot expose the GPU to containers) — see packages/transcribe/README.md
# for the native-run instructions.

# Tag of the nvidia/cuda image used for the final stage. The CUDA major
# version must match the one the llama.cpp binaries in stage 1 were linked
# against, because only llama.cpp's own .so files are copied over; the CUDA
# runtime libraries (libcudart, libcublas) come from this base image.
# NOTE(review): the b5449 llama.cpp CUDA images appear to be built against
# CUDA 12.x - confirm against .devops/cuda.Dockerfile at that tag. Override
# with --build-arg CUDA_RUNTIME_TAG=... when bumping the llama.cpp image.
ARG CUDA_RUNTIME_TAG=12.4.1-cudnn-runtime-ubuntu22.04

# Stage 1: pull the CUDA-built llama.cpp binaries from the official image.
FROM ghcr.io/ggml-org/llama.cpp:full-cuda-b5449 AS llama-cuda

# Stage 2: our runtime image.
FROM nvidia/cuda:${CUDA_RUNTIME_TAG} AS runtime

# Install the OS-level tools and Node.js 24 (from the NodeSource apt repo).
#
# The NodeSource setup script is downloaded to a file and then executed,
# rather than piped into bash: under the default /bin/sh there is no
# pipefail, so a failed download in `curl | bash` would be masked and the
# following `apt-get install nodejs` would silently pick up the distro's
# Node.js instead of version 24.
#
# DEBIAN_FRONTEND is set inline so it applies to the build only and is not
# baked into the runtime environment. The apt lists are removed in the same
# layer so they do not end up in the image.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        gnupg \
        python3 \
        tini \
        wget \
    && curl -fsSL https://deb.nodesource.com/setup_24.x -o /tmp/nodesource_setup.sh \
    && bash /tmp/nodesource_setup.sh \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends nodejs \
    && rm -f /tmp/nodesource_setup.sh \
    && rm -rf /var/lib/apt/lists/*

ENV NODE_ENV=production

# Activate the Corepack shims so `yarn` is available on PATH.
RUN corepack enable

# Copy the CUDA-built llama-mtmd-cli binary and its runtime shared libraries
# from the official llama.cpp CUDA image. The /app directory of that image
# contains the built binaries and their .so dependencies.
#
# COPY creates /opt/llama/build/bin on its own, and --chmod sets the
# executable bit at copy time, so no separate `RUN mkdir` / `RUN chmod`
# layers are needed (requires BuildKit, the default builder in current
# Docker releases).
COPY --from=llama-cuda --chmod=755 /app/llama-mtmd-cli /opt/llama/build/bin/llama-mtmd-cli
COPY --from=llama-cuda /app/*.so /opt/llama/build/bin/

# Dedicated unprivileged system account (with a home directory) that the
# server runs as, instead of root.
RUN groupadd --system transcribe \
    && useradd --system --gid transcribe --create-home transcribe

WORKDIR /app

# Yarn itself (bundled release + patches) and the root-level manifests and
# build configuration.
COPY .yarn/releases .yarn/releases/
COPY .yarn/patches .yarn/patches/
COPY package.json .yarnrc.yml yarn.lock gulpfile.js tsconfig.json ./

# Only the workspace packages copied here are present in the image.
COPY packages/lib packages/lib/
COPY packages/utils packages/utils/
COPY packages/tools packages/tools/
COPY packages/renderer packages/renderer/
COPY packages/htmlpack packages/htmlpack/
COPY packages/transcribe packages/transcribe/

# The server does not use onenote-converter, so drop every line mentioning it
# from the lib package manifest before installing; that way it is not built.
RUN sed -i '/onenote-converter/d' packages/lib/package.json

# Install the dependencies, then remove Yarn's cache in the same layer so it
# does not stay in the image.
RUN BUILD_SEQUENCIAL=1 yarn install --inline-builds && \
    yarn cache clean && \
    rm -rf .yarn/berry

# Data directory (and its images subdirectory), owned by the unprivileged
# account so the server can write to it.
RUN install -d -o transcribe -g transcribe /data /data/images

WORKDIR /app/packages/transcribe

# Everything from here on, including the running container, uses the
# unprivileged account.
USER transcribe

# Runtime configuration for the Transcribe server.
#
# LD_LIBRARY_PATH puts the directory holding the llama.cpp shared libraries
# first and then keeps whatever value the base image already defined, rather
# than overwriting it. The ${VAR:+...} form adds the ":" separator only when
# an inherited value exists, so an empty entry (which the dynamic loader
# treats as the current directory) is never introduced.
ENV HTR_CLI_BINARY_PATH=/opt/llama/build/bin/llama-mtmd-cli \
    LD_LIBRARY_PATH=/opt/llama/build/bin${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} \
    DATA_DIR=/data \
    QUEUE_DRIVER=sqlite

# Start the Node.js application.
# tini is the container's entrypoint and launches the command given in CMD,
# so the server runs under an init process. Both instructions use exec form
# (no intermediate shell); CMD supplies the default command and can be
# overridden at `docker run` time. `yarn start` runs from the WORKDIR set
# above (/app/packages/transcribe).
ENTRYPOINT ["/usr/bin/tini", "--"]
CMD ["yarn", "start"]