1
0
mirror of https://github.com/laurent22/joplin.git synced 2026-06-03 16:53:41 +02:00
Files
joplin/Dockerfile.transcribe.gpu
renovate[bot] 4895882843 chore(deps): update nvidia/cuda docker tag to v13.1.1 (#15511)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2026-05-22 21:59:29 +01:00

92 lines
3.1 KiB
Docker

# GPU-enabled variant of the Transcribe server image (NVIDIA / CUDA).
#
# This Dockerfile is the same as Dockerfile.transcribe, but:
# - The base image is nvidia/cuda (so the CUDA runtime libs are present).
# - Node.js 24 is installed on top of the CUDA base image.
# - The CUDA build of llama-mtmd-cli is copied from the official llama.cpp
# CUDA image (ghcr.io/ggml-org/llama.cpp:full-cuda-b5449). We use a
# multi-stage COPY here because llama.cpp does not publish a prebuilt
# Linux/CUDA zip on its release page — only Windows CUDA zips and the
# official Docker images.
#
# To run with GPU:
# docker run --gpus all -e HTR_CLI_GPU_LAYERS=9999 \
# --rm --env-file .env-transcribe -p 4567:4567 \
# -v ./data:/data joplin/transcribe:gpu-latest
#
# Requires the NVIDIA Container Toolkit on the host.
# Apple Silicon / Metal is not supported through Docker (Docker Desktop on
# macOS cannot expose the GPU to containers) — see packages/transcribe/README.md
# for the native-run instructions.
# Stage 1: pull the CUDA-built llama.cpp binaries from the official image.
# This stage runs no build steps of its own; it only gives the pinned image
# the name `llama-cuda` so the runtime stage can COPY --from it.
FROM ghcr.io/ggml-org/llama.cpp:full-cuda-b5449 AS llama-cuda
# Stage 2: our runtime image.
# The nvidia/cuda runtime base supplies the CUDA libraries; Node.js is
# installed on top of it below.
FROM nvidia/cuda:13.1.1-cudnn-runtime-ubuntu22.04

# Install OS-level dependencies and Node.js 24 from NodeSource.
#
# The NodeSource setup script is downloaded to a file and then executed,
# rather than piped straight into bash: RUN uses /bin/sh without pipefail, so
# with `curl ... | bash -` a failed download would still report success and
# the following `apt-get install nodejs` would go ahead without the
# NodeSource repository configured. With the download as its own step in the
# && chain, a curl failure (-f) aborts the build immediately.
RUN apt-get update \
    && apt-get install -y \
        ca-certificates curl wget \
        python3 tini gnupg \
    && curl -fsSL https://deb.nodesource.com/setup_24.x -o /tmp/nodesource_setup.sh \
    && bash /tmp/nodesource_setup.sh \
    && rm -f /tmp/nodesource_setup.sh \
    && apt-get install -y nodejs \
    && rm -rf /var/lib/apt/lists/*

ENV NODE_ENV=production

# Corepack provides the `yarn` shim used by the install and start commands.
RUN corepack enable
# Copy the CUDA-built llama-mtmd-cli binary and its runtime shared libraries
# from the official llama.cpp CUDA image. The /app directory of that image
# contains the built binaries and their .so dependencies.
#
# COPY creates the destination directory itself, and --chmod (BuildKit) sets
# the executable bits as part of the copy, so no separate `mkdir` / `chmod`
# RUN layers are needed. A follow-up `RUN chmod` adds an extra layer and can
# store a second copy of the binary in it.
COPY --from=llama-cuda --chmod=755 /app/llama-mtmd-cli /opt/llama/build/bin/llama-mtmd-cli
COPY --from=llama-cuda /app/*.so /opt/llama/build/bin/
# Dedicated unprivileged system account (with a home directory) that the
# service runs as instead of root.
RUN groupadd --system transcribe \
    && useradd --system --gid transcribe --create-home transcribe
WORKDIR /app

# The repository's .yarn/releases and .yarn/patches directories.
COPY .yarn/releases ./.yarn/releases
COPY .yarn/patches ./.yarn/patches

# Root-level manifests and build configuration, copied in a single step.
COPY package.json .yarnrc.yml yarn.lock gulpfile.js tsconfig.json ./

# Only the workspace packages listed here are copied into the image.
COPY packages/htmlpack ./packages/htmlpack
COPY packages/lib ./packages/lib
COPY packages/renderer ./packages/renderer
COPY packages/tools ./packages/tools
COPY packages/transcribe ./packages/transcribe
COPY packages/utils ./packages/utils
# onenote-converter is not used by the server, so drop every line mentioning
# it from packages/lib/package.json before installing to avoid building it.
RUN sed -i '/onenote-converter/d' ./packages/lib/package.json

# Install the workspace dependencies, then remove Yarn's cache and the
# .yarn/berry directory in the same layer so they are not kept in the image.
RUN BUILD_SEQUENCIAL=1 yarn install --inline-builds && \
    yarn cache clean && \
    rm -rf .yarn/berry
# The service is started from the transcribe package directory.
WORKDIR /app/packages/transcribe

# Create the data directory tree and hand ownership to the unprivileged
# account so it can write there at runtime.
RUN mkdir --parents /data/images \
    && chown --recursive transcribe:transcribe /data

# Everything from here on, including the container process, runs as non-root.
USER transcribe
# Runtime configuration:
# - HTR_CLI_BINARY_PATH: location of the llama-mtmd-cli binary in this image.
# - LD_LIBRARY_PATH:     directory holding the .so files copied next to it.
# - DATA_DIR:            the data directory created earlier in this file.
# - QUEUE_DRIVER:        set to sqlite.
ENV HTR_CLI_BINARY_PATH=/opt/llama/build/bin/llama-mtmd-cli \
    LD_LIBRARY_PATH=/opt/llama/build/bin \
    DATA_DIR=/data \
    QUEUE_DRIVER=sqlite

# tini is the entrypoint (PID 1) and launches the default command, which
# starts the Node.js application.
ENTRYPOINT ["/usr/bin/tini", "--"]
CMD ["yarn", "start"]