mirror of https://github.com/vimagick/dockerfiles.git synced 2024-11-21 17:56:53 +02:00

add llama.cpp

This commit is contained in:
kev 2024-08-20 18:31:26 +08:00
parent 60f8d9f257
commit ef1887567c
3 changed files with 31 additions and 0 deletions

@@ -404,6 +404,7 @@ A collection of delicious docker recipes.
- [x] ipfs/kubo
- [x] heartexlabs/label-studio
- [x] martialblog/limesurvey
- [x] ghcr.io/ggerganov/llama.cpp
- [x] lldap/lldap
- [x] mailhog/mailhog
- [x] linuxserver/mastodon

llama.cpp/README.md Normal file
@@ -0,0 +1,21 @@
llama.cpp
=========
The goal of [llama.cpp][1] is to enable LLM inference with minimal setup and
state-of-the-art performance on a wide variety of hardware, locally and in the
cloud.
```bash
$ mkdir -p data
$ wget -P data https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q2_K.gguf
$ docker compose up -d
$ curl --request POST \
--url http://localhost:8080/completion \
--header "Content-Type: application/json" \
--data '{"prompt": "Building a website can be done in 10 simple steps:","n_predict": 128}'
```
[1]: https://github.com/ggerganov/llama.cpp
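For programmatic access, here is a minimal Python client sketch for the server's `/completion` endpoint. The helper names (`build_payload`, `complete`) are illustrative, not part of llama.cpp; only the `prompt` and `n_predict` fields come from the `curl` example above.

```python
import json
from urllib import request


def build_payload(prompt, n_predict=128):
    """Serialize a /completion request body, mirroring the curl example."""
    return json.dumps({"prompt": prompt, "n_predict": n_predict})


def complete(prompt, n_predict=128, base_url="http://localhost:8080"):
    """POST a prompt to the llama.cpp server and return the generated text.

    Assumes the compose stack above is running and that the response JSON
    carries the generation in its "content" field.
    """
    req = request.Request(
        base_url + "/completion",
        data=build_payload(prompt, n_predict).encode("utf-8"),
        headers={"Content-Type": "application/json"},
    )
    with request.urlopen(req) as resp:
        return json.loads(resp.read())["content"]
```

Calling `complete("Building a website can be done in 10 simple steps:")` is equivalent to the `curl` invocation above.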

llama.cpp/docker-compose.yml Normal file

@@ -0,0 +1,9 @@
services:
llama.cpp:
image: ghcr.io/ggerganov/llama.cpp:server
command: -m /models/tinyllama-1.1b-chat-v1.0.Q2_K.gguf -c 512 --host 0.0.0.0 --port 8080
ports:
- "8080:8080"
volumes:
- ./data:/models
restart: unless-stopped
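The upstream project also publishes CUDA-enabled server images. A sketch of a GPU variant of the compose file follows; the `server-cuda` image tag, the `-ngl` layer-offload flag, and the `deploy` stanza are assumptions to verify against the llama.cpp docs and your Docker/NVIDIA runtime setup:

```yaml
services:
  llama.cpp:
    image: ghcr.io/ggerganov/llama.cpp:server-cuda
    # -ngl 99 requests offloading (up to) all model layers to the GPU
    command: -m /models/tinyllama-1.1b-chat-v1.0.Q2_K.gguf -c 512 --host 0.0.0.0 --port 8080 -ngl 99
    ports:
      - "8080:8080"
    volumes:
      - ./data:/models
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped
```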