chore(ml): move to fastAPI (#2336)

2024-11-24 08:52:28 +02:00 · 2023-04-26 05:39:24 -05:00 · 2023-04-26 05:39:24 -05:00 · 7e965cb6d4
commit 7e965cb6d4
parent 6631b286c1
3 changed files with 73 additions and 70 deletions
--- a/machine-learning/Dockerfile
+++ b/machine-learning/Dockerfile
@ -6,9 +6,10 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
 RUN python -m venv /opt/venv
 RUN /opt/venv/bin/pip install --pre torch  -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
-RUN /opt/venv/bin/pip install transformers tqdm numpy scikit-learn scipy nltk sentencepiece flask Pillow gunicorn
+RUN /opt/venv/bin/pip install transformers tqdm numpy scikit-learn scipy nltk sentencepiece fastapi Pillow uvicorn[standard]
 RUN /opt/venv/bin/pip install --no-deps sentence-transformers
 FROM python:3.10-slim
 ENV NODE_ENV=production
@ -23,5 +24,5 @@ ENV TRANSFORMERS_CACHE=/cache \
 WORKDIR /usr/src/app
 COPY . .
-
+ENV PYTHONPATH=`pwd`
-CMD ["gunicorn", "src.main:server"]
+CMD ["python", "main.py"]
--- a/machine-learning/gunicorn.conf.py
+++ b/machine-learning/gunicorn.conf.py
@ -1,29 +0,0 @@
 """
 Gunicorn configuration options.
 https://docs.gunicorn.org/en/stable/settings.html
 """
 import os
 # Set the bind address based on the env
 port = os.getenv("MACHINE_LEARNING_PORT") or "3003"
 listen_ip = os.getenv("MACHINE_LEARNING_IP") or "0.0.0.0"
 bind = [f"{listen_ip}:{port}"]
 # Preload the Flask app / models etc. before starting the server
 preload_app = True
 # Logging settings - log to stdout and set log level
 accesslog = "-"
 loglevel = os.getenv("MACHINE_LEARNING_LOG_LEVEL") or "info"
 # Worker settings
 # ----------------------
 # It is important these are chosen carefully as per
 # https://pythonspeed.com/articles/gunicorn-in-docker/
 # Otherwise we get workers failing to respond to heartbeat checks,
 # especially as requests take a long time to complete.
 workers = 2
 threads = 4
 worker_tmp_dir = "/dev/shm"
 timeout = 60
--- a/machine-learning/src/main.py
+++ b/machine-learning/src/main.py
@ -1,58 +1,77 @@
 import os
 from flask import Flask, request
 from transformers import pipeline
 from sentence_transformers import SentenceTransformer, util
 from PIL import Image
 from fastapi import FastAPI
 import uvicorn
 import os
 from pydantic import BaseModel
 class MlRequestBody(BaseModel):
    thumbnailPath: str
 class ClipRequestBody(BaseModel):
    text: str
 is_dev = os.getenv('NODE_ENV') == 'development'
 server_port = os.getenv('MACHINE_LEARNING_PORT', 3003)
 server_host = os.getenv('MACHINE_LEARNING_HOST', '0.0.0.0')
-classification_model = os.getenv('MACHINE_LEARNING_CLASSIFICATION_MODEL', 'microsoft/resnet-50')
+app = FastAPI()
 """
 Model Initialization
 """
 classification_model = os.getenv(
    'MACHINE_LEARNING_CLASSIFICATION_MODEL', 'microsoft/resnet-50')
 object_model = os.getenv('MACHINE_LEARNING_OBJECT_MODEL', 'hustvl/yolos-tiny')
-clip_image_model = os.getenv('MACHINE_LEARNING_CLIP_IMAGE_MODEL', 'clip-ViT-B-32')
+clip_image_model = os.getenv(
-clip_text_model = os.getenv('MACHINE_LEARNING_CLIP_TEXT_MODEL', 'clip-ViT-B-32')
+    'MACHINE_LEARNING_CLIP_IMAGE_MODEL', 'clip-ViT-B-32')
 clip_text_model = os.getenv(
    'MACHINE_LEARNING_CLIP_TEXT_MODEL', 'clip-ViT-B-32')
 _model_cache = {}
 def _get_model(model, task=None):
  global _model_cache
  key = '|'.join([model, str(task)])
  if key not in _model_cache:
    if task:
      _model_cache[key] = pipeline(model=model, task=task)
    else:
      _model_cache[key] = SentenceTransformer(model)
  return _model_cache[key]
 server = Flask(__name__)
-@server.route("/ping")
+@app.get("/")
 async def root():
    return {"message": "Immich ML"}
@app.get("/ping")
 def ping():
    return "pong"
-@server.route("/object-detection/detect-object", methods=['POST'])
+
-def object_detection():
+@app.post("/object-detection/detect-object", status_code=200)
 def object_detection(payload: MlRequestBody):
    model = _get_model(object_model, 'object-detection')
-    assetPath = request.json['thumbnailPath']
+    assetPath = payload.thumbnailPath
-    return run_engine(model, assetPath), 200
+    return run_engine(model, assetPath)
-@server.route("/image-classifier/tag-image", methods=['POST'])
+
-def image_classification():
+@app.post("/image-classifier/tag-image", status_code=200)
 def image_classification(payload: MlRequestBody):
    model = _get_model(classification_model, 'image-classification')
-    assetPath = request.json['thumbnailPath']
+    assetPath = payload.thumbnailPath
-    return run_engine(model, assetPath), 200
+    return run_engine(model, assetPath)
-@server.route("/sentence-transformer/encode-image", methods=['POST'])
+
-def clip_encode_image():
+@app.post("/sentence-transformer/encode-image", status_code=200)
 def clip_encode_image(payload: MlRequestBody):
    model = _get_model(clip_image_model)
-    assetPath = request.json['thumbnailPath']
+    assetPath = payload.thumbnailPath
-    return model.encode(Image.open(assetPath)).tolist(), 200
+    return model.encode(Image.open(assetPath)).tolist()
-@server.route("/sentence-transformer/encode-text", methods=['POST'])
+
-def clip_encode_text():
+@app.post("/sentence-transformer/encode-text", status_code=200)
 def clip_encode_text(payload: ClipRequestBody):
    model = _get_model(clip_text_model)
-    text = request.json['text']
+    text = payload.text
-    return model.encode(text).tolist(), 200
+    return model.encode(text).tolist()
 def run_engine(engine, path):
    result = []
@ -69,5 +88,17 @@ def run_engine(engine, path):
    return result
 def _get_model(model, task=None):
    global _model_cache
    key = '|'.join([model, str(task)])
    if key not in _model_cache:
        if task:
            _model_cache[key] = pipeline(model=model, task=task)
        else:
            _model_cache[key] = SentenceTransformer(model)
    return _model_cache[key]
 if __name__ == "__main__":
-    server.run(debug=is_dev, host=server_host, port=server_port)
+    uvicorn.run("main:app", host=server_host,
                port=int(server_port), reload=is_dev, workers=1)