1
0
mirror of https://github.com/immich-app/immich.git synced 2025-06-15 03:30:33 +02:00

feat(ml)!: switch image classification and CLIP models to ONNX (#3809)

This commit is contained in:
Mert
2023-08-25 00:28:51 -04:00
committed by GitHub
parent 8211afb726
commit 165b91b068
14 changed files with 1617 additions and 507 deletions

View File

@ -1,4 +1,6 @@
import asyncio
import os
from concurrent.futures import ThreadPoolExecutor
from io import BytesIO
from typing import Any
@ -8,6 +10,8 @@ import uvicorn
from fastapi import Body, Depends, FastAPI
from PIL import Image
from app.models.base import InferenceModel
from .config import settings
from .models.cache import ModelCache
from .schemas import (
@ -25,19 +29,21 @@ app = FastAPI()
def init_state() -> None:
app.state.model_cache = ModelCache(ttl=settings.model_ttl, revalidate=settings.model_ttl > 0)
# asyncio is a huge bottleneck for performance, so we use a thread pool to run blocking code
app.state.thread_pool = ThreadPoolExecutor(settings.request_threads)
async def load_models() -> None:
models = [
(settings.classification_model, ModelType.IMAGE_CLASSIFICATION),
(settings.clip_image_model, ModelType.CLIP),
(settings.clip_text_model, ModelType.CLIP),
(settings.facial_recognition_model, ModelType.FACIAL_RECOGNITION),
models: list[tuple[str, ModelType, dict[str, Any]]] = [
(settings.classification_model, ModelType.IMAGE_CLASSIFICATION, {}),
(settings.clip_image_model, ModelType.CLIP, {"mode": "vision"}),
(settings.clip_text_model, ModelType.CLIP, {"mode": "text"}),
(settings.facial_recognition_model, ModelType.FACIAL_RECOGNITION, {}),
]
# Get all models
for model_name, model_type in models:
await app.state.model_cache.get(model_name, model_type, eager=settings.eager_startup)
for model_name, model_type, model_kwargs in models:
await app.state.model_cache.get(model_name, model_type, eager=settings.eager_startup, **model_kwargs)
@app.on_event("startup")
@ -46,11 +52,16 @@ async def startup_event() -> None:
await load_models()
@app.on_event("shutdown")
async def shutdown_event() -> None:
app.state.thread_pool.shutdown()
def dep_pil_image(byte_image: bytes = Body(...)) -> Image.Image:
return Image.open(BytesIO(byte_image))
def dep_cv_image(byte_image: bytes = Body(...)) -> cv2.Mat:
def dep_cv_image(byte_image: bytes = Body(...)) -> np.ndarray[int, np.dtype[Any]]:
byte_image_np = np.frombuffer(byte_image, np.uint8)
return cv2.imdecode(byte_image_np, cv2.IMREAD_COLOR)
@ -74,7 +85,7 @@ async def image_classification(
image: Image.Image = Depends(dep_pil_image),
) -> list[str]:
model = await app.state.model_cache.get(settings.classification_model, ModelType.IMAGE_CLASSIFICATION)
labels = model.predict(image)
labels = await predict(model, image)
return labels
@ -86,8 +97,8 @@ async def image_classification(
async def clip_encode_image(
image: Image.Image = Depends(dep_pil_image),
) -> list[float]:
model = await app.state.model_cache.get(settings.clip_image_model, ModelType.CLIP)
embedding = model.predict(image)
model = await app.state.model_cache.get(settings.clip_image_model, ModelType.CLIP, mode="vision")
embedding = await predict(model, image)
return embedding
@ -97,8 +108,8 @@ async def clip_encode_image(
status_code=200,
)
async def clip_encode_text(payload: TextModelRequest) -> list[float]:
model = await app.state.model_cache.get(settings.clip_text_model, ModelType.CLIP)
embedding = model.predict(payload.text)
model = await app.state.model_cache.get(settings.clip_text_model, ModelType.CLIP, mode="text")
embedding = await predict(model, payload.text)
return embedding
@ -111,10 +122,14 @@ async def facial_recognition(
image: cv2.Mat = Depends(dep_cv_image),
) -> list[dict[str, Any]]:
model = await app.state.model_cache.get(settings.facial_recognition_model, ModelType.FACIAL_RECOGNITION)
faces = model.predict(image)
faces = await predict(model, image)
return faces
async def predict(model: InferenceModel, inputs: Any) -> Any:
return await asyncio.get_running_loop().run_in_executor(app.state.thread_pool, model.predict, inputs)
if __name__ == "__main__":
is_dev = os.getenv("NODE_ENV") == "development"
uvicorn.run(