1
0
mirror of https://github.com/immich-app/immich.git synced 2025-08-08 23:07:06 +02:00

refactor: migrate person repository to kysely (#15242)

* refactor: migrate person repository to kysely

* `asVector` begone

* linting

* fix metadata faces

* update test

---------

Co-authored-by: Alex <alex.tran1502@gmail.com>
Co-authored-by: mertalev <101130780+mertalev@users.noreply.github.com>
This commit is contained in:
Daniel Dietzler
2025-01-21 19:12:28 +01:00
committed by GitHub
parent 0c152366ec
commit 332a865ce6
29 changed files with 715 additions and 747 deletions

View File

@ -10,7 +10,7 @@ from tokenizers import Encoding, Tokenizer
from app.config import log
from app.models.base import InferenceModel
from app.models.transforms import clean_text
from app.models.transforms import clean_text, serialize_np_array
from app.schemas import ModelSession, ModelTask, ModelType
@ -18,9 +18,9 @@ class BaseCLIPTextualEncoder(InferenceModel):
depends = []
identity = (ModelType.TEXTUAL, ModelTask.SEARCH)
def _predict(self, inputs: str, **kwargs: Any) -> NDArray[np.float32]:
def _predict(self, inputs: str, **kwargs: Any) -> str:
res: NDArray[np.float32] = self.session.run(None, self.tokenize(inputs))[0][0]
return res
return serialize_np_array(res)
def _load(self) -> ModelSession:
session = super()._load()

View File

@ -10,7 +10,15 @@ from PIL import Image
from app.config import log
from app.models.base import InferenceModel
from app.models.transforms import crop_pil, decode_pil, get_pil_resampling, normalize, resize_pil, to_numpy
from app.models.transforms import (
crop_pil,
decode_pil,
get_pil_resampling,
normalize,
resize_pil,
serialize_np_array,
to_numpy,
)
from app.schemas import ModelSession, ModelTask, ModelType
@ -18,10 +26,10 @@ class BaseCLIPVisualEncoder(InferenceModel):
depends = []
identity = (ModelType.VISUAL, ModelTask.SEARCH)
def _predict(self, inputs: Image.Image | bytes, **kwargs: Any) -> NDArray[np.float32]:
def _predict(self, inputs: Image.Image | bytes, **kwargs: Any) -> str:
image = decode_pil(inputs)
res: NDArray[np.float32] = self.session.run(None, self.transform(image))[0][0]
return res
return serialize_np_array(res)
@abstractmethod
def transform(self, image: Image.Image) -> dict[str, NDArray[np.float32]]:

View File

@ -12,7 +12,7 @@ from PIL import Image
from app.config import log, settings
from app.models.base import InferenceModel
from app.models.transforms import decode_cv2
from app.models.transforms import decode_cv2, serialize_np_array
from app.schemas import FaceDetectionOutput, FacialRecognitionOutput, ModelFormat, ModelSession, ModelTask, ModelType
@ -61,7 +61,7 @@ class FaceRecognizer(InferenceModel):
return [
{
"boundingBox": {"x1": x1, "y1": y1, "x2": x2, "y2": y2},
"embedding": embedding,
"embedding": serialize_np_array(embedding),
"score": score,
}
for (x1, y1, x2, y2), embedding, score in zip(faces["boxes"], embeddings, faces["scores"])

View File

@ -4,6 +4,7 @@ from typing import IO
import cv2
import numpy as np
import orjson
from numpy.typing import NDArray
from PIL import Image
@ -69,3 +70,9 @@ def clean_text(text: str, canonicalize: bool = False) -> str:
if canonicalize:
text = text.translate(_PUNCTUATION_TRANS).lower()
return text
# this allows the client to use the array as a string without deserializing only to serialize back to a string
# TODO: use this in a less invasive way
def serialize_np_array(arr: NDArray[np.float32]) -> str:
return orjson.dumps(arr, option=orjson.OPT_SERIALIZE_NUMPY).decode()