1
0
mirror of https://github.com/immich-app/immich.git synced 2024-11-28 09:33:27 +02:00
immich/machine-learning/app/schemas.py
Mert 2b1b43a7e4
feat(ml): composable ml (#9973)
* modularize model classes

* various fixes

* expose port

* change response

* round coordinates

* simplify preload

* update server

* simplify interface

simplify

* update tests

* composable endpoint

* cleanup

fixes

remove unnecessary interface

support text input, cleanup

* ew camelcase

* update server

server fixes

fix typing

* ml fixes

update locustfile

fixes

* cleaner response

* better repo response

* update tests

formatting and typing

rename

* undo compose change

* linting

fix type

actually fix typing

* stricter typing

fix detection-only response

no need for defaultdict

* update spec file

update api

linting

* update e2e

* unnecessary dimension

* remove commented code

* remove duplicate code

* remove unused imports

* add batch dim
2024-06-07 03:09:47 +00:00

115 lines
2.3 KiB
Python

from enum import Enum
from typing import Any, Literal, Protocol, TypedDict, TypeGuard, TypeVar
import numpy as np
import numpy.typing as npt
from pydantic import BaseModel
class StrEnum(str, Enum):
value: str
def __str__(self) -> str:
return self.value
class TextResponse(BaseModel):
__root__: str
class MessageResponse(BaseModel):
message: str
class BoundingBox(TypedDict):
x1: int
y1: int
x2: int
y2: int
class ModelTask(StrEnum):
FACIAL_RECOGNITION = "facial-recognition"
SEARCH = "clip"
class ModelType(StrEnum):
DETECTION = "detection"
RECOGNITION = "recognition"
TEXTUAL = "textual"
VISUAL = "visual"
class ModelFormat(StrEnum):
ARMNN = "armnn"
ONNX = "onnx"
class ModelSource(StrEnum):
INSIGHTFACE = "insightface"
MCLIP = "mclip"
OPENCLIP = "openclip"
ModelIdentity = tuple[ModelType, ModelTask]
class ModelSession(Protocol):
def run(
self,
output_names: list[str] | None,
input_feed: dict[str, npt.NDArray[np.float32]] | dict[str, npt.NDArray[np.int32]],
run_options: Any = None,
) -> list[npt.NDArray[np.float32]]: ...
class HasProfiling(Protocol):
profiling: dict[str, float]
class FaceDetectionOutput(TypedDict):
boxes: npt.NDArray[np.float32]
scores: npt.NDArray[np.float32]
landmarks: npt.NDArray[np.float32]
class DetectedFace(TypedDict):
boundingBox: BoundingBox
embedding: npt.NDArray[np.float32]
score: float
FacialRecognitionOutput = list[DetectedFace]
class PipelineEntry(TypedDict):
modelName: str
options: dict[str, Any]
PipelineRequest = dict[ModelTask, dict[ModelType, PipelineEntry]]
class InferenceEntry(TypedDict):
name: str
task: ModelTask
type: ModelType
options: dict[str, Any]
InferenceEntries = tuple[list[InferenceEntry], list[InferenceEntry]]
InferenceResponse = dict[ModelTask | Literal["imageHeight"] | Literal["imageWidth"], Any]
def has_profiling(obj: Any) -> TypeGuard[HasProfiling]:
return hasattr(obj, "profiling") and isinstance(obj.profiling, dict)
def is_ndarray(obj: Any, dtype: "type[np._DTypeScalar_co]") -> "TypeGuard[npt.NDArray[np._DTypeScalar_co]]":
return isinstance(obj, np.ndarray) and obj.dtype == dtype
T = TypeVar("T")