
refactor(ml): model sessions (#10559)

Mert
2024-06-25 12:00:24 -04:00
committed by GitHub
parent 6538ad8de7
commit 6356c28f64
11 changed files with 529 additions and 375 deletions
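The diff below reworks the machine-learning tests around two dedicated session classes, OrtSession and AnnSession, moved out of the model base class into app/sessions. As orientation, here is a minimal sketch of the interface the new tests exercise; the class name, constructor keywords, and attributes come straight from the test code, while the bodies are illustrative assumptions, not the code added by this commit.

# Minimal sketch of the ONNX Runtime session wrapper the tests exercise.
# Attribute and keyword names mirror the test code; the bodies are assumptions.
from pathlib import Path
from typing import Any

import numpy as np
import onnxruntime as ort


class OrtSession:
    def __init__(
        self,
        model_path: Path | str,
        providers: list[str] | None = None,
        provider_options: list[dict[str, Any]] | None = None,
        sess_options: ort.SessionOptions | None = None,
    ) -> None:
        # When not given explicitly, these fall back to defaults derived from the
        # available execution providers and app settings; the tests below pin down
        # exactly what those defaults must be.
        self.model_path = Path(model_path)
        self.providers = providers if providers is not None else ["CPUExecutionProvider"]
        self.provider_options = provider_options if provider_options is not None else []
        self.sess_options = sess_options if sess_options is not None else ort.SessionOptions()
        self.session = ort.InferenceSession(
            self.model_path.as_posix(),
            sess_options=self.sess_options,
            providers=self.providers,
            provider_options=self.provider_options,
        )

    def run(
        self, output_names: list[str] | None, input_feed: dict[str, np.ndarray]
    ) -> list[np.ndarray]:
        return self.session.run(output_names, input_feed)

AnnSession exposes the same get_inputs/get_outputs/run surface over the ARM NN runtime, as exercised in TestAnnSession further down.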

@@ -22,129 +22,16 @@ from app.models.clip.textual import MClipTextualEncoder, OpenClipTextualEncoder
from app.models.clip.visual import OpenClipVisualEncoder
from app.models.facial_recognition.detection import FaceDetector
from app.models.facial_recognition.recognition import FaceRecognizer
from app.sessions.ann import AnnSession
from app.sessions.ort import OrtSession
from .config import Settings, log, settings
from .config import Settings, settings
from .models.base import InferenceModel
from .models.cache import ModelCache
from .schemas import ModelFormat, ModelTask, ModelType
class TestBase:
CPU_EP = ["CPUExecutionProvider"]
CUDA_EP = ["CUDAExecutionProvider", "CPUExecutionProvider"]
OV_EP = ["OpenVINOExecutionProvider", "CPUExecutionProvider"]
CUDA_EP_OUT_OF_ORDER = ["CPUExecutionProvider", "CUDAExecutionProvider"]
TRT_EP = ["TensorrtExecutionProvider", "CUDAExecutionProvider", "CPUExecutionProvider"]
@pytest.mark.providers(CPU_EP)
def test_sets_cpu_provider(self, providers: list[str]) -> None:
encoder = OpenClipTextualEncoder("ViT-B-32__openai")
assert encoder.providers == self.CPU_EP
@pytest.mark.providers(CUDA_EP)
def test_sets_cuda_provider_if_available(self, providers: list[str]) -> None:
encoder = OpenClipTextualEncoder("ViT-B-32__openai")
assert encoder.providers == self.CUDA_EP
@pytest.mark.providers(OV_EP)
def test_sets_openvino_provider_if_available(self, providers: list[str], mocker: MockerFixture) -> None:
mocked = mocker.patch("app.models.base.ort.capi._pybind_state")
mocked.get_available_openvino_device_ids.return_value = ["GPU.0", "CPU"]
encoder = OpenClipTextualEncoder("ViT-B-32__openai")
assert encoder.providers == self.OV_EP
@pytest.mark.providers(OV_EP)
def test_avoids_openvino_if_gpu_not_available(self, providers: list[str], mocker: MockerFixture) -> None:
mocked = mocker.patch("app.models.base.ort.capi._pybind_state")
mocked.get_available_openvino_device_ids.return_value = ["CPU"]
encoder = OpenClipTextualEncoder("ViT-B-32__openai")
assert encoder.providers == self.CPU_EP
@pytest.mark.providers(CUDA_EP_OUT_OF_ORDER)
def test_sets_providers_in_correct_order(self, providers: list[str]) -> None:
encoder = OpenClipTextualEncoder("ViT-B-32__openai")
assert encoder.providers == self.CUDA_EP
@pytest.mark.providers(TRT_EP)
def test_ignores_unsupported_providers(self, providers: list[str]) -> None:
encoder = OpenClipTextualEncoder("ViT-B-32__openai")
assert encoder.providers == self.CUDA_EP
def test_sets_provider_kwarg(self) -> None:
providers = ["CUDAExecutionProvider"]
encoder = OpenClipTextualEncoder("ViT-B-32__openai", providers=providers)
assert encoder.providers == providers
def test_sets_default_provider_options(self, mocker: MockerFixture) -> None:
mocked = mocker.patch("app.models.base.ort.capi._pybind_state")
mocked.get_available_openvino_device_ids.return_value = ["GPU.0", "CPU"]
encoder = OpenClipTextualEncoder(
"ViT-B-32__openai", providers=["OpenVINOExecutionProvider", "CPUExecutionProvider"]
)
assert encoder.provider_options == [
{"device_type": "GPU_FP32", "cache_dir": (encoder.cache_dir / "openvino").as_posix()},
{"arena_extend_strategy": "kSameAsRequested"},
]
def test_sets_provider_options_kwarg(self) -> None:
encoder = OpenClipTextualEncoder(
"ViT-B-32__openai",
providers=["OpenVINOExecutionProvider", "CPUExecutionProvider"],
provider_options=[],
)
assert encoder.provider_options == []
def test_sets_default_sess_options(self) -> None:
encoder = OpenClipTextualEncoder("ViT-B-32__openai")
assert encoder.sess_options.execution_mode == ort.ExecutionMode.ORT_SEQUENTIAL
assert encoder.sess_options.inter_op_num_threads == 1
assert encoder.sess_options.intra_op_num_threads == 2
assert encoder.sess_options.enable_cpu_mem_arena is False
def test_sets_default_sess_options_does_not_set_threads_if_non_cpu_and_default_threads(self) -> None:
encoder = OpenClipTextualEncoder(
"ViT-B-32__openai", providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
)
assert encoder.sess_options.inter_op_num_threads == 0
assert encoder.sess_options.intra_op_num_threads == 0
def test_sets_default_sess_options_sets_threads_if_non_cpu_and_set_threads(self, mocker: MockerFixture) -> None:
mock_settings = mocker.patch("app.models.base.settings", autospec=True)
mock_settings.model_inter_op_threads = 2
mock_settings.model_intra_op_threads = 4
encoder = OpenClipTextualEncoder(
"ViT-B-32__openai", providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
)
assert encoder.sess_options.inter_op_num_threads == 2
assert encoder.sess_options.intra_op_num_threads == 4
def test_sets_sess_options_kwarg(self) -> None:
sess_options = ort.SessionOptions()
encoder = OpenClipTextualEncoder(
"ViT-B-32__openai",
providers=["OpenVINOExecutionProvider", "CPUExecutionProvider"],
provider_options=[],
sess_options=sess_options,
)
assert sess_options is encoder.sess_options
def test_sets_default_cache_dir(self) -> None:
encoder = OpenClipTextualEncoder("ViT-B-32__openai")
@@ -162,15 +49,16 @@ class TestBase:
encoder = OpenClipTextualEncoder("ViT-B-32__openai")
assert encoder.preferred_format == ModelFormat.ONNX
assert encoder.model_format == ModelFormat.ONNX
def test_sets_default_preferred_format_to_armnn_if_available(self, mocker: MockerFixture) -> None:
def test_sets_default_preferred_format_to_armnn_if_available(self, path: mock.Mock, mocker: MockerFixture) -> None:
mocker.patch.object(settings, "ann", True)
mocker.patch("ann.ann.is_available", True)
path.suffix = ".armnn"
encoder = OpenClipTextualEncoder("ViT-B-32__openai")
encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=path)
assert encoder.preferred_format == ModelFormat.ARMNN
assert encoder.model_format == ModelFormat.ARMNN
def test_sets_preferred_format_kwarg(self, mocker: MockerFixture) -> None:
mocker.patch.object(settings, "ann", False)
@@ -178,7 +66,7 @@ class TestBase:
encoder = OpenClipTextualEncoder("ViT-B-32__openai", preferred_format=ModelFormat.ARMNN)
assert encoder.preferred_format == ModelFormat.ARMNN
assert encoder.model_format == ModelFormat.ARMNN
def test_casts_cache_dir_string_to_path(self) -> None:
cache_dir = "/test_cache"
@@ -186,120 +74,53 @@ class TestBase:
assert encoder.cache_dir == Path(cache_dir)
def test_clear_cache(self, mocker: MockerFixture) -> None:
mock_rmtree = mocker.patch("app.models.base.rmtree", autospec=True)
mock_rmtree.avoids_symlink_attacks = True
mock_cache_dir = mocker.Mock()
mock_cache_dir.exists.return_value = True
mock_cache_dir.is_dir.return_value = True
mocker.patch("app.models.base.Path", return_value=mock_cache_dir)
info = mocker.spy(log, "info")
encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=mock_cache_dir)
def test_clear_cache(self, rmtree: mock.Mock, path: mock.Mock, info: mock.Mock) -> None:
encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=path)
encoder.clear_cache()
mock_rmtree.assert_called_once_with(encoder.cache_dir)
rmtree.assert_called_once_with(encoder.cache_dir)
info.assert_called_with(f"Cleared cache directory for model '{encoder.model_name}'.")
def test_clear_cache_warns_if_path_does_not_exist(self, mocker: MockerFixture) -> None:
mock_rmtree = mocker.patch("app.models.base.rmtree", autospec=True)
mock_rmtree.avoids_symlink_attacks = True
mock_cache_dir = mocker.Mock()
mock_cache_dir.exists.return_value = False
mock_cache_dir.is_dir.return_value = True
mocker.patch("app.models.base.Path", return_value=mock_cache_dir)
warning = mocker.spy(log, "warning")
def test_clear_cache_warns_if_path_does_not_exist(
self, rmtree: mock.Mock, path: mock.Mock, warning: mock.Mock
) -> None:
path.return_value.exists.return_value = False
encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=mock_cache_dir)
encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=path)
encoder.clear_cache()
mock_rmtree.assert_not_called()
rmtree.assert_not_called()
warning.assert_called_once()
def test_clear_cache_raises_exception_if_vulnerable_to_symlink_attack(self, mocker: MockerFixture) -> None:
mock_rmtree = mocker.patch("app.models.base.rmtree", autospec=True)
mock_rmtree.avoids_symlink_attacks = False
mock_cache_dir = mocker.Mock()
mock_cache_dir.exists.return_value = True
mock_cache_dir.is_dir.return_value = True
mocker.patch("app.models.base.Path", return_value=mock_cache_dir)
def test_clear_cache_raises_exception_if_vulnerable_to_symlink_attack(
self, rmtree: mock.Mock, path: mock.Mock
) -> None:
rmtree.avoids_symlink_attacks = False
encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=mock_cache_dir)
encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=path)
with pytest.raises(RuntimeError):
encoder.clear_cache()
mock_rmtree.assert_not_called()
rmtree.assert_not_called()
def test_clear_cache_replaces_file_with_dir_if_path_is_file(self, mocker: MockerFixture) -> None:
mock_rmtree = mocker.patch("app.models.base.rmtree", autospec=True)
mock_rmtree.avoids_symlink_attacks = True
mock_cache_dir = mocker.Mock()
mock_cache_dir.exists.return_value = True
mock_cache_dir.is_dir.return_value = False
mocker.patch("app.models.base.Path", return_value=mock_cache_dir)
warning = mocker.spy(log, "warning")
def test_clear_cache_replaces_file_with_dir_if_path_is_file(
self, rmtree: mock.Mock, path: mock.Mock, warning: mock.Mock
) -> None:
path.return_value.is_dir.return_value = False
encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=mock_cache_dir)
encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=path)
encoder.clear_cache()
mock_rmtree.assert_not_called()
mock_cache_dir.unlink.assert_called_once()
mock_cache_dir.mkdir.assert_called_once()
rmtree.assert_not_called()
path.return_value.unlink.assert_called_once()
path.return_value.mkdir.assert_called_once()
warning.assert_called_once()
def test_make_session_return_ann_if_available(self, mocker: MockerFixture) -> None:
mock_model_path = mocker.Mock()
mock_model_path.is_file.return_value = True
mock_model_path.suffix = ".armnn"
mock_model_path.with_suffix.return_value = mock_model_path
mock_ann = mocker.patch("app.models.base.AnnSession")
encoder = OpenClipTextualEncoder("ViT-B-32__openai")
encoder._make_session(mock_model_path)
mock_ann.assert_called_once()
def test_make_session_return_ort_if_available_and_ann_is_not(self, mocker: MockerFixture) -> None:
mock_armnn_path = mocker.Mock()
mock_armnn_path.is_file.return_value = False
mock_armnn_path.suffix = ".armnn"
mock_onnx_path = mocker.Mock()
mock_onnx_path.is_file.return_value = True
mock_onnx_path.suffix = ".onnx"
mock_armnn_path.with_suffix.return_value = mock_onnx_path
mock_ann = mocker.patch("app.models.base.AnnSession")
mock_ort = mocker.patch("app.models.base.ort.InferenceSession")
encoder = OpenClipTextualEncoder("ViT-B-32__openai")
encoder._make_session(mock_armnn_path)
mock_ort.assert_called_once()
mock_ann.assert_not_called()
def test_make_session_raises_exception_if_path_does_not_exist(self, mocker: MockerFixture) -> None:
mock_model_path = mocker.Mock()
mock_model_path.is_file.return_value = False
mock_model_path.suffix = ".onnx"
mock_model_path.with_suffix.return_value = mock_model_path
mock_ann = mocker.patch("app.models.base.AnnSession")
mock_ort = mocker.patch("app.models.base.ort.InferenceSession")
encoder = OpenClipTextualEncoder("ViT-B-32__openai")
with pytest.raises(ValueError):
encoder._make_session(mock_model_path)
mock_ann.assert_not_called()
mock_ort.assert_not_called()
def test_download(self, mocker: MockerFixture) -> None:
mock_snapshot_download = mocker.patch("app.models.base.snapshot_download")
def test_download(self, snapshot_download: mock.Mock) -> None:
encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir="/path/to/cache")
encoder.download()
mock_snapshot_download.assert_called_once_with(
snapshot_download.assert_called_once_with(
"immich-app/ViT-B-32__openai",
cache_dir=encoder.cache_dir,
local_dir=encoder.cache_dir,
@@ -307,13 +128,11 @@ class TestBase:
ignore_patterns=["*.armnn"],
)
def test_download_downloads_armnn_if_preferred_format(self, mocker: MockerFixture) -> None:
mock_snapshot_download = mocker.patch("app.models.base.snapshot_download")
def test_download_downloads_armnn_if_preferred_format(self, snapshot_download: mock.Mock) -> None:
encoder = OpenClipTextualEncoder("ViT-B-32__openai", preferred_format=ModelFormat.ARMNN)
encoder.download()
mock_snapshot_download.assert_called_once_with(
snapshot_download.assert_called_once_with(
"immich-app/ViT-B-32__openai",
cache_dir=encoder.cache_dir,
local_dir=encoder.cache_dir,
@@ -322,6 +141,167 @@ class TestBase:
)
@pytest.mark.usefixtures("ort_session")
class TestOrtSession:
CPU_EP = ["CPUExecutionProvider"]
CUDA_EP = ["CUDAExecutionProvider", "CPUExecutionProvider"]
OV_EP = ["OpenVINOExecutionProvider", "CPUExecutionProvider"]
CUDA_EP_OUT_OF_ORDER = ["CPUExecutionProvider", "CUDAExecutionProvider"]
TRT_EP = ["TensorrtExecutionProvider", "CUDAExecutionProvider", "CPUExecutionProvider"]
@pytest.mark.providers(CPU_EP)
def test_sets_cpu_provider(self, providers: list[str]) -> None:
session = OrtSession("ViT-B-32__openai")
assert session.providers == self.CPU_EP
@pytest.mark.providers(CUDA_EP)
def test_sets_cuda_provider_if_available(self, providers: list[str]) -> None:
session = OrtSession("ViT-B-32__openai")
assert session.providers == self.CUDA_EP
@pytest.mark.ov_device_ids(["GPU.0", "CPU"])
@pytest.mark.providers(OV_EP)
def test_sets_openvino_provider_if_available(self, providers: list[str], ov_device_ids: list[str]) -> None:
session = OrtSession("ViT-B-32__openai")
assert session.providers == self.OV_EP
@pytest.mark.ov_device_ids(["CPU"])
@pytest.mark.providers(OV_EP)
def test_avoids_openvino_if_gpu_not_available(self, providers: list[str], ov_device_ids: list[str]) -> None:
session = OrtSession("ViT-B-32__openai")
assert session.providers == self.CPU_EP
@pytest.mark.providers(CUDA_EP_OUT_OF_ORDER)
def test_sets_providers_in_correct_order(self, providers: list[str]) -> None:
session = OrtSession("ViT-B-32__openai")
assert session.providers == self.CUDA_EP
@pytest.mark.providers(TRT_EP)
def test_ignores_unsupported_providers(self, providers: list[str]) -> None:
session = OrtSession("ViT-B-32__openai")
assert session.providers == self.CUDA_EP
def test_sets_provider_kwarg(self) -> None:
providers = ["CUDAExecutionProvider"]
session = OrtSession("ViT-B-32__openai", providers=providers)
assert session.providers == providers
@pytest.mark.ov_device_ids(["GPU.0", "CPU"])
def test_sets_default_provider_options(self, ov_device_ids: list[str]) -> None:
model_path = "/cache/ViT-B-32__openai/model.onnx"
session = OrtSession(model_path, providers=["OpenVINOExecutionProvider", "CPUExecutionProvider"])
assert session.provider_options == [
{"device_type": "GPU_FP32", "cache_dir": "/cache/ViT-B-32__openai/openvino"},
{"arena_extend_strategy": "kSameAsRequested"},
]
def test_sets_provider_options_kwarg(self) -> None:
session = OrtSession(
"ViT-B-32__openai",
providers=["OpenVINOExecutionProvider", "CPUExecutionProvider"],
provider_options=[],
)
assert session.provider_options == []
def test_sets_default_sess_options(self) -> None:
session = OrtSession("ViT-B-32__openai")
assert session.sess_options.execution_mode == ort.ExecutionMode.ORT_SEQUENTIAL
assert session.sess_options.inter_op_num_threads == 1
assert session.sess_options.intra_op_num_threads == 2
assert session.sess_options.enable_cpu_mem_arena is False
def test_sets_default_sess_options_does_not_set_threads_if_non_cpu_and_default_threads(self) -> None:
session = OrtSession("ViT-B-32__openai", providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
assert session.sess_options.inter_op_num_threads == 0
assert session.sess_options.intra_op_num_threads == 0
def test_sets_default_sess_options_sets_threads_if_non_cpu_and_set_threads(self, mocker: MockerFixture) -> None:
mock_settings = mocker.patch("app.sessions.ort.settings", autospec=True)
mock_settings.model_inter_op_threads = 2
mock_settings.model_intra_op_threads = 4
session = OrtSession("ViT-B-32__openai", providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
assert session.sess_options.inter_op_num_threads == 2
assert session.sess_options.intra_op_num_threads == 4
def test_sets_sess_options_kwarg(self) -> None:
sess_options = ort.SessionOptions()
session = OrtSession(
"ViT-B-32__openai",
providers=["OpenVINOExecutionProvider", "CPUExecutionProvider"],
provider_options=[],
sess_options=sess_options,
)
assert sess_options is session.sess_options
class TestAnnSession:
def test_creates_ann_session(self, ann_session: mock.Mock, info: mock.Mock) -> None:
model_path = mock.MagicMock(spec=Path)
cache_dir = mock.MagicMock(spec=Path)
AnnSession(model_path, cache_dir)
ann_session.assert_called_once_with(tuning_level=3, tuning_file=(cache_dir / "gpu-tuning.ann").as_posix())
ann_session.return_value.load.assert_called_once_with(
model_path.as_posix(), cached_network_path=model_path.with_suffix(".anncache").as_posix()
)
info.assert_has_calls(
[
mock.call("Loading ANN model %s ...", model_path),
mock.call("Loaded ANN model with ID %d", ann_session.return_value.load.return_value),
]
)
def test_get_inputs(self, ann_session: mock.Mock) -> None:
ann_session.return_value.load.return_value = 123
ann_session.return_value.input_shapes = {123: [(1, 3, 224, 224)]}
session = AnnSession(Path("ViT-B-32__openai"))
inputs = session.get_inputs()
assert len(inputs) == 1
assert inputs[0].name is None
assert inputs[0].shape == (1, 3, 224, 224)
def test_get_outputs(self, ann_session: mock.Mock) -> None:
ann_session.return_value.load.return_value = 123
ann_session.return_value.output_shapes = {123: [(1, 3, 224, 224)]}
session = AnnSession(Path("ViT-B-32__openai"))
outputs = session.get_outputs()
assert len(outputs) == 1
assert outputs[0].name is None
assert outputs[0].shape == (1, 3, 224, 224)
def test_run(self, ann_session: mock.Mock, mocker: MockerFixture) -> None:
ann_session.return_value.load.return_value = 123
np_spy = mocker.spy(np, "ascontiguousarray")
session = AnnSession(Path("ViT-B-32__openai"))
[input1, input2] = [np.random.rand(1, 3, 224, 224).astype(np.float32) for _ in range(2)]
input_feed = {"input.1": input1, "input.2": input2}
session.run(None, input_feed)
ann_session.return_value.execute.assert_called_once_with(123, [input1, input2])
assert np_spy.call_count == 2
np_spy.assert_has_calls([mock.call(input1), mock.call(input2)])
class TestCLIP:
embedding = np.random.rand(512).astype(np.float32)
cache_dir = Path("test_cache")
@@ -487,6 +467,59 @@ class TestFaceRecognition:
assert isinstance(call_args[0][0], np.ndarray)
assert call_args[0][0].shape == (112, 112, 3)
def test_recognition_adds_batch_axis_for_ort(self, ort_session: mock.Mock, mocker: MockerFixture) -> None:
onnx = mocker.patch("app.models.facial_recognition.recognition.onnx", autospec=True)
update_dims = mocker.patch(
"app.models.facial_recognition.recognition.update_inputs_outputs_dims", autospec=True
)
mocker.patch("app.models.base.InferenceModel.download")
mocker.patch("app.models.facial_recognition.recognition.ArcFaceONNX")
ort_session.return_value.get_inputs.return_value = [SimpleNamespace(name="input.1", shape=(1, 3, 224, 224))]
ort_session.return_value.get_outputs.return_value = [SimpleNamespace(name="output.1", shape=(1, 800))]
proto = mock.Mock()
input_dims = mock.Mock()
input_dims.name = "input.1"
input_dims.type.tensor_type.shape.dim = [SimpleNamespace(dim_value=size) for size in [1, 3, 224, 224]]
proto.graph.input = [input_dims]
output_dims = mock.Mock()
output_dims.name = "output.1"
output_dims.type.tensor_type.shape.dim = [SimpleNamespace(dim_value=size) for size in [1, 800]]
proto.graph.output = [output_dims]
onnx.load.return_value = proto
face_recognizer = FaceRecognizer("buffalo_s")
face_recognizer.load()
assert face_recognizer.batch is True
update_dims.assert_called_once_with(proto, {"input.1": ["batch", 3, 224, 224]}, {"output.1": ["batch", 800]})
onnx.save.assert_called_once_with(update_dims.return_value, face_recognizer.model_path)
def test_recognition_does_not_add_batch_axis_if_exists(self, ort_session: mock.Mock, mocker: MockerFixture) -> None:
onnx = mocker.patch("app.models.facial_recognition.recognition.onnx", autospec=True)
update_dims = mocker.patch(
"app.models.facial_recognition.recognition.update_inputs_outputs_dims", autospec=True
)
mocker.patch("app.models.base.InferenceModel.download")
mocker.patch("app.models.facial_recognition.recognition.ArcFaceONNX")
inputs = [SimpleNamespace(name="input.1", shape=("batch", 3, 224, 224))]
outputs = [SimpleNamespace(name="output.1", shape=("batch", 800))]
ort_session.return_value.get_inputs.return_value = inputs
ort_session.return_value.get_outputs.return_value = outputs
face_recognizer = FaceRecognizer("buffalo_s")
face_recognizer.load()
assert face_recognizer.batch is True
update_dims.assert_not_called()
onnx.load.assert_not_called()
onnx.save.assert_not_called()
@pytest.mark.asyncio
class TestCache:
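For reference, the removed test_make_session_* tests earlier in this file documented how a session backend is chosen: ARM NN when an .armnn file is present, ONNX Runtime when only the .onnx file is, and an error otherwise. A rough sketch of that dispatch, inferred from those assertions only (the helper name and error message are assumptions):

# Rough sketch inferred from the removed test_make_session_* assertions;
# not the literal implementation in app/models/base.py.
from pathlib import Path

from app.sessions.ann import AnnSession
from app.sessions.ort import OrtSession


def make_session(model_path: Path):
    if model_path.suffix == ".armnn" and model_path.is_file():
        return AnnSession(model_path)  # prefer the ARM NN build when it exists
    onnx_path = model_path.with_suffix(".onnx")
    if onnx_path.is_file():
        return OrtSession(onnx_path)  # otherwise fall back to ONNX Runtime
    raise ValueError(f"Model path '{model_path}' does not exist")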