1
0
mirror of https://github.com/immich-app/immich.git synced 2025-03-11 15:09:45 +02:00

add tests for country code and unknown language

This commit is contained in:
mertalev 2025-03-05 14:06:39 -05:00
parent 4207c15b6e
commit 7c718a973d
No known key found for this signature in database
GPG Key ID: 3A2B5BFC678DBC80

View File

@@ -447,6 +447,48 @@ class TestCLIP:
mock_tokenizer.encode.assert_called_once_with("deu_Latntest search query")
def test_openclip_tokenizer_removes_country_code_from_language_for_nllb_if_not_found(
    self,
    mocker: MockerFixture,
    clip_model_cfg: dict[str, Any],
    clip_tokenizer_cfg: Callable[[Path], dict[str, Any]],
) -> None:
    """Tokenize with a region-qualified language tag on the NLLB model.

    Calls ``tokenize`` with ``language="de-CH"`` and asserts that the mocked
    tokenizer receives exactly one ``encode`` call whose text is the query
    prefixed with ``deu_Latn``.
    """
    # Isolate the encoder: stub the download, inject the fixture configs,
    # and replace session creation with an autospec'd mock.
    mocker.patch.object(OpenClipTextualEncoder, "download")
    for cfg_attr, cfg_value in (
        ("model_cfg", clip_model_cfg),
        ("tokenizer_cfg", clip_tokenizer_cfg),
    ):
        mocker.patch.object(OpenClipTextualEncoder, cfg_attr, cfg_value)
    mocker.patch.object(InferenceModel, "_make_session", autospec=True)

    # The tokenizer loaded from file is a mock whose encode() returns 77 random ids.
    tokenizer_stub = mocker.patch("app.models.clip.textual.Tokenizer.from_file", autospec=True).return_value
    tokenizer_stub.encode.return_value = SimpleNamespace(ids=[randint(0, 50000) for _ in range(77)])

    encoder = OpenClipTextualEncoder("nllb-clip-base-siglip__mrl", cache_dir="test_cache")
    encoder._load()
    encoder.tokenize("test search query", language="de-CH")

    tokenizer_stub.encode.assert_called_once_with("deu_Latntest search query")
def test_openclip_tokenizer_falls_back_to_english_for_nllb_if_language_code_not_found(
    self,
    mocker: MockerFixture,
    clip_model_cfg: dict[str, Any],
    clip_tokenizer_cfg: Callable[[Path], dict[str, Any]],
    warning: mock.Mock,
) -> None:
    """Tokenize with an unrecognized language on the NLLB model.

    Calls ``tokenize`` with ``language="unknown"`` and asserts two things:
    the mocked tokenizer receives exactly one ``encode`` call whose text is
    the query prefixed with ``eng_Latn``, and the ``warning`` mock is called
    exactly once with the fallback message.
    """
    # Isolate the encoder: stub the download, inject the fixture configs,
    # and replace session creation with an autospec'd mock.
    mocker.patch.object(OpenClipTextualEncoder, "download")
    for cfg_attr, cfg_value in (
        ("model_cfg", clip_model_cfg),
        ("tokenizer_cfg", clip_tokenizer_cfg),
    ):
        mocker.patch.object(OpenClipTextualEncoder, cfg_attr, cfg_value)
    mocker.patch.object(InferenceModel, "_make_session", autospec=True)

    # The tokenizer loaded from file is a mock whose encode() returns 77 random ids.
    tokenizer_stub = mocker.patch("app.models.clip.textual.Tokenizer.from_file", autospec=True).return_value
    tokenizer_stub.encode.return_value = SimpleNamespace(ids=[randint(0, 50000) for _ in range(77)])

    encoder = OpenClipTextualEncoder("nllb-clip-base-siglip__mrl", cache_dir="test_cache")
    encoder._load()
    encoder.tokenize("test search query", language="unknown")

    tokenizer_stub.encode.assert_called_once_with("eng_Latntest search query")
    warning.assert_called_once_with("Language 'unknown' not found, defaulting to 'en'")
def test_openclip_tokenizer_does_not_add_flores_token_for_non_nllb_model(
self,
mocker: MockerFixture,