feat(ml): export clip models to ONNX and host models on Hugging Face (#4700)

* export clip models * export to hf refactored export code * export mclip, general refactoring cleanup * updated conda deps * do transforms with pillow and numpy, add tokenization config to export, general refactoring * moved conda dockerfile, re-added poetry * minor fixes * updated link * updated tests * removed `requirements.txt` from workflow * fixed mimalloc path * removed torchvision * cleaner np typing * review suggestions * update default model name * update test
2025-12-06 01:23:16 +02:00 · 2023-10-31 06:02:04 -04:00
parent 3212a47720
commit 87a0ba3db3
29 changed files with 6192 additions and 2043 deletions
--- a/machine-learning/export/models/optimize.py
+++ b/machine-learning/export/models/optimize.py
@@ -0,0 +1,38 @@
+from pathlib import Path
+
+import onnx
+import onnxruntime as ort
+import onnxsim
+
+
+def optimize_onnxsim(model_path: Path | str, output_path: Path | str) -> None:
+    model_path = Path(model_path)
+    output_path = Path(output_path)
+    model = onnx.load(model_path.as_posix())
+    model, check = onnxsim.simplify(model, skip_shape_inference=True)
+    assert check, "Simplified ONNX model could not be validated"
+    onnx.save(model, output_path.as_posix())
+
+
+def optimize_ort(
+    model_path: Path | str,
+    output_path: Path | str,
+    level: ort.GraphOptimizationLevel = ort.GraphOptimizationLevel.ORT_ENABLE_BASIC,
+) -> None:
+    model_path = Path(model_path)
+    output_path = Path(output_path)
+
+    sess_options = ort.SessionOptions()
+    sess_options.graph_optimization_level = level
+    sess_options.optimized_model_filepath = output_path.as_posix()
+
+    ort.InferenceSession(model_path.as_posix(), providers=["CPUExecutionProvider"], sess_options=sess_options)
+
+
+def optimize(model_path: Path | str) -> None:
+    model_path = Path(model_path)
+
+    optimize_ort(model_path, model_path)
+    # onnxsim serializes large models as a blob, which uses much more memory when loading the model at runtime
+    if not any(file.name.startswith("Constant") for file in model_path.parent.iterdir()):
+        optimize_onnxsim(model_path, model_path)