You've already forked immich
mirror of
https://github.com/immich-app/immich.git
synced 2025-06-16 03:40:33 +02:00
feat(ml): ARMNN acceleration (#5667)
* feat(ml): ARMNN acceleration for CLIP * wrap ANN as ONNX-Session * strict typing * normalize ARMNN CLIP embedding * mutex to handle concurrent execution * make inputs contiguous * fine-grained locking; concurrent network execution --------- Co-authored-by: mertalev <101130780+mertalev@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
29747437f6
commit
753292956e
157
machine-learning/ann/export/run.py
Normal file
157
machine-learning/ann/export/run.py
Normal file
@ -0,0 +1,157 @@
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
from abc import abstractmethod
|
||||
|
||||
import onnx
|
||||
import open_clip
|
||||
import torch
|
||||
from onnx2torch import convert
|
||||
from onnxruntime.tools.onnx_model_utils import fix_output_shapes, make_input_shape_fixed
|
||||
from tinynn.converter import TFLiteConverter
|
||||
|
||||
|
||||
class ExportBase(torch.nn.Module):
|
||||
input_shape: tuple[int, ...]
|
||||
|
||||
def __init__(self, device: torch.device, name: str):
|
||||
super().__init__()
|
||||
self.device = device
|
||||
self.name = name
|
||||
self.optimize = 5
|
||||
self.nchw_transpose = False
|
||||
|
||||
@abstractmethod
|
||||
def forward(self, input_tensor: torch.Tensor) -> torch.Tensor | tuple[torch.Tensor]:
|
||||
pass
|
||||
|
||||
def dummy_input(self) -> torch.FloatTensor:
|
||||
return torch.rand((1, 3, 224, 224), device=self.device)
|
||||
|
||||
|
||||
class ArcFace(ExportBase):
|
||||
input_shape = (1, 3, 112, 112)
|
||||
|
||||
def __init__(self, onnx_model_path: str, device: torch.device):
|
||||
name, _ = os.path.splitext(os.path.basename(onnx_model_path))
|
||||
super().__init__(device, name)
|
||||
onnx_model = onnx.load_model(onnx_model_path)
|
||||
make_input_shape_fixed(onnx_model.graph, onnx_model.graph.input[0].name, self.input_shape)
|
||||
fix_output_shapes(onnx_model)
|
||||
self.model = convert(onnx_model).to(device)
|
||||
if self.device.type == "cuda":
|
||||
self.model = self.model.half()
|
||||
|
||||
def forward(self, input_tensor: torch.Tensor) -> torch.FloatTensor:
|
||||
embedding: torch.FloatTensor = self.model(
|
||||
input_tensor.half() if self.device.type == "cuda" else input_tensor
|
||||
).float()
|
||||
assert isinstance(embedding, torch.FloatTensor)
|
||||
return embedding
|
||||
|
||||
def dummy_input(self) -> torch.FloatTensor:
|
||||
return torch.rand(self.input_shape, device=self.device)
|
||||
|
||||
|
||||
class RetinaFace(ExportBase):
|
||||
input_shape = (1, 3, 640, 640)
|
||||
|
||||
def __init__(self, onnx_model_path: str, device: torch.device):
|
||||
name, _ = os.path.splitext(os.path.basename(onnx_model_path))
|
||||
super().__init__(device, name)
|
||||
self.optimize = 3
|
||||
self.model = convert(onnx_model_path).eval().to(device)
|
||||
if self.device.type == "cuda":
|
||||
self.model = self.model.half()
|
||||
|
||||
def forward(self, input_tensor: torch.Tensor) -> tuple[torch.FloatTensor]:
|
||||
out: torch.Tensor = self.model(input_tensor.half() if self.device.type == "cuda" else input_tensor)
|
||||
return tuple(o.float() for o in out)
|
||||
|
||||
def dummy_input(self) -> torch.FloatTensor:
|
||||
return torch.rand(self.input_shape, device=self.device)
|
||||
|
||||
|
||||
class ClipVision(ExportBase):
|
||||
input_shape = (1, 3, 224, 224)
|
||||
|
||||
def __init__(self, model_name: str, weights: str, device: torch.device):
|
||||
super().__init__(device, model_name + "__" + weights)
|
||||
self.model = open_clip.create_model(
|
||||
model_name,
|
||||
weights,
|
||||
precision="fp16" if device.type == "cuda" else "fp32",
|
||||
jit=False,
|
||||
require_pretrained=True,
|
||||
device=device,
|
||||
)
|
||||
|
||||
def forward(self, input_tensor: torch.Tensor) -> torch.FloatTensor:
|
||||
embedding: torch.Tensor = self.model.encode_image(
|
||||
input_tensor.half() if self.device.type == "cuda" else input_tensor,
|
||||
normalize=True,
|
||||
).float()
|
||||
return embedding
|
||||
|
||||
|
||||
def export(model: ExportBase) -> None:
|
||||
model.eval()
|
||||
for param in model.parameters():
|
||||
param.requires_grad = False
|
||||
dummy_input = model.dummy_input()
|
||||
model(dummy_input)
|
||||
jit = torch.jit.trace(model, dummy_input) # type: ignore[no-untyped-call,attr-defined]
|
||||
tflite_model_path = f"output/{model.name}.tflite"
|
||||
os.makedirs("output", exist_ok=True)
|
||||
|
||||
converter = TFLiteConverter(
|
||||
jit,
|
||||
dummy_input,
|
||||
tflite_model_path,
|
||||
optimize=model.optimize,
|
||||
nchw_transpose=model.nchw_transpose,
|
||||
)
|
||||
# segfaults on ARM, must run on x86_64 / AMD64
|
||||
converter.convert()
|
||||
|
||||
armnn_model_path = f"output/{model.name}.armnn"
|
||||
os.environ["LD_LIBRARY_PATH"] = "armnn"
|
||||
subprocess.run(
|
||||
[
|
||||
"./armnnconverter",
|
||||
"-f",
|
||||
"tflite-binary",
|
||||
"-m",
|
||||
tflite_model_path,
|
||||
"-i",
|
||||
"input_tensor",
|
||||
"-o",
|
||||
"output_tensor",
|
||||
"-p",
|
||||
armnn_model_path,
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
def main() -> None:
|
||||
if platform.machine() not in ("x86_64", "AMD64"):
|
||||
raise RuntimeError(f"Can only run on x86_64 / AMD64, not {platform.machine()}")
|
||||
|
||||
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
||||
if device.type != "cuda":
|
||||
logging.warning(
|
||||
"No CUDA available, cannot create fp16 model! proceeding to create a fp32 model (use only for testing)"
|
||||
)
|
||||
models = [
|
||||
ClipVision("ViT-B-32", "openai", device),
|
||||
ArcFace("buffalo_l_rec.onnx", device),
|
||||
RetinaFace("buffalo_l_det.onnx", device),
|
||||
]
|
||||
for model in models:
|
||||
export(model)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
with torch.no_grad():
|
||||
main()
|
Reference in New Issue
Block a user