1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-08-04 22:03:09 +02:00

avfilter/x86/scene_sad: add AVX512 implementation

Trivial to add, but a lot faster (on my machine).

scene_sad8_c:                                       114476.4 ( 1.00x)
scene_sad8_sse2:                                      8644.3 (13.24x)
scene_sad8_avx2:                                      4520.1 (25.33x)
scene_sad8_avx512:                                    3153.0 (36.31x)
This commit is contained in:
Niklas Haas
2025-07-11 13:52:37 +02:00
parent 9251af058a
commit 91f2d146d4
2 changed files with 14 additions and 0 deletions

View File

@@ -72,3 +72,10 @@ INIT_YMM avx2
SAD_FRAMES
%endif
%if HAVE_AVX512_EXTERNAL
INIT_ZMM avx512
SAD_FRAMES
%endif

View File

@@ -41,6 +41,9 @@ SCENE_SAD_FUNC(scene_sad_sse2, ff_scene_sad_sse2, 16)
#if HAVE_AVX2_EXTERNAL
SCENE_SAD_FUNC(scene_sad_avx2, ff_scene_sad_avx2, 32)
#endif
#if HAVE_AVX512_EXTERNAL
SCENE_SAD_FUNC(scene_sad_avx512, ff_scene_sad_avx512, 64)
#endif
#endif
ff_scene_sad_fn ff_scene_sad_get_fn_x86(int depth)
@@ -48,6 +51,10 @@ ff_scene_sad_fn ff_scene_sad_get_fn_x86(int depth)
#if HAVE_X86ASM
int cpu_flags = av_get_cpu_flags();
if (depth <= 8) {
#if HAVE_AVX512_EXTERNAL
if (EXTERNAL_AVX512(cpu_flags))
return scene_sad_avx512;
#endif
#if HAVE_AVX2_EXTERNAL
if (EXTERNAL_AVX2_FAST(cpu_flags))
return scene_sad_avx2;