1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-08-04 22:03:09 +02:00

avfilter/x86/vf_colordetect: make the AVX512 functions run only on ICL targets or newer

detect_range uses vpbroadcast{b,w}, which requires the AVX512BW extension, and for
detect_alpha we don't want ZMM instructions to downclock older CPUs.

Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
James Almer
2025-07-21 17:25:15 -03:00
parent b9287fde10
commit c62813a057
2 changed files with 13 additions and 13 deletions

View File

@@ -141,7 +141,7 @@ detect_alpha_fn w, d, full
detect_alpha_fn b, w, limited
detect_alpha_fn w, d, limited
INIT_ZMM avx512
INIT_ZMM avx512icl
detect_range_fn b
detect_range_fn w
detect_alpha_fn b, w, full

View File

@@ -58,13 +58,13 @@ static int FUNC_NAME(const uint8_t *color, ptrdiff_t color_stride,
}
#if HAVE_X86ASM
#if HAVE_AVX512_EXTERNAL
DETECT_RANGE_FUNC(detect_range_avx512, ff_detect_rangeb_avx512, ff_detect_range_c, 0, 64)
DETECT_RANGE_FUNC(detect_range16_avx512, ff_detect_rangew_avx512, ff_detect_range16_c, 1, 64)
DETECT_ALPHA_FUNC(detect_alpha_full_avx512, ff_detect_alphab_full_avx512, ff_detect_alpha_full_c, 0, 64)
DETECT_ALPHA_FUNC(detect_alpha16_full_avx512, ff_detect_alphaw_full_avx512, ff_detect_alpha16_full_c, 1, 64)
DETECT_ALPHA_FUNC(detect_alpha_limited_avx512, ff_detect_alphab_limited_avx512, ff_detect_alpha_limited_c, 0, 64)
DETECT_ALPHA_FUNC(detect_alpha16_limited_avx512, ff_detect_alphaw_limited_avx512, ff_detect_alpha16_limited_c, 1, 64)
#if HAVE_AVX512ICL_EXTERNAL
DETECT_RANGE_FUNC(detect_range_avx512icl, ff_detect_rangeb_avx512icl, ff_detect_range_c, 0, 64)
DETECT_RANGE_FUNC(detect_range16_avx512icl, ff_detect_rangew_avx512icl, ff_detect_range16_c, 1, 64)
DETECT_ALPHA_FUNC(detect_alpha_full_avx512icl, ff_detect_alphab_full_avx512icl, ff_detect_alpha_full_c, 0, 64)
DETECT_ALPHA_FUNC(detect_alpha16_full_avx512icl, ff_detect_alphaw_full_avx512icl, ff_detect_alpha16_full_c, 1, 64)
DETECT_ALPHA_FUNC(detect_alpha_limited_avx512icl, ff_detect_alphab_limited_avx512icl, ff_detect_alpha_limited_c, 0, 64)
DETECT_ALPHA_FUNC(detect_alpha16_limited_avx512icl, ff_detect_alphaw_limited_avx512icl, ff_detect_alpha16_limited_c, 1, 64)
#endif
#if HAVE_AVX2_EXTERNAL
DETECT_RANGE_FUNC(detect_range_avx2, ff_detect_rangeb_avx2, ff_detect_range_c, 0, 32)
@@ -91,13 +91,13 @@ av_cold void ff_color_detect_dsp_init_x86(FFColorDetectDSPContext *dsp, int dept
}
}
#endif
#if HAVE_AVX512_EXTERNAL
if (EXTERNAL_AVX512(cpu_flags)) {
dsp->detect_range = depth > 8 ? detect_range16_avx512 : detect_range_avx512;
#if HAVE_AVX512ICL_EXTERNAL
if (EXTERNAL_AVX512ICL(cpu_flags)) {
dsp->detect_range = depth > 8 ? detect_range16_avx512icl : detect_range_avx512icl;
if (color_range == AVCOL_RANGE_JPEG) {
dsp->detect_alpha = depth > 8 ? detect_alpha16_full_avx512 : detect_alpha_full_avx512;
dsp->detect_alpha = depth > 8 ? detect_alpha16_full_avx512icl : detect_alpha_full_avx512icl;
} else {
dsp->detect_alpha = depth > 8 ? detect_alpha16_limited_avx512 : detect_alpha_limited_avx512;
dsp->detect_alpha = depth > 8 ? detect_alpha16_limited_avx512icl : detect_alpha_limited_avx512icl;
}
}
#endif