From 4fc05c28f426d6073e6e15db334b0c88ff925f1d Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt Date: Sun, 21 Sep 2025 13:12:31 +0200 Subject: [PATCH] avfilter/x86/vf_gradfun: Remove MMXEXT func overridden by SSSE3 SSSE3 is already quite old (introduced 2006 for Intel, 2011 for AMD), so that the overwhelming majority of our users (particularly those that actually update their FFmpeg) will be using the SSSE3 version of filter_line. This commit therefore removes the overridden MMXEXT version (which didn't abide by the ABI) which allows us to remove an emms_c() from vf_gradfun.c, so that users with SSSE3 no longer pay a price for the mere existence of an MMXEXT version. Reviewed-by: Lynne Signed-off-by: Andreas Rheinhardt --- libavfilter/vf_gradfun.c | 2 -- libavfilter/x86/vf_gradfun.asm | 42 ++++++++----------------------- libavfilter/x86/vf_gradfun_init.c | 22 ---------------- 3 files changed, 10 insertions(+), 56 deletions(-) diff --git a/libavfilter/vf_gradfun.c b/libavfilter/vf_gradfun.c index 088b3c9143..4f211c3ddf 100644 --- a/libavfilter/vf_gradfun.c +++ b/libavfilter/vf_gradfun.c @@ -32,7 +32,6 @@ * Dither it back to 8bit. */ -#include "libavutil/emms.h" #include "libavutil/imgutils.h" #include "libavutil/common.h" #include "libavutil/mem.h" @@ -119,7 +118,6 @@ static void filter(GradFunContext *ctx, uint8_t *dst, const uint8_t *src, int wi ctx->filter_line(dst + y * dst_linesize, src + y * src_linesize, dc - r / 2, width, thresh, dither[y & 7]); if (++y >= height) break; } - emms_c(); } static av_cold int init(AVFilterContext *ctx) diff --git a/libavfilter/x86/vf_gradfun.asm b/libavfilter/x86/vf_gradfun.asm index d106d52100..55e7c1ea0f 100644 --- a/libavfilter/x86/vf_gradfun.asm +++ b/libavfilter/x86/vf_gradfun.asm @@ -27,7 +27,15 @@ pw_ff: times 8 dw 0xFF SECTION .text -%macro FILTER_LINE 1 +INIT_XMM ssse3 +cglobal gradfun_filter_line, 6, 6, 8 + movd m5, r4d + pxor m7, m7 + pshuflw m5, m5, 0 + mova m6, [pw_7f] + punpcklqdq m5, m5 + mova m4, [r5] +.loop: movh m0, [r2+r0] movh m1, [r3+r0] punpcklbw m0, m7 @@ -40,42 +48,12 @@ SECTION .text pminsw m2, m7 pmullw m2, m2 psllw m1, 2 - paddw m0, %1 + paddw m0, m4 pmulhw m1, m2 paddw m0, m1 psraw m0, 7 packuswb m0, m0 movh [r1+r0], m0 -%endmacro - -INIT_MMX mmxext -cglobal gradfun_filter_line, 6, 6 - movh m5, r4d - pxor m7, m7 - pshufw m5, m5,0 - mova m6, [pw_7f] - mova m3, [r5] - mova m4, [r5+8] -.loop: - FILTER_LINE m3 - add r0, 4 - jge .end - FILTER_LINE m4 - add r0, 4 - jl .loop -.end: - RET - -INIT_XMM ssse3 -cglobal gradfun_filter_line, 6, 6, 8 - movd m5, r4d - pxor m7, m7 - pshuflw m5, m5, 0 - mova m6, [pw_7f] - punpcklqdq m5, m5 - mova m4, [r5] -.loop: - FILTER_LINE m4 add r0, 8 jl .loop RET diff --git a/libavfilter/x86/vf_gradfun_init.c b/libavfilter/x86/vf_gradfun_init.c index 56e6774a79..f262f0a1bb 100644 --- a/libavfilter/x86/vf_gradfun_init.c +++ b/libavfilter/x86/vf_gradfun_init.c @@ -24,9 +24,6 @@ #include "libavutil/x86/cpu.h" #include "libavfilter/gradfun.h" -void ff_gradfun_filter_line_mmxext(intptr_t x, uint8_t *dst, const uint8_t *src, - const uint16_t *dc, int thresh, - const uint16_t *dithers); void ff_gradfun_filter_line_ssse3(intptr_t x, uint8_t *dst, const uint8_t *src, const uint16_t *dc, int thresh, const uint16_t *dithers); @@ -39,23 +36,6 @@ void ff_gradfun_blur_line_movdqu_sse2(intptr_t x, uint16_t *buf, const uint8_t *src1, const uint8_t *src2); #if HAVE_X86ASM -static void gradfun_filter_line_mmxext(uint8_t *dst, const uint8_t *src, - const uint16_t *dc, - int width, int thresh, - const uint16_t *dithers) -{ - intptr_t x; - if (width & 3) { - x = width & ~3; - ff_gradfun_filter_line_c(dst + x, src + x, dc + x / 2, - width - x, thresh, dithers); - width = x; - } - x = -width; - ff_gradfun_filter_line_mmxext(x, dst + width, src + width, dc + width / 2, - thresh, dithers); -} - static void gradfun_filter_line_ssse3(uint8_t *dst, const uint8_t *src, const uint16_t *dc, int width, int thresh, const uint16_t *dithers) @@ -93,8 +73,6 @@ av_cold void ff_gradfun_init_x86(GradFunContext *gf) #if HAVE_X86ASM int cpu_flags = av_get_cpu_flags(); - if (EXTERNAL_MMXEXT(cpu_flags)) - gf->filter_line = gradfun_filter_line_mmxext; if (EXTERNAL_SSSE3(cpu_flags)) gf->filter_line = gradfun_filter_line_ssse3;