From c7161befb4ae7d0f40e35676f52507e7de1c8b01 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt Date: Sun, 21 Sep 2025 15:12:49 +0200 Subject: [PATCH] avcodec/x86/h264_qpel: Remove MMX(EXT) funcs overridden by SSSE3 SSSE3 is already quite old (introduced 2006 for Intel, 2011 for AMD), so that the overwhelming majority of our users (particularly those that actually update their FFmpeg) will be using the SSSE3 versions. This commit therefore removes the MMX(EXT) functions overridden by them (which don't abide by the ABI) to get closer to a removal of emms_c. Reviewed-by: Lynne Signed-off-by: Andreas Rheinhardt --- libavcodec/x86/h264_qpel.c | 34 ++---------------- libavcodec/x86/h264_qpel_8bit.asm | 60 ------------------------------- 2 files changed, 3 insertions(+), 91 deletions(-) diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c index 69ffd001e0..18d80a52f6 100644 --- a/libavcodec/x86/h264_qpel.c +++ b/libavcodec/x86/h264_qpel.c @@ -46,12 +46,10 @@ void ff_avg_pixels16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t #define ff_avg_pixels8_l2_sse2 ff_avg_pixels8_l2_mmxext #define ff_put_pixels16_l2_sse2 ff_put_pixels16_l2_mmxext #define ff_avg_pixels16_l2_sse2 ff_avg_pixels16_l2_mmxext -#define ff_put_pixels8_mmxext(...) #define ff_put_pixels4_mmxext(...) #define DEF_QPEL(OPNAME)\ void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\ -void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\ void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_ssse3(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\ void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_l2_mmxext(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);\ void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_mmxext(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);\ @@ -91,15 +89,6 @@ static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX }while(w--);\ }\ \ -static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\ - ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ - ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ - src += 8*srcStride;\ - dst += 8*dstStride;\ - ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ - ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ -}\ -\ static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride){\ ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\ ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\ @@ -196,10 +185,6 @@ static av_always_inline void ff_ ## OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uin #define ff_put_h264_qpel8or16_hv2_lowpass_sse2 ff_put_h264_qpel8or16_hv2_lowpass_mmxext #define ff_avg_h264_qpel8or16_hv2_lowpass_sse2 ff_avg_h264_qpel8or16_hv2_lowpass_mmxext -#define H264_MC_C_H(OPNAME, SIZE, MMX, ALIGN) \ -H264_MC_C(OPNAME, SIZE, MMX, ALIGN)\ -H264_MC_H(OPNAME, SIZE, MMX, ALIGN)\ - #define H264_MC_C_V_H_HV(OPNAME, SIZE, MMX, ALIGN) \ H264_MC_C(OPNAME, SIZE, MMX, ALIGN)\ H264_MC_V(OPNAME, SIZE, MMX, ALIGN)\ @@ -356,8 +341,7 @@ QPEL_H264_HV_XMM(put_, PUT_OP, ssse3) QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, ssse3) H264_MC(H264_MC_C_V_H_HV, 4, mmxext, 8) -H264_MC(H264_MC_C_H, 8, mmxext, 8) -H264_MC(H264_MC_H, 16, mmxext, 8) +H264_MC_C(avg_, 8, mmxext, 8) H264_MC_816(H264_MC_V, sse2) H264_MC_816(H264_MC_HV, sse2) H264_MC_816(H264_MC_H, ssse3) @@ -421,20 +405,11 @@ LUMA_MC_816(10, mc33, sse2) #endif /* HAVE_X86ASM */ -#define SET_QPEL_FUNCS123(PFX, IDX, SIZE, CPU, PREFIX) \ +#define SET_QPEL_FUNCS_1PP(PFX, IDX, SIZE, CPU, PREFIX) \ do { \ c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \ c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \ c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \ - } while (0) -#define SET_QPEL_FUNCS0123(PFX, IDX, SIZE, CPU, PREFIX) \ - do { \ - c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \ - SET_QPEL_FUNCS123(PFX, IDX, SIZE, CPU, PREFIX); \ - } while (0) -#define SET_QPEL_FUNCS_1PP(PFX, IDX, SIZE, CPU, PREFIX) \ - do { \ - SET_QPEL_FUNCS123(PFX, IDX, SIZE, CPU, PREFIX); \ c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \ c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \ c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \ @@ -478,11 +453,8 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth) if (EXTERNAL_MMXEXT(cpu_flags)) { if (!high_bit_depth) { - SET_QPEL_FUNCS123 (put_h264_qpel, 0, 16, mmxext, ); - SET_QPEL_FUNCS123 (put_h264_qpel, 1, 8, mmxext, ); SET_QPEL_FUNCS_1PP(put_h264_qpel, 2, 4, mmxext, ); - SET_QPEL_FUNCS123 (avg_h264_qpel, 0, 16, mmxext, ); - SET_QPEL_FUNCS0123(avg_h264_qpel, 1, 8, mmxext, ); + c->avg_h264_qpel_pixels_tab[1][0] = avg_h264_qpel8_mc00_mmxext; SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmxext, ); } else if (bit_depth == 10) { SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 10_mmxext, ff_); diff --git a/libavcodec/x86/h264_qpel_8bit.asm b/libavcodec/x86/h264_qpel_8bit.asm index 4e64329991..89e7c282b2 100644 --- a/libavcodec/x86/h264_qpel_8bit.asm +++ b/libavcodec/x86/h264_qpel_8bit.asm @@ -96,66 +96,6 @@ INIT_MMX mmxext QPEL4_H_LOWPASS_OP put QPEL4_H_LOWPASS_OP avg -%macro QPEL8_H_LOWPASS_OP 1 -cglobal %1_h264_qpel8_h_lowpass, 4,5 ; dst, src, dstStride, srcStride - movsxdifnidn r2, r2d - movsxdifnidn r3, r3d - mov r4d, 8 - pxor m7, m7 - mova m6, [pw_5] -.loop: - mova m0, [r1] - mova m2, [r1+1] - mova m1, m0 - mova m3, m2 - punpcklbw m0, m7 - punpckhbw m1, m7 - punpcklbw m2, m7 - punpckhbw m3, m7 - paddw m0, m2 - paddw m1, m3 - psllw m0, 2 - psllw m1, 2 - mova m2, [r1-1] - mova m4, [r1+2] - mova m3, m2 - mova m5, m4 - punpcklbw m2, m7 - punpckhbw m3, m7 - punpcklbw m4, m7 - punpckhbw m5, m7 - paddw m2, m4 - paddw m5, m3 - psubw m0, m2 - psubw m1, m5 - pmullw m0, m6 - pmullw m1, m6 - movd m2, [r1-2] - movd m5, [r1+7] - punpcklbw m2, m7 - punpcklbw m5, m7 - paddw m2, m3 - paddw m4, m5 - mova m5, [pw_16] - paddw m2, m5 - paddw m4, m5 - paddw m0, m2 - paddw m1, m4 - psraw m0, 5 - psraw m1, 5 - packuswb m0, m1 - op_%1 m0, [r0], m4 - add r0, r2 - add r1, r3 - dec r4d - jg .loop - RET -%endmacro - -INIT_MMX mmxext -QPEL8_H_LOWPASS_OP put -QPEL8_H_LOWPASS_OP avg - %macro QPEL8_H_LOWPASS_OP_XMM 1 cglobal %1_h264_qpel8_h_lowpass, 4,5,8 ; dst, src, dstStride, srcStride movsxdifnidn r2, r2d