From 8fad52bd57d5bcedce8dc4ae3166c1a50f895690 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt Date: Tue, 30 Sep 2025 00:32:27 +0200 Subject: [PATCH] avcodec/x86/h264_qpel: Use ptrdiff_t for strides Avoids having to sign-extend the strides in the assembly (it also is more correct given that the qpel_mc_func already uses ptrdiff_t). Reviewed-by: James Almer Signed-off-by: Andreas Rheinhardt --- libavcodec/x86/h264_qpel.c | 73 +++++++++++++++++-------------- libavcodec/x86/h264_qpel_8bit.asm | 27 ------------ 2 files changed, 41 insertions(+), 59 deletions(-) diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c index 6b9b4f7bc6..636be54530 100644 --- a/libavcodec/x86/h264_qpel.c +++ b/libavcodec/x86/h264_qpel.c @@ -19,14 +19,14 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include + #include "libavutil/attributes.h" #include "libavutil/avassert.h" #include "libavutil/cpu.h" #include "libavutil/mem_internal.h" -#include "libavutil/x86/asm.h" #include "libavutil/x86/cpu.h" #include "libavcodec/h264qpel.h" -#include "libavcodec/pixels.h" #include "fpel.h" #if HAVE_X86ASM @@ -55,29 +55,30 @@ void ff_avg_pixels16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t ff_avg_pixels16_l2_mmxext((dst), (src1), (src2), (dststride), (src1stride)) #define DEF_QPEL(OPNAME)\ -void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\ -void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_ssse3(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\ -void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_l2_mmxext(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);\ -void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_sse2(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);\ -void ff_ ## OPNAME ## _h264_qpel16_h_lowpass_l2_sse2(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);\ -void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);\ -void ff_ ## OPNAME ## _h264_qpel4_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\ -void ff_ ## OPNAME ## _h264_qpel8or16_v_lowpass_sse2(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h);\ -void ff_ ## OPNAME ## _h264_qpel4_hv_lowpass_v_mmxext(const uint8_t *src, int16_t *tmp, int srcStride);\ -void ff_ ## OPNAME ## _h264_qpel4_hv_lowpass_h_mmxext(int16_t *tmp, uint8_t *dst, int dstStride);\ -void ff_ ## OPNAME ## _h264_qpel8or16_hv1_lowpass_op_sse2(const uint8_t *src, int16_t *tmp, int srcStride, int size);\ -void ff_ ## OPNAME ## _h264_qpel8or16_hv2_lowpass_op_mmxext(uint8_t *dst, int16_t *tmp, int dstStride, int h);\ -void ff_ ## OPNAME ## _h264_qpel8_hv2_lowpass_ssse3(uint8_t *dst, int16_t *tmp, int dstStride);\ -void ff_ ## OPNAME ## _h264_qpel16_hv2_lowpass_ssse3(uint8_t *dst, int16_t *tmp, int dstStride);\ -void ff_ ## OPNAME ## _pixels4_l2_shift5_mmxext(uint8_t *dst, const int16_t *src16, const uint8_t *src8, int dstStride);\ -void ff_ ## OPNAME ## _pixels8_l2_shift5_sse2(uint8_t *dst, const int16_t *src16, const uint8_t *src8, int dstStride);\ -void ff_ ## OPNAME ## _pixels16_l2_shift5_sse2(uint8_t *dst, const int16_t *src16, const uint8_t *src8, int dstStride);\ +void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride);\ +void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_ssse3(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride);\ +void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_l2_mmxext(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride);\ +void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_sse2(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride);\ +void ff_ ## OPNAME ## _h264_qpel16_h_lowpass_l2_sse2(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride);\ +void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride);\ +void ff_ ## OPNAME ## _h264_qpel4_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride);\ +void ff_ ## OPNAME ## _h264_qpel8or16_v_lowpass_sse2(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h);\ +void ff_ ## OPNAME ## _h264_qpel4_hv_lowpass_v_mmxext(const uint8_t *src, int16_t *tmp, ptrdiff_t srcStride);\ +void ff_ ## OPNAME ## _h264_qpel4_hv_lowpass_h_mmxext(int16_t *tmp, uint8_t *dst, ptrdiff_t dstStride);\ +void ff_ ## OPNAME ## _h264_qpel8or16_hv1_lowpass_op_sse2(const uint8_t *src, int16_t *tmp, ptrdiff_t srcStride, int size);\ +void ff_ ## OPNAME ## _h264_qpel8or16_hv2_lowpass_op_mmxext(uint8_t *dst, int16_t *tmp, ptrdiff_t dstStride, int h);\ +void ff_ ## OPNAME ## _h264_qpel8_hv2_lowpass_ssse3(uint8_t *dst, int16_t *tmp, ptrdiff_t dstStride);\ +void ff_ ## OPNAME ## _h264_qpel16_hv2_lowpass_ssse3(uint8_t *dst, int16_t *tmp, ptrdiff_t dstStride);\ +void ff_ ## OPNAME ## _pixels4_l2_shift5_mmxext(uint8_t *dst, const int16_t *src16, const uint8_t *src8, ptrdiff_t dstStride);\ +void ff_ ## OPNAME ## _pixels8_l2_shift5_sse2(uint8_t *dst, const int16_t *src16, const uint8_t *src8, ptrdiff_t dstStride);\ +void ff_ ## OPNAME ## _pixels16_l2_shift5_sse2(uint8_t *dst, const int16_t *src16, const uint8_t *src8, ptrdiff_t dstStride);\ DEF_QPEL(avg) DEF_QPEL(put) #define QPEL_H264(OPNAME, OP, MMX)\ -static av_always_inline void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int srcStride){\ +static av_always_inline void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ +{\ int w=3;\ src -= 2*srcStride+2;\ while(w--){\ @@ -89,7 +90,8 @@ static av_always_inline void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *ds ff_ ## OPNAME ## h264_qpel4_hv_lowpass_h_mmxext(tmp, dst, dstStride);\ }\ \ -static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, int dstStride, int size){\ +static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, ptrdiff_t dstStride, int size)\ +{\ int w = size>>4;\ do{\ ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_op_mmxext(dst, tmp, dstStride, size);\ @@ -99,7 +101,8 @@ static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX }\ #define QPEL_H264_H16(OPNAME, EXT) \ -static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_l2_ ## EXT(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride){\ +static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_l2_ ## EXT(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)\ +{\ ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## EXT(dst , src , src2 , dstStride, src2Stride);\ ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## EXT(dst+8, src+8, src2+8, dstStride, src2Stride);\ src += 8*dstStride;\ @@ -113,8 +116,8 @@ static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_l2_ ## EXT(u #if ARCH_X86_64 #define QPEL_H264_H16_XMM(OPNAME, OP, MMX)\ -void ff_avg_h264_qpel16_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride); -void ff_put_h264_qpel16_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride); +void ff_avg_h264_qpel16_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride); +void ff_put_h264_qpel16_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride); #else // ARCH_X86_64 #define QPEL_H264_H16_XMM(OPNAME, OP, EXT) QPEL_H264_H16(OPNAME, EXT) @@ -122,7 +125,8 @@ void ff_put_h264_qpel16_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, con #define QPEL_H264_H_XMM(OPNAME, OP, MMX)\ QPEL_H264_H16_XMM(OPNAME, OP, MMX)\ -static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\ +static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ +{\ ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ src += 8*srcStride;\ @@ -132,17 +136,19 @@ static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint }\ #define QPEL_H264_V_XMM(OPNAME, OP, MMX)\ -static av_always_inline void ff_ ## OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\ +static av_always_inline void ff_ ## OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ +{\ ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 8);\ }\ -static av_always_inline void ff_ ## OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\ +static av_always_inline void ff_ ## OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ +{\ ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 16);\ ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\ } static av_always_inline void put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp, const uint8_t *src, - int srcStride, + ptrdiff_t srcStride, int size) { int w = (size+8)>>3; @@ -155,20 +161,23 @@ static av_always_inline void put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp, } #define QPEL_H264_HV_XMM(OPNAME, OP, MMX)\ -static av_always_inline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int srcStride, int size){\ +static av_always_inline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int size)\ +{\ put_h264_qpel8or16_hv1_lowpass_sse2(tmp, src, srcStride, size);\ ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(dst, tmp, dstStride, size);\ }\ -static av_always_inline void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int srcStride){\ +static av_always_inline void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ +{\ OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, srcStride, 8);\ }\ -static av_always_inline void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int srcStride){\ +static av_always_inline void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\ +{\ OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, srcStride, 16);\ }\ #define SSSE3_HV2_LOWPASS_WRAPPER(OPNAME) \ static av_always_inline void \ -ff_ ## OPNAME ## _h264_qpel8or16_hv2_lowpass_ssse3(uint8_t *dst, int16_t *tmp, int dstStride, int size) \ +ff_ ## OPNAME ## _h264_qpel8or16_hv2_lowpass_ssse3(uint8_t *dst, int16_t *tmp, ptrdiff_t dstStride, int size) \ {\ if (size == 8)\ ff_ ## OPNAME ## _h264_qpel8_hv2_lowpass_ssse3(dst, tmp, dstStride);\ diff --git a/libavcodec/x86/h264_qpel_8bit.asm b/libavcodec/x86/h264_qpel_8bit.asm index 9ca78b0775..ede4f382e1 100644 --- a/libavcodec/x86/h264_qpel_8bit.asm +++ b/libavcodec/x86/h264_qpel_8bit.asm @@ -97,8 +97,6 @@ PIXELS4_L2 avg %macro QPEL4_H_LOWPASS_OP 1 cglobal %1_h264_qpel4_h_lowpass, 4,5 ; dst, src, dstStride, srcStride - movsxdifnidn r2, r2d - movsxdifnidn r3, r3d pxor m7, m7 mova m4, [pw_5] mova m5, [pw_16] @@ -140,8 +138,6 @@ QPEL4_H_LOWPASS_OP avg %macro QPEL8_H_LOWPASS_OP_XMM 1 cglobal %1_h264_qpel8_h_lowpass, 4,5,8 ; dst, src, dstStride, srcStride - movsxdifnidn r2, r2d - movsxdifnidn r3, r3d mov r4d, 8 pxor m7, m7 mova m6, [pw_5] @@ -184,8 +180,6 @@ QPEL8_H_LOWPASS_OP_XMM avg %macro QPEL4_H_LOWPASS_L2_OP 1 cglobal %1_h264_qpel4_h_lowpass_l2, 5,6 ; dst, src, src2, dstStride, srcStride - movsxdifnidn r3, r3d - movsxdifnidn r4, r4d pxor m7, m7 mova m4, [pw_5] mova m5, [pw_16] @@ -231,8 +225,6 @@ QPEL4_H_LOWPASS_L2_OP avg %macro QPEL8_H_LOWPASS_L2_OP 1 cglobal %1_h264_qpel8_h_lowpass_l2, 5,6,6 ; dst, src, src2, dstStride, srcStride - movsxdifnidn r3, r3d - movsxdifnidn r4, r4d mova m3, [pw_16] mov r5d, 8 pxor m5, m5 @@ -285,8 +277,6 @@ cglobal %1_h264_qpel16_h_lowpass_l2, 5,6,9 ; dst, src, src2, dstStride, srcStrid cglobal %1_h264_qpel16_h_lowpass_l2, 5,6,8 ; dst, src, src2, dstStride, srcStride %define PW_16 [pw_16] %endif - movsxdifnidn r3, r3d - movsxdifnidn r4, r4d mov r5d, 16 pxor m7, m7 mova m6, [pw_5] @@ -352,8 +342,6 @@ QPEL16_H_LOWPASS_L2 avg %macro QPEL8_H_LOWPASS_L2_OP_XMM 1 cglobal %1_h264_qpel8_h_lowpass_l2, 5,6,8 ; dst, src, src2, dstStride, src2Stride - movsxdifnidn r3, r3d - movsxdifnidn r4, r4d mov r5d, 8 pxor m7, m7 mova m6, [pw_5] @@ -421,8 +409,6 @@ QPEL8_H_LOWPASS_L2_OP_XMM avg %macro QPEL4_V_LOWPASS_OP 1 cglobal %1_h264_qpel4_v_lowpass, 4,4 ; dst, src, dstStride, srcStride - movsxdifnidn r2, r2d - movsxdifnidn r3, r3d sub r1, r3 sub r1, r3 pxor m7, m7 @@ -454,8 +440,6 @@ QPEL4_V_LOWPASS_OP avg %macro QPEL8OR16_V_LOWPASS_OP 1 cglobal %1_h264_qpel8or16_v_lowpass, 5,5,8 ; dst, src, dstStride, srcStride, h - movsxdifnidn r2, r2d - movsxdifnidn r3, r3d sub r1, r3 sub r1, r3 pxor m7, m7 @@ -520,7 +504,6 @@ QPEL8OR16_V_LOWPASS_OP avg %macro QPEL4_HV1_LOWPASS_OP 1 cglobal %1_h264_qpel4_hv_lowpass_v, 3,3 ; src, tmp, srcStride - movsxdifnidn r2, r2d pxor m7, m7 movh m0, [r0] movh m1, [r0+r2] @@ -542,7 +525,6 @@ cglobal %1_h264_qpel4_hv_lowpass_v, 3,3 ; src, tmp, srcStride RET cglobal %1_h264_qpel4_hv_lowpass_h, 3,4 ; tmp, dst, dstStride - movsxdifnidn r2, r2d mov r3d, 4 .loop: mova m0, [r0] @@ -573,7 +555,6 @@ QPEL4_HV1_LOWPASS_OP avg INIT_XMM sse2 cglobal put_h264_qpel8or16_hv1_lowpass_op, 4,4,8 ; src, tmp, srcStride, size - movsxdifnidn r2, r2d pxor m7, m7 movh m0, [r0] movh m1, [r0+r2] @@ -612,7 +593,6 @@ cglobal put_h264_qpel8or16_hv1_lowpass_op, 4,4,8 ; src, tmp, srcStride, size %macro QPEL8OR16_HV2_LOWPASS_OP 1 cglobal %1_h264_qpel8or16_hv2_lowpass_op, 4,4 ; dst, tmp, dstStride, h - movsxdifnidn r2, r2d .loop: mova m0, [r1] mova m3, [r1+8] @@ -659,7 +639,6 @@ cglobal %1_h264_qpel8_hv2_lowpass, 3,4,7 ; dst, tmp, dstStride %else cglobal %1_h264_qpel8_hv2_lowpass, 3,4,6 ; dst, tmp, dstStride %endif - movsxdifnidn r2, r2d mov r3d, 8 .loop: mova m1, [r1+16] @@ -692,7 +671,6 @@ cglobal %1_h264_qpel8_hv2_lowpass, 3,4,6 ; dst, tmp, dstStride RET cglobal %1_h264_qpel16_hv2_lowpass, 3,4,8 ; dst, tmp, dstStride - movsxdifnidn r2, r2d mov r3d, 16 .loop: mova m4, [r1+32] @@ -752,7 +730,6 @@ QPEL8OR16_HV2_LOWPASS_OP_XMM avg %macro PIXELS4_L2_SHIFT5 1 cglobal %1_pixels4_l2_shift5,4,4 ; dst, src16, src8, dstStride - movsxdifnidn r3, r3d mova m0, [r1] mova m1, [r1+24] psraw m0, 5 @@ -783,7 +760,6 @@ PIXELS4_L2_SHIFT5 avg %macro PIXELS8_L2_SHIFT5 1 cglobal %1_pixels8_l2_shift5, 5, 5, 3 ; dst, src16, src8, dstStride - movsxdifnidn r3, r3d mov r4d, 8 .loop: movu m0, [r1] @@ -805,7 +781,6 @@ cglobal %1_pixels8_l2_shift5, 5, 5, 3 ; dst, src16, src8, dstStride %macro PIXELS16_L2_SHIFT5 2 cglobal %1_pixels%2_l2_shift5, 5, 5, 4 ; dst, src16, src8, dstStride - movsxdifnidn r3, r3d mov r4d, %2 .loop: movu m0, [r1] @@ -840,8 +815,6 @@ PIXELS16_L2_SHIFT5 avg, 16 %if ARCH_X86_64 %macro QPEL16_H_LOWPASS_L2_OP 1 cglobal %1_h264_qpel16_h_lowpass_l2, 5, 6, 16 ; dst, src, src2, dstStride, src2Stride - movsxdifnidn r3, r3d - movsxdifnidn r4, r4d mov r5d, 16 pxor m15, m15 mova m14, [pw_5]