diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c index d2a20c3d6c..43e68d2d97 100644 --- a/libavcodec/x86/h264_qpel.c +++ b/libavcodec/x86/h264_qpel.c @@ -31,21 +31,27 @@ #if HAVE_X86ASM void ff_put_pixels4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, - ptrdiff_t dstStride, ptrdiff_t src1Stride, int h); + ptrdiff_t stride); void ff_avg_pixels4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, - ptrdiff_t dstStride, ptrdiff_t src1Stride, int h); + ptrdiff_t stride); void ff_put_pixels8_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src1Stride, int h); void ff_avg_pixels8_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, - ptrdiff_t dstStride, ptrdiff_t src1Stride, int h); + ptrdiff_t dstStride, ptrdiff_t src1Stride); void ff_put_pixels16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src1Stride, int h); void ff_avg_pixels16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, - ptrdiff_t dstStride, ptrdiff_t src1Stride, int h); + ptrdiff_t dstStride, ptrdiff_t src1Stride); +#define ff_put_pixels4_l2_mmxext(dst, src1, src2, dststride, src1stride, h) \ + ff_put_pixels4_l2_mmxext((dst), (src1), (src2), (dststride)) +#define ff_avg_pixels4_l2_mmxext(dst, src1, src2, dststride, src1stride, h) \ + ff_avg_pixels4_l2_mmxext((dst), (src1), (src2), (dststride)) #define ff_put_pixels8_l2_sse2 ff_put_pixels8_l2_mmxext -#define ff_avg_pixels8_l2_sse2 ff_avg_pixels8_l2_mmxext +#define ff_avg_pixels8_l2_sse2(dst, src1, src2, dststride, src1stride, h) \ + ff_avg_pixels8_l2_mmxext((dst), (src1), (src2), (dststride), (src1stride)) #define ff_put_pixels16_l2_sse2 ff_put_pixels16_l2_mmxext -#define ff_avg_pixels16_l2_sse2 ff_avg_pixels16_l2_mmxext +#define ff_avg_pixels16_l2_sse2(dst, src1, src2, dststride, src1stride, h) \ + ff_avg_pixels16_l2_mmxext((dst), (src1), (src2), (dststride), (src1stride)) #define ff_put_pixels4_mmxext(...) #define DEF_QPEL(OPNAME)\ diff --git a/libavcodec/x86/qpel.asm b/libavcodec/x86/qpel.asm index 16da2dbc3b..043f7b0a66 100644 --- a/libavcodec/x86/qpel.asm +++ b/libavcodec/x86/qpel.asm @@ -45,20 +45,20 @@ SECTION .text %endmacro ; void ff_put/avg_pixels4_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, -; ptrdiff_t dstStride, ptrdiff_t src1Stride, int h) +; ptrdiff_t stride) %macro PIXELS4_L2 1 %define OP op_%1h -cglobal %1_pixels4_l2, 6,6 +cglobal %1_pixels4_l2, 4,4 mova m0, [r1] - mova m1, [r1+r4] - lea r1, [r1+2*r4] + mova m1, [r1+r3] + lea r1, [r1+2*r3] pavgb m0, [r2] pavgb m1, [r2+4] OP m0, [r0], m3 OP m1, [r0+r3], m3 lea r0, [r0+2*r3] mova m0, [r1] - mova m1, [r1+r4] + mova m1, [r1+r3] pavgb m0, [r2+8] pavgb m1, [r2+12] OP m0, [r0], m3 @@ -70,12 +70,12 @@ INIT_MMX mmxext PIXELS4_L2 put PIXELS4_L2 avg -; void ff_put/avg_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, -; ptrdiff_t dstStride, ptrdiff_t src1Stride, int h) %macro PIXELS8_L2 1 %define OP op_%1 -cglobal %1_pixels8_l2, 6,6 %ifidn %1, put +; void ff_put_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, +; ptrdiff_t dstStride, ptrdiff_t src1Stride, int h) +cglobal put_pixels8_l2, 6,6 test r5d, 1 je .loop mova m0, [r1] @@ -86,6 +86,11 @@ cglobal %1_pixels8_l2, 6,6 OP m0, [r0] add r0, r3 dec r5d +%else +; void ff_avg_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, +; ptrdiff_t dstStride, ptrdiff_t src1Stride) +cglobal avg_pixels8_l2, 5,6 + mov r5d, 8 %endif .loop: mova m0, [r1] @@ -114,12 +119,12 @@ INIT_MMX mmxext PIXELS8_L2 put PIXELS8_L2 avg -; void ff_put/avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, -; ptrdiff_t dstStride, ptrdiff_t src1Stride, int h) %macro PIXELS16_L2 1 %define OP op_%1 -cglobal %1_pixels16_l2, 6,6 %ifidn %1, put +; void ff_put_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, +; ptrdiff_t dstStride, ptrdiff_t src1Stride, int h) +cglobal put_pixels16_l2, 6,6 test r5d, 1 je .loop mova m0, [r1] @@ -132,6 +137,11 @@ cglobal %1_pixels16_l2, 6,6 OP m1, [r0+8] add r0, r3 dec r5d +%else +; void ff_avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, +; ptrdiff_t dstStride, ptrdiff_t src1Stride) +cglobal avg_pixels16_l2, 5,6 + mov r5d, 16 %endif .loop: mova m0, [r1] diff --git a/libavcodec/x86/qpeldsp_init.c b/libavcodec/x86/qpeldsp_init.c index 33e1643669..0bb39402d4 100644 --- a/libavcodec/x86/qpeldsp_init.c +++ b/libavcodec/x86/qpeldsp_init.c @@ -39,13 +39,13 @@ void ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst, ptrdiff_t dstStride, ptrdiff_t src1Stride, int h); void ff_avg_pixels8_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, - ptrdiff_t dstStride, ptrdiff_t src1Stride, int h); + ptrdiff_t dstStride, ptrdiff_t src1Stride); void ff_put_pixels16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src1Stride, int h); void ff_avg_pixels16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, - ptrdiff_t dstStride, ptrdiff_t src1Stride, int h); + ptrdiff_t dstStride, ptrdiff_t src1Stride); void ff_put_no_rnd_pixels16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src1Stride, int h); @@ -82,7 +82,7 @@ void ff_put_no_rnd_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, #if HAVE_X86ASM -#define QPEL_OP(OPNAME, RND, MMX) \ +#define QPEL_OP(OPNAME, RND, MMX, ARG) \ static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, \ const uint8_t *src, \ ptrdiff_t stride) \ @@ -91,8 +91,8 @@ static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, \ uint8_t *const half = (uint8_t *) temp; \ ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \ stride, 8); \ - ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \ - stride, stride, 8); \ + ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, src, half, \ + stride, stride, 8)); \ } \ \ static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, \ @@ -111,8 +111,8 @@ static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, \ uint8_t *const half = (uint8_t *) temp; \ ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \ stride, 8); \ - ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + 1, half, stride, \ - stride, 8); \ + ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, src + 1, half, \ + stride, stride, 8)); \ } \ \ static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, \ @@ -123,8 +123,8 @@ static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, \ uint8_t *const half = (uint8_t *) temp; \ ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \ 8, stride); \ - ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \ - stride, stride, 8); \ + ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, src, half, \ + stride, stride, 8)); \ } \ \ static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, \ @@ -143,8 +143,8 @@ static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, \ uint8_t *const half = (uint8_t *) temp; \ ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \ 8, stride); \ - ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + stride, half, stride,\ - stride, 8); \ + ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, src + stride, half, \ + stride, stride, 8)); \ } \ \ static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, \ @@ -159,8 +159,8 @@ static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, \ ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \ stride, 9); \ ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ - ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \ - stride, 8, 8); \ + ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, halfH, halfHV, \ + stride, 8, 8)); \ } \ \ static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, \ @@ -175,8 +175,8 @@ static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, \ ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \ stride, 9); \ ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ - ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \ - stride, 8, 8); \ + ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, halfH, halfHV, \ + stride, 8, 8)); \ } \ \ static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, \ @@ -191,8 +191,8 @@ static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, \ ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \ stride, 9); \ ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ - ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \ - stride, 8, 8); \ + ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, halfH + 8, halfHV, \ + stride, 8, 8)); \ } \ \ static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, \ @@ -207,8 +207,8 @@ static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, \ ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \ stride, 9); \ ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ - ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \ - stride, 8, 8); \ + ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, halfH + 8, halfHV, \ + stride, 8, 8)); \ } \ \ static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, \ @@ -221,8 +221,8 @@ static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, \ ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ stride, 9); \ ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ - ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \ - stride, 8, 8); \ + ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, halfH, halfHV, \ + stride, 8, 8)); \ } \ \ static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, \ @@ -235,8 +235,8 @@ static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, \ ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ stride, 9); \ ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ - ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \ - stride, 8, 8); \ + ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, halfH + 8, halfHV, \ + stride, 8, 8)); \ } \ \ static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, \ @@ -287,8 +287,8 @@ static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, \ uint8_t *const half = (uint8_t *) temp; \ ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \ stride, 16); \ - ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \ - stride, 16); \ + ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, src, half, \ + stride, stride, 16)); \ } \ \ static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, \ @@ -307,8 +307,8 @@ static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, \ uint8_t *const half = (uint8_t*) temp; \ ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \ stride, 16); \ - ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src + 1, half, \ - stride, stride, 16); \ + ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, src + 1, half, \ + stride, stride, 16)); \ } \ \ static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, \ @@ -319,8 +319,8 @@ static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, \ uint8_t *const half = (uint8_t *) temp; \ ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \ stride); \ - ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \ - stride, 16); \ + ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, src, half, \ + stride, stride, 16)); \ } \ \ static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, \ @@ -339,8 +339,8 @@ static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, \ uint8_t *const half = (uint8_t *) temp; \ ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \ stride); \ - ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src+stride, half, \ - stride, stride, 16); \ + ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, src+stride, half, \ + stride, stride, 16)); \ } \ \ static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, \ @@ -356,8 +356,8 @@ static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, \ stride, 17); \ ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ 16, 16); \ - ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \ - stride, 16, 16); \ + ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, halfH, halfHV, \ + stride, 16, 16)); \ } \ \ static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, \ @@ -373,8 +373,8 @@ static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, \ stride, 17); \ ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ 16, 16); \ - ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \ - stride, 16, 16); \ + ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, halfH, halfHV, \ + stride, 16, 16)); \ } \ \ static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, \ @@ -390,8 +390,8 @@ static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, \ stride, 17); \ ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ 16, 16); \ - ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \ - stride, 16, 16); \ + ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, halfH + 16, halfHV, \ + stride, 16, 16)); \ } \ \ static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, \ @@ -407,8 +407,8 @@ static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, \ stride, 17); \ ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ 16, 16); \ - ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \ - stride, 16, 16); \ + ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, halfH + 16, halfHV, \ + stride, 16, 16)); \ } \ \ static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, \ @@ -422,8 +422,8 @@ static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, \ stride, 17); \ ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ 16, 16); \ - ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \ - stride, 16, 16); \ + ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, halfH, halfHV, \ + stride, 16, 16)); \ } \ \ static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, \ @@ -437,8 +437,8 @@ static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, \ stride, 17); \ ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ 16, 16); \ - ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \ - stride, 16, 16); \ + ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, halfH + 16, halfHV, \ + stride, 16, 16)); \ } \ \ static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, \ @@ -481,9 +481,13 @@ static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, \ stride, 16); \ } -QPEL_OP(put_, _, mmxext) -QPEL_OP(avg_, _, mmxext) -QPEL_OP(put_no_rnd_, _no_rnd_, mmxext) +#define PASSTHROUGH(...) __VA_ARGS__ +#define STRIP_HEIGHT(dst, src1, src2, dststride, srcstride, height) \ + (dst), (src1), (src2), (dststride), (srcstride) + +QPEL_OP(put_, _, mmxext, PASSTHROUGH) +QPEL_OP(avg_, _, mmxext, STRIP_HEIGHT) +QPEL_OP(put_no_rnd_, _no_rnd_, mmxext, PASSTHROUGH) #define MC00(OPNAME, SIZE, EXT) \ static void OPNAME ## _qpel ## SIZE ## _mc00_ ## EXT(uint8_t *dst, \