diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c index 30851e266e..b17ee7e02d 100644 --- a/libavcodec/x86/h264_qpel.c +++ b/libavcodec/x86/h264_qpel.c @@ -32,20 +32,18 @@ #if HAVE_X86ASM void ff_avg_pixels4_mmxext(uint8_t *dst, const uint8_t *src, ptrdiff_t stride); -void ff_put_pixels4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, - ptrdiff_t stride); -void ff_avg_pixels4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, - ptrdiff_t stride); -#define ff_put_pixels4_l2_mmxext(dst, src1, src2, dststride, src1stride, h) \ - ff_put_pixels4_l2_mmxext((dst), (src1), (src2), (dststride)) -#define ff_avg_pixels4_l2_mmxext(dst, src1, src2, dststride, src1stride, h) \ - ff_avg_pixels4_l2_mmxext((dst), (src1), (src2), (dststride)) -#define ff_put_pixels8_l2_sse2 ff_put_pixels8_l2_mmxext -#define ff_avg_pixels8_l2_sse2(dst, src1, src2, dststride, src1stride, h) \ - ff_avg_pixels8_l2_mmxext((dst), (src1), (src2), (dststride), (src1stride)) -#define ff_put_pixels16_l2_sse2 ff_put_pixels16_l2_mmxext -#define ff_avg_pixels16_l2_sse2(dst, src1, src2, dststride, src1stride, h) \ - ff_avg_pixels16_l2_mmxext((dst), (src1), (src2), (dststride), (src1stride)) +void ff_put_pixels4x4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, + ptrdiff_t stride); +void ff_avg_pixels4x4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, + ptrdiff_t stride); +#define ff_put_pixels4x4_l2_mmxext(dst, src1, src2, dststride, src1stride) \ + ff_put_pixels4x4_l2_mmxext((dst), (src1), (src2), (dststride)) +#define ff_avg_pixels4x4_l2_mmxext(dst, src1, src2, dststride, src1stride) \ + ff_avg_pixels4x4_l2_mmxext((dst), (src1), (src2), (dststride)) +#define ff_put_pixels8x8_l2_sse2 ff_put_pixels8x8_l2_mmxext +#define ff_avg_pixels8x8_l2_sse2 ff_avg_pixels8x8_l2_mmxext +#define ff_put_pixels16x16_l2_sse2 ff_put_pixels16x16_l2_mmxext +#define ff_avg_pixels16x16_l2_sse2 ff_avg_pixels16x16_l2_mmxext #define DEF_QPEL(OPNAME)\ void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride);\ @@ -177,7 +175,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, const uin {\ LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\ ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ - ff_ ## OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src, temp, stride, stride, SIZE);\ + ff_ ## OPNAME ## pixels ## SIZE ## x ## SIZE ## _l2_ ## MMX(dst, src, temp, stride, stride);\ }\ \ static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ @@ -189,7 +187,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, const uin {\ LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\ ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ - ff_ ## OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src+stride, temp, stride, stride, SIZE);\ + ff_ ## OPNAME ## pixels ## SIZE ## x ## SIZE ## _l2_ ## MMX(dst, src+stride, temp, stride, stride);\ }\ #define H264_MC_HV(OPNAME, SIZE, MMX, ALIGN, SHIFT5_EXT) \ diff --git a/libavcodec/x86/h264_qpel_8bit.asm b/libavcodec/x86/h264_qpel_8bit.asm index 6e082819ac..3aa1f233a0 100644 --- a/libavcodec/x86/h264_qpel_8bit.asm +++ b/libavcodec/x86/h264_qpel_8bit.asm @@ -69,11 +69,11 @@ cglobal avg_pixels4, 3,4 mova %2, %1 %endmacro -; void ff_put/avg_pixels4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, -; ptrdiff_t stride) +; void ff_put/avg_pixels4x4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, +; ptrdiff_t stride) %macro PIXELS4_L2 1 %define OP op_%1h -cglobal %1_pixels4_l2, 4,4 +cglobal %1_pixels4x4_l2, 4,4 mova m0, [r1] mova m1, [r1+r3] lea r1, [r1+2*r3] diff --git a/libavcodec/x86/qpel.asm b/libavcodec/x86/qpel.asm index 93f0d007c3..be8bc4f579 100644 --- a/libavcodec/x86/qpel.asm +++ b/libavcodec/x86/qpel.asm @@ -37,11 +37,9 @@ SECTION .text %macro PIXELS8_L2 1 %define OP op_%1 %ifidn %1, put -; void ff_put_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, -; ptrdiff_t dstStride, ptrdiff_t src1Stride, int h) -cglobal put_pixels8_l2, 6,6 - test r5d, 1 - je .loop +; void ff_put_pixels8x9_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, +; ptrdiff_t dstStride, ptrdiff_t src1Stride) +cglobal put_pixels8x9_l2, 5,6 mova m0, [r1] mova m1, [r2] add r1, r4 @@ -49,13 +47,14 @@ cglobal put_pixels8_l2, 6,6 pavgb m0, m1 OP m0, [r0] add r0, r3 - dec r5d -%else -; void ff_avg_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, -; ptrdiff_t dstStride, ptrdiff_t src1Stride) -cglobal avg_pixels8_l2, 5,6 - mov r5d, 8 + ; FIXME: avoid jump if prologue is empty + jmp %1_pixels8x8_after_prologue_ %+ cpuname %endif +; void ff_avg/put_pixels8x8_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, +; ptrdiff_t dstStride, ptrdiff_t src1Stride) +cglobal %1_pixels8x8_l2, 5,6 +%1_pixels8x8_after_prologue_ %+ cpuname: + mov r5d, 8 .loop: mova m0, [r1] mova m1, [r1+r4] @@ -86,11 +85,9 @@ PIXELS8_L2 avg %macro PIXELS16_L2 1 %define OP op_%1 %ifidn %1, put -; void ff_put_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, -; ptrdiff_t dstStride, ptrdiff_t src1Stride, int h) -cglobal put_pixels16_l2, 6,6 - test r5d, 1 - je .loop +; void ff_put_pixels16x17_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, +; ptrdiff_t dstStride, ptrdiff_t src1Stride) +cglobal put_pixels16x17_l2, 5,6 mova m0, [r1] mova m1, [r1+8] pavgb m0, [r2] @@ -100,13 +97,14 @@ cglobal put_pixels16_l2, 6,6 OP m0, [r0] OP m1, [r0+8] add r0, r3 - dec r5d -%else -; void ff_avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, -; ptrdiff_t dstStride, ptrdiff_t src1Stride) -cglobal avg_pixels16_l2, 5,6 - mov r5d, 16 + ; FIXME: avoid jump if prologue is empty + jmp %1_pixels16x16_after_prologue_ %+ cpuname %endif +; void ff_avg/put_pixels16x16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, +; ptrdiff_t dstStride, ptrdiff_t src1Stride) +cglobal %1_pixels16x16_l2, 5,6 +%1_pixels16x16_after_prologue_ %+ cpuname: + mov r5d, 16 .loop: mova m0, [r1] mova m1, [r1+8] diff --git a/libavcodec/x86/qpel.h b/libavcodec/x86/qpel.h index b30d5e23dc..c4b6ee0413 100644 --- a/libavcodec/x86/qpel.h +++ b/libavcodec/x86/qpel.h @@ -22,17 +22,17 @@ #include #include -void ff_put_pixels8_l2_mmxext(uint8_t *dst, - const uint8_t *src1, const uint8_t *src2, - ptrdiff_t dstStride, ptrdiff_t src1Stride, int h); -void ff_avg_pixels8_l2_mmxext(uint8_t *dst, - const uint8_t *src1, const uint8_t *src2, - ptrdiff_t dstStride, ptrdiff_t src1Stride); -void ff_put_pixels16_l2_mmxext(uint8_t *dst, - const uint8_t *src1, const uint8_t *src2, - ptrdiff_t dstStride, ptrdiff_t src1Stride, int h); -void ff_avg_pixels16_l2_mmxext(uint8_t *dst, - const uint8_t *src1, const uint8_t *src2, - ptrdiff_t dstStride, ptrdiff_t src1Stride); +void ff_put_pixels8x8_l2_mmxext(uint8_t *dst, + const uint8_t *src1, const uint8_t *src2, + ptrdiff_t dstStride, ptrdiff_t src1Stride); +void ff_avg_pixels8x8_l2_mmxext(uint8_t *dst, + const uint8_t *src1, const uint8_t *src2, + ptrdiff_t dstStride, ptrdiff_t src1Stride); +void ff_put_pixels16x16_l2_mmxext(uint8_t *dst, + const uint8_t *src1, const uint8_t *src2, + ptrdiff_t dstStride, ptrdiff_t src1Stride); +void ff_avg_pixels16x16_l2_mmxext(uint8_t *dst, + const uint8_t *src1, const uint8_t *src2, + ptrdiff_t dstStride, ptrdiff_t src1Stride); #endif /* AVCODEC_X86_QPEL_H */ diff --git a/libavcodec/x86/qpeldsp.asm b/libavcodec/x86/qpeldsp.asm index 7fa7dbd2dc..52ddd8a8b2 100644 --- a/libavcodec/x86/qpeldsp.asm +++ b/libavcodec/x86/qpeldsp.asm @@ -32,13 +32,11 @@ cextern pw_20 SECTION .text -; void ff_put_no_rnd_pixels8_l2(uint8_t *dst, uint8_t *src1, uint8_t *src2, -; ptrdiff_t dstStride, ptrdiff_t src1Stride, int h) %macro PUT_NO_RND_PIXELS8_L2 0 -cglobal put_no_rnd_pixels8_l2, 6,6 +; void ff_put_no_rnd_pixels8x9_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, +; ptrdiff_t dstStride, ptrdiff_t src1Stride) +cglobal put_no_rnd_pixels8x9_l2, 5,6 pcmpeqb m6, m6 - test r5d, 1 - je .loop mova m0, [r1] mova m1, [r2] add r1, r4 @@ -49,7 +47,14 @@ cglobal put_no_rnd_pixels8_l2, 6,6 pxor m0, m6 mova [r0], m0 add r0, r3 - dec r5d + jmp put_no_rnd_pixels8x8_after_prologue_ %+ cpuname + +; void ff_put_no_rnd_pixels8x8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, +; ptrdiff_t dstStride, ptrdiff_t src1Stride) +cglobal put_no_rnd_pixels8x8_l2, 5,6 + pcmpeqb m6, m6 +put_no_rnd_pixels8x8_after_prologue_ %+ cpuname: + mov r5d, 8 .loop: mova m0, [r1] add r1, r4 @@ -97,13 +102,11 @@ INIT_MMX mmxext PUT_NO_RND_PIXELS8_L2 -; void ff_put_no_rnd_pixels16_l2(uint8_t *dst, uint8_t *src1, uint8_t *src2, -; ptrdiff_t dstStride, ptrdiff_t src1Stride, int h) +; void ff_put_no_rnd_pixels16x17_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, +; ptrdiff_t dstStride, ptrdiff_t src1Stride) %macro PUT_NO_RND_PIXELS16_l2 0 -cglobal put_no_rnd_pixels16_l2, 6,6 +cglobal put_no_rnd_pixels16x17_l2, 5,6 pcmpeqb m6, m6 - test r5d, 1 - je .loop mova m0, [r1] mova m1, [r1+8] mova m2, [r2] @@ -121,7 +124,14 @@ cglobal put_no_rnd_pixels16_l2, 6,6 mova [r0], m0 mova [r0+8], m1 add r0, r3 - dec r5d + jmp put_no_rnd_pixels16x16_after_prologue_ %+ cpuname + +; void ff_put_no_rnd_pixels16x16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, +; ptrdiff_t dstStride, ptrdiff_t src1Stride) +cglobal put_no_rnd_pixels16x16_l2, 5,6 + pcmpeqb m6, m6 +put_no_rnd_pixels16x16_after_prologue_ %+ cpuname: + mov r5d, 16 .loop: mova m0, [r1] mova m1, [r1+8] diff --git a/libavcodec/x86/qpeldsp_init.c b/libavcodec/x86/qpeldsp_init.c index f88c804a48..cab2ac433a 100644 --- a/libavcodec/x86/qpeldsp_init.c +++ b/libavcodec/x86/qpeldsp_init.c @@ -31,12 +31,24 @@ #include "fpel.h" #include "qpel.h" -void ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst, - const uint8_t *src1, const uint8_t *src2, - ptrdiff_t dstStride, ptrdiff_t src1Stride, int h); -void ff_put_no_rnd_pixels16_l2_mmxext(uint8_t *dst, - const uint8_t *src1, const uint8_t *src2, - ptrdiff_t dstStride, ptrdiff_t src1Stride, int h); +void ff_put_pixels8x9_l2_mmxext(uint8_t *dst, + const uint8_t *src1, const uint8_t *src2, + ptrdiff_t dstStride, ptrdiff_t src1Stride); +void ff_put_pixels16x17_l2_mmxext(uint8_t *dst, + const uint8_t *src1, const uint8_t *src2, + ptrdiff_t dstStride, ptrdiff_t src1Stride); +void ff_put_no_rnd_pixels8x8_l2_mmxext(uint8_t *dst, + const uint8_t *src1, const uint8_t *src2, + ptrdiff_t dstStride, ptrdiff_t src1Stride); +void ff_put_no_rnd_pixels8x9_l2_mmxext(uint8_t *dst, + const uint8_t *src1, const uint8_t *src2, + ptrdiff_t dstStride, ptrdiff_t src1Stride); +void ff_put_no_rnd_pixels16x16_l2_mmxext(uint8_t *dst, + const uint8_t *src1, const uint8_t *src2, + ptrdiff_t dstStride, ptrdiff_t src1Stride); +void ff_put_no_rnd_pixels16x17_l2_mmxext(uint8_t *dst, + const uint8_t *src1, const uint8_t *src2, + ptrdiff_t dstStride, ptrdiff_t src1Stride); void ff_put_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h); void ff_avg_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, @@ -70,7 +82,7 @@ void ff_put_no_rnd_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, #if HAVE_X86ASM -#define QPEL_OP(OPNAME, RND, MMX, ARG) \ +#define QPEL_OP(OPNAME, RND, MMX) \ static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, \ const uint8_t *src, \ ptrdiff_t stride) \ @@ -79,8 +91,8 @@ static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, \ uint8_t *const half = (uint8_t *) temp; \ ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \ stride, 8); \ - ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, src, half, \ - stride, stride, 8)); \ + ff_ ## OPNAME ## pixels8x8_l2_ ## MMX(dst, src, half, \ + stride, stride); \ } \ \ static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, \ @@ -99,8 +111,8 @@ static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, \ uint8_t *const half = (uint8_t *) temp; \ ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \ stride, 8); \ - ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, src + 1, half, \ - stride, stride, 8)); \ + ff_ ## OPNAME ## pixels8x8_l2_ ## MMX(dst, src + 1, half, \ + stride, stride); \ } \ \ static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, \ @@ -111,8 +123,8 @@ static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, \ uint8_t *const half = (uint8_t *) temp; \ ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \ 8, stride); \ - ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, src, half, \ - stride, stride, 8)); \ + ff_ ## OPNAME ## pixels8x8_l2_ ## MMX(dst, src, half, \ + stride, stride); \ } \ \ static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, \ @@ -131,8 +143,8 @@ static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, \ uint8_t *const half = (uint8_t *) temp; \ ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \ 8, stride); \ - ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, src + stride, half, \ - stride, stride, 8)); \ + ff_ ## OPNAME ## pixels8x8_l2_ ## MMX(dst, src + stride, half, \ + stride, stride); \ } \ \ static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, \ @@ -144,11 +156,11 @@ static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, \ uint8_t *const halfHV = (uint8_t *) half; \ ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ stride, 9); \ - ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \ - stride, 9); \ + ff_put ## RND ## pixels8x9_l2_ ## MMX(halfH, src, halfH, 8, \ + stride); \ ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ - ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, halfH, halfHV, \ - stride, 8, 8)); \ + ff_ ## OPNAME ## pixels8x8_l2_ ## MMX(dst, halfH, halfHV, \ + stride, 8); \ } \ \ static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, \ @@ -160,11 +172,11 @@ static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, \ uint8_t *const halfHV = (uint8_t *) half; \ ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ stride, 9); \ - ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \ - stride, 9); \ + ff_put ## RND ## pixels8x9_l2_ ## MMX(halfH, src + 1, halfH, 8, \ + stride); \ ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ - ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, halfH, halfHV, \ - stride, 8, 8)); \ + ff_ ## OPNAME ## pixels8x8_l2_ ## MMX(dst, halfH, halfHV, \ + stride, 8); \ } \ \ static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, \ @@ -176,11 +188,11 @@ static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, \ uint8_t *const halfHV = (uint8_t *) half; \ ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ stride, 9); \ - ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \ - stride, 9); \ + ff_put ## RND ## pixels8x9_l2_ ## MMX(halfH, src, halfH, 8, \ + stride); \ ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ - ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, halfH + 8, halfHV, \ - stride, 8, 8)); \ + ff_ ## OPNAME ## pixels8x8_l2_ ## MMX(dst, halfH + 8, halfHV, \ + stride, 8); \ } \ \ static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, \ @@ -192,11 +204,11 @@ static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, \ uint8_t *const halfHV = (uint8_t *) half; \ ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ stride, 9); \ - ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \ - stride, 9); \ + ff_put ## RND ## pixels8x9_l2_ ## MMX(halfH, src + 1, halfH, 8, \ + stride); \ ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ - ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, halfH + 8, halfHV, \ - stride, 8, 8)); \ + ff_ ## OPNAME ## pixels8x8_l2_ ## MMX(dst, halfH + 8, halfHV, \ + stride, 8); \ } \ \ static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, \ @@ -209,8 +221,8 @@ static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, \ ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ stride, 9); \ ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ - ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, halfH, halfHV, \ - stride, 8, 8)); \ + ff_ ## OPNAME ## pixels8x8_l2_ ## MMX(dst, halfH, halfHV, \ + stride, 8); \ } \ \ static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, \ @@ -223,8 +235,8 @@ static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, \ ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ stride, 9); \ ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ - ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, halfH + 8, halfHV, \ - stride, 8, 8)); \ + ff_ ## OPNAME ## pixels8x8_l2_ ## MMX(dst, halfH + 8, halfHV, \ + stride, 8); \ } \ \ static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, \ @@ -235,8 +247,8 @@ static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, \ uint8_t *const halfH = (uint8_t *) half; \ ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ stride, 9); \ - ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, \ - 8, stride, 9); \ + ff_put ## RND ## pixels8x9_l2_ ## MMX(halfH, src, halfH, \ + 8, stride); \ ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \ stride, 8); \ } \ @@ -249,8 +261,8 @@ static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, \ uint8_t *const halfH = (uint8_t *) half; \ ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \ stride, 9); \ - ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \ - stride, 9); \ + ff_put ## RND ## pixels8x9_l2_ ## MMX(halfH, src + 1, halfH, 8, \ + stride); \ ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \ stride, 8); \ } \ @@ -275,8 +287,8 @@ static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, \ uint8_t *const half = (uint8_t *) temp; \ ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \ stride, 16); \ - ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, src, half, \ - stride, stride, 16)); \ + ff_ ## OPNAME ## pixels16x16_l2_ ## MMX(dst, src, half, \ + stride, stride); \ } \ \ static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, \ @@ -295,8 +307,8 @@ static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, \ uint8_t *const half = (uint8_t*) temp; \ ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \ stride, 16); \ - ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, src + 1, half, \ - stride, stride, 16)); \ + ff_ ## OPNAME ## pixels16x16_l2_ ## MMX(dst, src + 1, half, \ + stride, stride); \ } \ \ static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, \ @@ -307,8 +319,8 @@ static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, \ uint8_t *const half = (uint8_t *) temp; \ ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \ stride); \ - ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, src, half, \ - stride, stride, 16)); \ + ff_ ## OPNAME ## pixels16x16_l2_ ## MMX(dst, src, half, \ + stride, stride); \ } \ \ static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, \ @@ -327,8 +339,8 @@ static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, \ uint8_t *const half = (uint8_t *) temp; \ ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \ stride); \ - ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, src+stride, half, \ - stride, stride, 16)); \ + ff_ ## OPNAME ## pixels16x16_l2_ ## MMX(dst, src+stride, half, \ + stride, stride); \ } \ \ static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, \ @@ -340,12 +352,12 @@ static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, \ uint8_t *const halfHV = (uint8_t *) half; \ ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ stride, 17); \ - ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \ - stride, 17); \ + ff_put ## RND ## pixels16x17_l2_ ## MMX(halfH, src, halfH, 16, \ + stride); \ ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ 16, 16); \ - ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, halfH, halfHV, \ - stride, 16, 16)); \ + ff_ ## OPNAME ## pixels16x16_l2_ ## MMX(dst, halfH, halfHV, \ + stride, 16); \ } \ \ static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, \ @@ -357,12 +369,12 @@ static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, \ uint8_t *const halfHV = (uint8_t *) half; \ ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ stride, 17); \ - ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \ - stride, 17); \ + ff_put ## RND ## pixels16x17_l2_ ## MMX(halfH, src + 1, halfH, 16, \ + stride); \ ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ 16, 16); \ - ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, halfH, halfHV, \ - stride, 16, 16)); \ + ff_ ## OPNAME ## pixels16x16_l2_ ## MMX(dst, halfH, halfHV, \ + stride, 16); \ } \ \ static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, \ @@ -374,12 +386,12 @@ static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, \ uint8_t *const halfHV = (uint8_t *) half; \ ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ stride, 17); \ - ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \ - stride, 17); \ + ff_put ## RND ## pixels16x17_l2_ ## MMX(halfH, src, halfH, 16, \ + stride); \ ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ 16, 16); \ - ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, halfH + 16, halfHV, \ - stride, 16, 16)); \ + ff_ ## OPNAME ## pixels16x16_l2_ ## MMX(dst, halfH + 16, halfHV, \ + stride, 16); \ } \ \ static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, \ @@ -391,12 +403,12 @@ static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, \ uint8_t *const halfHV = (uint8_t *) half; \ ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ stride, 17); \ - ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \ - stride, 17); \ + ff_put ## RND ## pixels16x17_l2_ ## MMX(halfH, src + 1, halfH, 16, \ + stride); \ ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ 16, 16); \ - ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, halfH + 16, halfHV, \ - stride, 16, 16)); \ + ff_ ## OPNAME ## pixels16x16_l2_ ## MMX(dst, halfH + 16, halfHV, \ + stride, 16); \ } \ \ static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, \ @@ -410,8 +422,8 @@ static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, \ stride, 17); \ ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ 16, 16); \ - ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, halfH, halfHV, \ - stride, 16, 16)); \ + ff_ ## OPNAME ## pixels16x16_l2_ ## MMX(dst, halfH, halfHV, \ + stride, 16); \ } \ \ static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, \ @@ -425,8 +437,8 @@ static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, \ stride, 17); \ ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \ 16, 16); \ - ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, halfH + 16, halfHV, \ - stride, 16, 16)); \ + ff_ ## OPNAME ## pixels16x16_l2_ ## MMX(dst, halfH + 16, halfHV, \ + stride, 16); \ } \ \ static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, \ @@ -437,8 +449,8 @@ static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, \ uint8_t *const halfH = (uint8_t *) half; \ ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ stride, 17); \ - ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \ - stride, 17); \ + ff_put ## RND ## pixels16x17_l2_ ## MMX(halfH, src, halfH, 16, \ + stride); \ ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \ stride, 16); \ } \ @@ -451,8 +463,8 @@ static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, \ uint8_t *const halfH = (uint8_t *) half; \ ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \ stride, 17); \ - ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \ - stride, 17); \ + ff_put ## RND ## pixels16x17_l2_ ## MMX(halfH, src + 1, halfH, 16, \ + stride); \ ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \ stride, 16); \ } \ @@ -469,13 +481,9 @@ static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, \ stride, 16); \ } -#define PASSTHROUGH(...) __VA_ARGS__ -#define STRIP_HEIGHT(dst, src1, src2, dststride, srcstride, height) \ - (dst), (src1), (src2), (dststride), (srcstride) - -QPEL_OP(put_, _, mmxext, PASSTHROUGH) -QPEL_OP(avg_, _, mmxext, STRIP_HEIGHT) -QPEL_OP(put_no_rnd_, _no_rnd_, mmxext, PASSTHROUGH) +QPEL_OP(put_, _, mmxext) +QPEL_OP(avg_, _, mmxext) +QPEL_OP(put_no_rnd_, _no_rnd_, mmxext) #endif /* HAVE_X86ASM */