diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c index 69ffd001e0..18d80a52f6 100644 --- a/libavcodec/x86/h264_qpel.c +++ b/libavcodec/x86/h264_qpel.c @@ -46,12 +46,10 @@ void ff_avg_pixels16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t #define ff_avg_pixels8_l2_sse2 ff_avg_pixels8_l2_mmxext #define ff_put_pixels16_l2_sse2 ff_put_pixels16_l2_mmxext #define ff_avg_pixels16_l2_sse2 ff_avg_pixels16_l2_mmxext -#define ff_put_pixels8_mmxext(...) #define ff_put_pixels4_mmxext(...) #define DEF_QPEL(OPNAME)\ void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\ -void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\ void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_ssse3(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\ void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_l2_mmxext(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);\ void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_mmxext(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);\ @@ -91,15 +89,6 @@ static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX }while(w--);\ }\ \ -static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\ - ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ - ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ - src += 8*srcStride;\ - dst += 8*dstStride;\ - ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ - ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ -}\ -\ static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride){\ ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\ ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\ @@ -196,10 +185,6 @@ static av_always_inline void ff_ ## OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uin #define ff_put_h264_qpel8or16_hv2_lowpass_sse2 ff_put_h264_qpel8or16_hv2_lowpass_mmxext #define ff_avg_h264_qpel8or16_hv2_lowpass_sse2 ff_avg_h264_qpel8or16_hv2_lowpass_mmxext -#define H264_MC_C_H(OPNAME, SIZE, MMX, ALIGN) \ -H264_MC_C(OPNAME, SIZE, MMX, ALIGN)\ -H264_MC_H(OPNAME, SIZE, MMX, ALIGN)\ - #define H264_MC_C_V_H_HV(OPNAME, SIZE, MMX, ALIGN) \ H264_MC_C(OPNAME, SIZE, MMX, ALIGN)\ H264_MC_V(OPNAME, SIZE, MMX, ALIGN)\ @@ -356,8 +341,7 @@ QPEL_H264_HV_XMM(put_, PUT_OP, ssse3) QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, ssse3) H264_MC(H264_MC_C_V_H_HV, 4, mmxext, 8) -H264_MC(H264_MC_C_H, 8, mmxext, 8) -H264_MC(H264_MC_H, 16, mmxext, 8) +H264_MC_C(avg_, 8, mmxext, 8) H264_MC_816(H264_MC_V, sse2) H264_MC_816(H264_MC_HV, sse2) H264_MC_816(H264_MC_H, ssse3) @@ -421,20 +405,11 @@ LUMA_MC_816(10, mc33, sse2) #endif /* HAVE_X86ASM */ -#define SET_QPEL_FUNCS123(PFX, IDX, SIZE, CPU, PREFIX) \ +#define SET_QPEL_FUNCS_1PP(PFX, IDX, SIZE, CPU, PREFIX) \ do { \ c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \ c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \ c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \ - } while (0) -#define SET_QPEL_FUNCS0123(PFX, IDX, SIZE, CPU, PREFIX) \ - do { \ - c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \ - SET_QPEL_FUNCS123(PFX, IDX, SIZE, CPU, PREFIX); \ - } while (0) -#define SET_QPEL_FUNCS_1PP(PFX, IDX, SIZE, CPU, PREFIX) \ - do { \ - SET_QPEL_FUNCS123(PFX, IDX, SIZE, CPU, PREFIX); \ c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \ c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \ c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \ @@ -478,11 +453,8 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth) if (EXTERNAL_MMXEXT(cpu_flags)) { if (!high_bit_depth) { - SET_QPEL_FUNCS123 (put_h264_qpel, 0, 16, mmxext, ); - SET_QPEL_FUNCS123 (put_h264_qpel, 1, 8, mmxext, ); SET_QPEL_FUNCS_1PP(put_h264_qpel, 2, 4, mmxext, ); - SET_QPEL_FUNCS123 (avg_h264_qpel, 0, 16, mmxext, ); - SET_QPEL_FUNCS0123(avg_h264_qpel, 1, 8, mmxext, ); + c->avg_h264_qpel_pixels_tab[1][0] = avg_h264_qpel8_mc00_mmxext; SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmxext, ); } else if (bit_depth == 10) { SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 10_mmxext, ff_); diff --git a/libavcodec/x86/h264_qpel_8bit.asm b/libavcodec/x86/h264_qpel_8bit.asm index 4e64329991..89e7c282b2 100644 --- a/libavcodec/x86/h264_qpel_8bit.asm +++ b/libavcodec/x86/h264_qpel_8bit.asm @@ -96,66 +96,6 @@ INIT_MMX mmxext QPEL4_H_LOWPASS_OP put QPEL4_H_LOWPASS_OP avg -%macro QPEL8_H_LOWPASS_OP 1 -cglobal %1_h264_qpel8_h_lowpass, 4,5 ; dst, src, dstStride, srcStride - movsxdifnidn r2, r2d - movsxdifnidn r3, r3d - mov r4d, 8 - pxor m7, m7 - mova m6, [pw_5] -.loop: - mova m0, [r1] - mova m2, [r1+1] - mova m1, m0 - mova m3, m2 - punpcklbw m0, m7 - punpckhbw m1, m7 - punpcklbw m2, m7 - punpckhbw m3, m7 - paddw m0, m2 - paddw m1, m3 - psllw m0, 2 - psllw m1, 2 - mova m2, [r1-1] - mova m4, [r1+2] - mova m3, m2 - mova m5, m4 - punpcklbw m2, m7 - punpckhbw m3, m7 - punpcklbw m4, m7 - punpckhbw m5, m7 - paddw m2, m4 - paddw m5, m3 - psubw m0, m2 - psubw m1, m5 - pmullw m0, m6 - pmullw m1, m6 - movd m2, [r1-2] - movd m5, [r1+7] - punpcklbw m2, m7 - punpcklbw m5, m7 - paddw m2, m3 - paddw m4, m5 - mova m5, [pw_16] - paddw m2, m5 - paddw m4, m5 - paddw m0, m2 - paddw m1, m4 - psraw m0, 5 - psraw m1, 5 - packuswb m0, m1 - op_%1 m0, [r0], m4 - add r0, r2 - add r1, r3 - dec r4d - jg .loop - RET -%endmacro - -INIT_MMX mmxext -QPEL8_H_LOWPASS_OP put -QPEL8_H_LOWPASS_OP avg - %macro QPEL8_H_LOWPASS_OP_XMM 1 cglobal %1_h264_qpel8_h_lowpass, 4,5,8 ; dst, src, dstStride, srcStride movsxdifnidn r2, r2d