diff --git a/libavcodec/x86/cavsdsp.c b/libavcodec/x86/cavsdsp.c index f974f93fc0..7ceb51a23c 100644 --- a/libavcodec/x86/cavsdsp.c +++ b/libavcodec/x86/cavsdsp.c @@ -36,16 +36,7 @@ #include "config.h" -#if HAVE_MMX_EXTERNAL - -void ff_cavs_idct8_mmx(int16_t *out, const int16_t *in); - -static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, ptrdiff_t stride) -{ - LOCAL_ALIGNED(16, int16_t, b2, [64]); - ff_cavs_idct8_mmx(b2, block); - ff_add_pixels_clamped_mmx(b2, dst, stride); -} +#if HAVE_SSE2_EXTERNAL void ff_cavs_idct8_sse2(int16_t *out, const int16_t *in); @@ -56,9 +47,9 @@ static void cavs_idct8_add_sse2(uint8_t *dst, int16_t *block, ptrdiff_t stride) ff_add_pixels_clamped_sse2(b2, dst, stride); } -#endif /* HAVE_MMX_EXTERNAL */ +#endif /* HAVE_SSE2_EXTERNAL */ -#if (HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE) +#if HAVE_MMXEXT_INLINE /***************************************************************************** * @@ -326,7 +317,7 @@ static void OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, const uin "pavgb " #temp ", " #a " \n\t"\ "mov" #size " " #a ", " #b " \n\t" -#endif /* (HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE) */ +#endif /* HAVE_MMXEXT_INLINE */ #if HAVE_MMX_EXTERNAL static void put_cavs_qpel8_mc00_mmx(uint8_t *dst, const uint8_t *src, @@ -335,36 +326,12 @@ static void put_cavs_qpel8_mc00_mmx(uint8_t *dst, const uint8_t *src, ff_put_pixels8_mmx(dst, src, stride, 8); } -static void avg_cavs_qpel8_mc00_mmx(uint8_t *dst, const uint8_t *src, - ptrdiff_t stride) -{ - ff_avg_pixels8_mmx(dst, src, stride, 8); -} - static void avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, const uint8_t *src, ptrdiff_t stride) { ff_avg_pixels8_mmxext(dst, src, stride, 8); } -static void put_cavs_qpel16_mc00_mmx(uint8_t *dst, const uint8_t *src, - ptrdiff_t stride) -{ - ff_put_pixels16_mmx(dst, src, stride, 16); -} - -static void avg_cavs_qpel16_mc00_mmx(uint8_t *dst, const uint8_t *src, - ptrdiff_t stride) -{ - ff_avg_pixels16_mmx(dst, src, stride, 16); -} - -static void avg_cavs_qpel16_mc00_mmxext(uint8_t *dst, const uint8_t *src, - ptrdiff_t stride) -{ - ff_avg_pixels16_mmxext(dst, src, stride, 16); -} - static void put_cavs_qpel16_mc00_sse2(uint8_t *dst, const uint8_t *src, ptrdiff_t stride) { @@ -382,13 +349,7 @@ static av_cold void cavsdsp_init_mmx(CAVSDSPContext *c, AVCodecContext *avctx) { #if HAVE_MMX_EXTERNAL - c->put_cavs_qpel_pixels_tab[0][0] = put_cavs_qpel16_mc00_mmx; c->put_cavs_qpel_pixels_tab[1][0] = put_cavs_qpel8_mc00_mmx; - c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_mmx; - c->avg_cavs_qpel_pixels_tab[1][0] = avg_cavs_qpel8_mc00_mmx; - - c->cavs_idct8_add = cavs_idct8_add_mmx; - c->idct_perm = FF_IDCT_PERM_TRANSPOSE; #endif /* HAVE_MMX_EXTERNAL */ } @@ -408,25 +369,6 @@ CAVS_MC(avg_, 8, mmxext) CAVS_MC(avg_, 16, mmxext) #endif /* HAVE_MMXEXT_INLINE */ -#if HAVE_AMD3DNOW_INLINE -QPEL_CAVS(put_, PUT_OP, 3dnow) -QPEL_CAVS(avg_, AVG_3DNOW_OP, 3dnow) - -CAVS_MC(put_, 8, 3dnow) -CAVS_MC(put_, 16,3dnow) -CAVS_MC(avg_, 8, 3dnow) -CAVS_MC(avg_, 16,3dnow) - -static av_cold void cavsdsp_init_3dnow(CAVSDSPContext *c, - AVCodecContext *avctx) -{ - DSPFUNC(put, 0, 16, 3dnow); - DSPFUNC(put, 1, 8, 3dnow); - DSPFUNC(avg, 0, 16, 3dnow); - DSPFUNC(avg, 1, 8, 3dnow); -} -#endif /* HAVE_AMD3DNOW_INLINE */ - av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c, AVCodecContext *avctx) { av_unused int cpu_flags = av_get_cpu_flags(); @@ -434,10 +376,6 @@ av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c, AVCodecContext *avctx) if (X86_MMX(cpu_flags)) cavsdsp_init_mmx(c, avctx); -#if HAVE_AMD3DNOW_INLINE - if (INLINE_AMD3DNOW(cpu_flags)) - cavsdsp_init_3dnow(c, avctx); -#endif /* HAVE_AMD3DNOW_INLINE */ #if HAVE_MMXEXT_INLINE if (INLINE_MMXEXT(cpu_flags)) { DSPFUNC(put, 0, 16, mmxext); @@ -448,7 +386,6 @@ av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c, AVCodecContext *avctx) #endif #if HAVE_MMX_EXTERNAL if (EXTERNAL_MMXEXT(cpu_flags)) { - c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_mmxext; c->avg_cavs_qpel_pixels_tab[1][0] = avg_cavs_qpel8_mc00_mmxext; } #endif diff --git a/libavcodec/x86/cavsidct.asm b/libavcodec/x86/cavsidct.asm index 6c768c2646..f133c73546 100644 --- a/libavcodec/x86/cavsidct.asm +++ b/libavcodec/x86/cavsidct.asm @@ -107,68 +107,6 @@ SECTION .text SUMSUB_BA w, 1, 0 ; m1 = dst3, m0 = dst4 %endmacro -INIT_MMX mmx -cglobal cavs_idct8, 2, 4, 8, 8 * 16, out, in, cnt, tmp - mov cntd, 2 - mov tmpq, rsp - -.loop_1: - CAVS_IDCT8_1D inq, [pw_4] - psraw m7, 3 - psraw m6, 3 - psraw m5, 3 - psraw m4, 3 - psraw m3, 3 - psraw m2, 3 - psraw m1, 3 - psraw m0, 3 - mova [tmpq], m7 - TRANSPOSE4x4W 0, 2, 4, 6, 7 - mova [tmpq+1*8], m0 - mova [tmpq+3*8], m2 - mova [tmpq+5*8], m4 - mova [tmpq+7*8], m6 - mova m7, [tmpq] - TRANSPOSE4x4W 7, 5, 3, 1, 0 - mova [tmpq+0*8], m7 - mova [tmpq+2*8], m5 - mova [tmpq+4*8], m3 - mova [tmpq+6*8], m1 - - add inq, mmsize - add tmpq, 64 - dec cntd - jg .loop_1 - - mov cntd, 2 - mov tmpq, rsp -.loop_2: - CAVS_IDCT8_1D tmpq, [pw_64] - psraw m7, 7 - psraw m6, 7 - psraw m5, 7 - psraw m4, 7 - psraw m3, 7 - psraw m2, 7 - psraw m1, 7 - psraw m0, 7 - - mova [outq+0*16], m7 - mova [outq+1*16], m5 - mova [outq+2*16], m3 - mova [outq+3*16], m1 - mova [outq+4*16], m0 - mova [outq+5*16], m2 - mova [outq+6*16], m4 - mova [outq+7*16], m6 - - add outq, mmsize - add tmpq, mmsize - dec cntd - jg .loop_2 - - RET - INIT_XMM sse2 cglobal cavs_idct8, 2, 2, 8 + ARCH_X86_64, 0 - 8 * 16, out, in CAVS_IDCT8_1D inq, [pw_4]