diff --git a/libavcodec/cavsdec.c b/libavcodec/cavsdec.c index 819f2d545f..0f4f3acfa3 100644 --- a/libavcodec/cavsdec.c +++ b/libavcodec/cavsdec.c @@ -921,9 +921,10 @@ static int decode_pic(AVSContext *h) { enum cavs_mb mb_type; if (!s->context_initialized) { - s->avctx->idct_algo = FF_IDCT_CAVS; if (ff_MPV_common_init(s) < 0) return -1; + ff_init_scantable_permutation(s->dsp.idct_permutation, + h->cdsp.idct_perm); ff_init_scantable(s->dsp.idct_permutation,&h->scantable,ff_zigzag_direct); } skip_bits(&s->gb,16);//bbv_dwlay diff --git a/libavcodec/cavsdsp.c b/libavcodec/cavsdsp.c index b1b0d67c05..7eb8eca4d0 100644 --- a/libavcodec/cavsdsp.c +++ b/libavcodec/cavsdsp.c @@ -544,6 +544,7 @@ av_cold void ff_cavsdsp_init(CAVSDSPContext* c, AVCodecContext *avctx) { c->cavs_filter_cv = cavs_filter_cv_c; c->cavs_filter_ch = cavs_filter_ch_c; c->cavs_idct8_add = cavs_idct8_add_c; + c->idct_perm = FF_NO_IDCT_PERM; if (HAVE_MMX) ff_cavsdsp_init_mmx(c, avctx); } diff --git a/libavcodec/cavsdsp.h b/libavcodec/cavsdsp.h index b41ad21bca..b281133ff1 100644 --- a/libavcodec/cavsdsp.h +++ b/libavcodec/cavsdsp.h @@ -33,6 +33,7 @@ typedef struct CAVSDSPContext { void (*cavs_filter_cv)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2); void (*cavs_filter_ch)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2); void (*cavs_idct8_add)(uint8_t *dst, DCTELEM *block, int stride); + int idct_perm; } CAVSDSPContext; void ff_cavsdsp_init(CAVSDSPContext* c, AVCodecContext *avctx); diff --git a/libavcodec/x86/cavsdsp.c b/libavcodec/x86/cavsdsp.c index 40875449fc..03a8b87ec3 100644 --- a/libavcodec/x86/cavsdsp.c +++ b/libavcodec/x86/cavsdsp.c @@ -461,6 +461,7 @@ static void ff_cavsdsp_init_mmx2(CAVSDSPContext* c, AVCodecContext *avctx) { dspfunc(avg_cavs_qpel, 1, 8); #undef dspfunc c->cavs_idct8_add = cavs_idct8_add_mmx; + c->idct_perm = FF_TRANSPOSE_IDCT_PERM; } static void ff_cavsdsp_init_3dnow(CAVSDSPContext* c, AVCodecContext *avctx) { @@ -477,6 +478,7 @@ static void ff_cavsdsp_init_3dnow(CAVSDSPContext* c, AVCodecContext *avctx) { dspfunc(avg_cavs_qpel, 1, 8); #undef dspfunc c->cavs_idct8_add = cavs_idct8_add_mmx; + c->idct_perm = FF_TRANSPOSE_IDCT_PERM; } #endif /* HAVE_INLINE_ASM */ diff --git a/libavcodec/x86/dnxhdenc.c b/libavcodec/x86/dnxhdenc.c index 8b0c2ad225..b2ba894bf3 100644 --- a/libavcodec/x86/dnxhdenc.c +++ b/libavcodec/x86/dnxhdenc.c @@ -24,7 +24,7 @@ #include "libavutil/x86/asm.h" #include "libavcodec/dnxhdenc.h" -#if HAVE_INLINE_ASM +#if HAVE_SSE2_INLINE static void get_pixels_8x4_sym_sse2(DCTELEM *block, const uint8_t *pixels, int line_size) { @@ -52,14 +52,14 @@ static void get_pixels_8x4_sym_sse2(DCTELEM *block, const uint8_t *pixels, int l ); } -#endif /* HAVE_INLINE_ASM */ +#endif /* HAVE_SSE2_INLINE */ void ff_dnxhdenc_init_x86(DNXHDEncContext *ctx) { -#if HAVE_INLINE_ASM +#if HAVE_SSE2_INLINE if (av_get_cpu_flags() & AV_CPU_FLAG_SSE2) { if (ctx->cid_table->bit_depth == 8) ctx->get_pixels_8x4_sym = get_pixels_8x4_sym_sse2; } -#endif /* HAVE_INLINE_ASM */ +#endif /* HAVE_SSE2_INLINE */ } diff --git a/libavcodec/x86/dsputil_mmx_avg_template.c b/libavcodec/x86/dsputil_avg_template.c similarity index 100% rename from libavcodec/x86/dsputil_mmx_avg_template.c rename to libavcodec/x86/dsputil_avg_template.c diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 93f09fb8b6..3109c70fb5 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -171,7 +171,7 @@ DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 }; #define PAVGB(a, b, c, e) PAVGB_MMX_NO_RND(a, b, c, e) #define OP_AVG(a, b, c, e) PAVGB_MMX(a, b, c, e) -#include "dsputil_mmx_rnd_template.c" +#include "dsputil_rnd_template.c" #undef DEF #undef SET_RND @@ -185,7 +185,7 @@ DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 }; #define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX(a, b, c, d, e, f) #define PAVGB(a, b, c, e) PAVGB_MMX(a, b, c, e) -#include "dsputil_mmx_rnd_template.c" +#include "dsputil_rnd_template.c" #undef DEF #undef SET_RND @@ -200,7 +200,7 @@ DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 }; #define PAVGB "pavgusb" #define OP_AVG PAVGB -#include "dsputil_mmx_avg_template.c" +#include "dsputil_avg_template.c" #undef DEF #undef PAVGB @@ -215,7 +215,7 @@ DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 }; #define PAVGB "pavgb" #define OP_AVG PAVGB -#include "dsputil_mmx_avg_template.c" +#include "dsputil_avg_template.c" #undef DEF #undef PAVGB @@ -2847,7 +2847,9 @@ static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx, c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_10_mmx2; } - c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2; + /* slower than cmov version on AMD */ + if (!(mm_flags & AV_CPU_FLAG_3DNOW)) + c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2; c->scalarproduct_int16 = ff_scalarproduct_int16_mmx2; c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmx2; @@ -2924,11 +2926,6 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx, } c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow; - -#if HAVE_7REGS - if (mm_flags & AV_CPU_FLAG_CMOV) - c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov; -#endif #endif /* HAVE_INLINE_ASM */ #if HAVE_YASM @@ -3139,6 +3136,11 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx) { int mm_flags = av_get_cpu_flags(); +#if HAVE_7REGS && HAVE_INLINE_ASM + if (mm_flags & AV_CPU_FLAG_CMOV) + c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov; +#endif + if (mm_flags & AV_CPU_FLAG_MMX) { #if HAVE_INLINE_ASM const int idct_algo = avctx->idct_algo; @@ -3162,8 +3164,6 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx) } c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM; #endif - } else if (idct_algo == FF_IDCT_CAVS) { - c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; } else if (idct_algo == FF_IDCT_XVIDMMX) { if (mm_flags & AV_CPU_FLAG_SSE2) { c->idct_put = ff_idct_xvid_sse2_put; diff --git a/libavcodec/x86/dsputil_mmx_qns_template.c b/libavcodec/x86/dsputil_qns_template.c similarity index 100% rename from libavcodec/x86/dsputil_mmx_qns_template.c rename to libavcodec/x86/dsputil_qns_template.c diff --git a/libavcodec/x86/dsputil_mmx_rnd_template.c b/libavcodec/x86/dsputil_rnd_template.c similarity index 100% rename from libavcodec/x86/dsputil_mmx_rnd_template.c rename to libavcodec/x86/dsputil_rnd_template.c diff --git a/libavcodec/x86/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c index 9f9680b596..325d55062b 100644 --- a/libavcodec/x86/dsputilenc_mmx.c +++ b/libavcodec/x86/dsputilenc_mmx.c @@ -1042,7 +1042,7 @@ static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, int si #define SET_RND MOVQ_WONE #define SCALE_OFFSET 1 -#include "dsputil_mmx_qns_template.c" +#include "dsputil_qns_template.c" #undef DEF #undef SET_RND @@ -1056,7 +1056,7 @@ static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, int si "pmulhrw " #s ", "#x " \n\t"\ "pmulhrw " #s ", "#y " \n\t" -#include "dsputil_mmx_qns_template.c" +#include "dsputil_qns_template.c" #undef DEF #undef SET_RND @@ -1075,7 +1075,7 @@ static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, int si "pmulhrsw " #s ", "#x " \n\t"\ "pmulhrsw " #s ", "#y " \n\t" -#include "dsputil_mmx_qns_template.c" +#include "dsputil_qns_template.c" #undef DEF #undef SET_RND diff --git a/libavcodec/x86/lpc.c b/libavcodec/x86/lpc.c index e0e6f8ba8a..a66f1e0da0 100644 --- a/libavcodec/x86/lpc.c +++ b/libavcodec/x86/lpc.c @@ -23,7 +23,7 @@ #include "libavutil/cpu.h" #include "libavcodec/lpc.h" -#if HAVE_INLINE_ASM +#if HAVE_SSE2_INLINE static void lpc_apply_welch_window_sse2(const int32_t *data, int len, double *w_data) @@ -138,16 +138,16 @@ static void lpc_compute_autocorr_sse2(const double *data, int len, int lag, } } -#endif /* HAVE_INLINE_ASM */ +#endif /* HAVE_SSE2_INLINE */ av_cold void ff_lpc_init_x86(LPCContext *c) { +#if HAVE_SSE2_INLINE int mm_flags = av_get_cpu_flags(); -#if HAVE_INLINE_ASM if (mm_flags & (AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW)) { c->lpc_apply_welch_window = lpc_apply_welch_window_sse2; c->lpc_compute_autocorr = lpc_compute_autocorr_sse2; } -#endif /* HAVE_INLINE_ASM */ +#endif /* HAVE_SSE2_INLINE */ } diff --git a/libavcodec/x86/mpegaudiodec.c b/libavcodec/x86/mpegaudiodec.c index c716af2ea2..8a6d59ad6a 100644 --- a/libavcodec/x86/mpegaudiodec.c +++ b/libavcodec/x86/mpegaudiodec.c @@ -36,7 +36,7 @@ void ff_four_imdct36_float_avx(float *out, float *buf, float *in, float *win, DECLARE_ALIGNED(16, static float, mdct_win_sse)[2][4][4*40]; -#if HAVE_INLINE_ASM +#if HAVE_SSE2_INLINE #define MACS(rt, ra, rb) rt+=(ra)*(rb) #define MLSS(rt, ra, rb) rt-=(ra)*(rb) @@ -180,7 +180,7 @@ static void apply_window_mp3(float *in, float *win, int *unused, float *out, *out = sum; } -#endif /* HAVE_INLINE_ASM */ +#endif /* HAVE_SSE2_INLINE */ #if HAVE_YASM #define DECL_IMDCT_BLOCKS(CPU1, CPU2) \ @@ -244,11 +244,12 @@ void ff_mpadsp_init_mmx(MPADSPContext *s) } } -#if HAVE_INLINE_ASM +#if HAVE_SSE2_INLINE if (mm_flags & AV_CPU_FLAG_SSE2) { s->apply_window_float = apply_window_mp3; } -#endif /* HAVE_INLINE_ASM */ +#endif /* HAVE_SSE2_INLINE */ + #if HAVE_YASM if (0) { #if HAVE_AVX_EXTERNAL diff --git a/libavformat/mov_chan.c b/libavformat/mov_chan.c index 074a32019a..58aec57374 100644 --- a/libavformat/mov_chan.c +++ b/libavformat/mov_chan.c @@ -580,9 +580,10 @@ int ff_mov_read_chan(AVFormatContext *s, AVIOContext *pb, AVStream *st, label_mask |= mask_incr; } } - if (layout_tag == 0) + if (layout_tag == 0) { + if (label_mask) st->codec->channel_layout = label_mask; - else + } else st->codec->channel_layout = ff_mov_get_channel_layout(layout_tag, bitmap); avio_skip(pb, size - 12); diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm index f1fd01b966..3af928f8b6 100644 --- a/libavutil/x86/float_dsp.asm +++ b/libavutil/x86/float_dsp.asm @@ -62,12 +62,12 @@ cglobal vector_fmac_scalar, 3,3,3, dst, src, len %else cglobal vector_fmac_scalar, 4,4,3, dst, src, mul, len %endif -%if WIN64 - SWAP 0, 2 -%endif %if ARCH_X86_32 VBROADCASTSS m0, mulm %else +%if WIN64 + mova xmm0, xmm2 +%endif shufps xmm0, xmm0, 0 %if cpuflag(avx) vinsertf128 m0, m0, xmm0, 1 diff --git a/libswscale/swscale.c b/libswscale/swscale.c index 7cc630e4f1..7ca29bc82c 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -661,7 +661,7 @@ static int swScale(SwsContext *c, const uint8_t *src[], if (isPlanar(dstFormat) && isALPHA(dstFormat) && !alpPixBuf) fillPlane(dst[3], dstStride[3], dstW, dstY - lastDstY, lastDstY, 255); -#if HAVE_MMXEXT && HAVE_INLINE_ASM +#if HAVE_MMXEXT_INLINE if (av_get_cpu_flags() & AV_CPU_FLAG_MMXEXT) __asm__ volatile ("sfence" ::: "memory"); #endif diff --git a/tests/fate/amrnb.mak b/tests/fate/amrnb.mak index 1fe138136c..d163dc4e96 100644 --- a/tests/fate/amrnb.mak +++ b/tests/fate/amrnb.mak @@ -2,49 +2,41 @@ FATE_AMRNB += fate-amrnb-4k75 fate-amrnb-4k75: CMD = pcm -i $(SAMPLES)/amrnb/4.75k.amr fate-amrnb-4k75: CMP = stddev fate-amrnb-4k75: REF = $(SAMPLES)/amrnb/4.75k.pcm -fate-amrnb-4k75: FUZZ = 1 FATE_AMRNB += fate-amrnb-5k15 fate-amrnb-5k15: CMD = pcm -i $(SAMPLES)/amrnb/5.15k.amr fate-amrnb-5k15: CMP = stddev fate-amrnb-5k15: REF = $(SAMPLES)/amrnb/5.15k.pcm -fate-amrnb-5k15: FUZZ = 1 FATE_AMRNB += fate-amrnb-5k9 fate-amrnb-5k9: CMD = pcm -i $(SAMPLES)/amrnb/5.9k.amr fate-amrnb-5k9: CMP = stddev fate-amrnb-5k9: REF = $(SAMPLES)/amrnb/5.9k.pcm -fate-amrnb-5k9: FUZZ = 1 FATE_AMRNB += fate-amrnb-6k7 fate-amrnb-6k7: CMD = pcm -i $(SAMPLES)/amrnb/6.7k.amr fate-amrnb-6k7: CMP = stddev fate-amrnb-6k7: REF = $(SAMPLES)/amrnb/6.7k.pcm -fate-amrnb-6k7: FUZZ = 1 FATE_AMRNB += fate-amrnb-7k4 fate-amrnb-7k4: CMD = pcm -i $(SAMPLES)/amrnb/7.4k.amr fate-amrnb-7k4: CMP = stddev fate-amrnb-7k4: REF = $(SAMPLES)/amrnb/7.4k.pcm -fate-amrnb-7k4: FUZZ = 1 FATE_AMRNB += fate-amrnb-7k95 fate-amrnb-7k95: CMD = pcm -i $(SAMPLES)/amrnb/7.95k.amr fate-amrnb-7k95: CMP = stddev fate-amrnb-7k95: REF = $(SAMPLES)/amrnb/7.95k.pcm -fate-amrnb-7k95: FUZZ = 1 FATE_AMRNB += fate-amrnb-10k2 fate-amrnb-10k2: CMD = pcm -i $(SAMPLES)/amrnb/10.2k.amr fate-amrnb-10k2: CMP = stddev fate-amrnb-10k2: REF = $(SAMPLES)/amrnb/10.2k.pcm -fate-amrnb-10k2: FUZZ = 1 FATE_AMRNB += fate-amrnb-12k2 fate-amrnb-12k2: CMD = pcm -i $(SAMPLES)/amrnb/12.2k.amr fate-amrnb-12k2: CMP = stddev fate-amrnb-12k2: REF = $(SAMPLES)/amrnb/12.2k.pcm -fate-amrnb-12k2: FUZZ = 1 FATE_SAMPLES_AVCONV += $(FATE_AMRNB) fate-amrnb: $(FATE_AMRNB) diff --git a/tests/fate/amrwb.mak b/tests/fate/amrwb.mak index 571fac0b35..cdbe2acd63 100644 --- a/tests/fate/amrwb.mak +++ b/tests/fate/amrwb.mak @@ -2,19 +2,16 @@ FATE_AMRWB += fate-amrwb-6k60 fate-amrwb-6k60: CMD = pcm -i $(SAMPLES)/amrwb/seed-6k60.awb fate-amrwb-6k60: CMP = stddev fate-amrwb-6k60: REF = $(SAMPLES)/amrwb/seed-6k60.pcm -fate-amrwb-6k60: FUZZ = 1 FATE_AMRWB += fate-amrwb-8k85 fate-amrwb-8k85: CMD = pcm -i $(SAMPLES)/amrwb/seed-8k85.awb fate-amrwb-8k85: CMP = stddev fate-amrwb-8k85: REF = $(SAMPLES)/amrwb/seed-8k85.pcm -fate-amrwb-8k85: FUZZ = 1 FATE_AMRWB += fate-amrwb-12k65 fate-amrwb-12k65: CMD = pcm -i $(SAMPLES)/amrwb/seed-12k65.awb fate-amrwb-12k65: CMP = stddev fate-amrwb-12k65: REF = $(SAMPLES)/amrwb/seed-12k65.pcm -fate-amrwb-12k65: FUZZ = 1 FATE_AMRWB += fate-amrwb-14k25 fate-amrwb-14k25: CMD = pcm -i $(SAMPLES)/amrwb/seed-14k25.awb @@ -26,19 +23,16 @@ FATE_AMRWB += fate-amrwb-15k85 fate-amrwb-15k85: CMD = pcm -i $(SAMPLES)/amrwb/seed-15k85.awb fate-amrwb-15k85: CMP = stddev fate-amrwb-15k85: REF = $(SAMPLES)/amrwb/seed-15k85.pcm -fate-amrwb-15k85: FUZZ = 1 FATE_AMRWB += fate-amrwb-18k25 fate-amrwb-18k25: CMD = pcm -i $(SAMPLES)/amrwb/seed-18k25.awb fate-amrwb-18k25: CMP = stddev fate-amrwb-18k25: REF = $(SAMPLES)/amrwb/seed-18k25.pcm -fate-amrwb-18k25: FUZZ = 1 FATE_AMRWB += fate-amrwb-19k85 fate-amrwb-19k85: CMD = pcm -i $(SAMPLES)/amrwb/seed-19k85.awb fate-amrwb-19k85: CMP = stddev fate-amrwb-19k85: REF = $(SAMPLES)/amrwb/seed-19k85.pcm -fate-amrwb-19k85: FUZZ = 1 FATE_AMRWB += fate-amrwb-23k05 fate-amrwb-23k05: CMD = pcm -i $(SAMPLES)/amrwb/seed-23k05.awb @@ -56,7 +50,6 @@ FATE_AMRWB += fate-amrwb-23k85-2 fate-amrwb-23k85-2: CMD = pcm -i $(SAMPLES)/amrwb/deus-23k85.awb fate-amrwb-23k85-2: CMP = stddev fate-amrwb-23k85-2: REF = $(SAMPLES)/amrwb/deus-23k85.pcm -fate-amrwb-23k85-2: FUZZ = 1 FATE_SAMPLES_AVCONV += $(FATE_AMRWB) fate-amrwb: $(FATE_AMRWB) diff --git a/tests/fate/mpc.mak b/tests/fate/mpc.mak index ae68ee2a06..5505a9aef3 100644 --- a/tests/fate/mpc.mak +++ b/tests/fate/mpc.mak @@ -8,7 +8,6 @@ FATE_MPC += fate-musepack7 fate-musepack7: CMD = pcm -i $(SAMPLES)/musepack/inside-mp7.mpc fate-musepack7: CMP = oneoff fate-musepack7: REF = $(SAMPLES)/musepack/inside-mp7.pcm -fate-musepack7: FUZZ = 1 FATE_SAMPLES_FFMPEG += $(FATE_MPC) fate-mpc: $(FATE_MPC)