diff --git a/Doxyfile b/Doxyfile index 97896aa071..b91f351800 100644 --- a/Doxyfile +++ b/Doxyfile @@ -1378,7 +1378,7 @@ PREDEFINED = "__attribute__(x)=" \ "DEF(x)=x ## _TMPL" \ HAVE_AV_CONFIG_H \ HAVE_MMX \ - HAVE_MMX2 \ + HAVE_MMXEXT \ HAVE_AMD3DNOW \ "DECLARE_ALIGNED(a,t,n)=t n" \ "offsetof(x,y)=0x42" diff --git a/configure b/configure index 6cd170e0f8..dbc20153ad 100755 --- a/configure +++ b/configure @@ -267,7 +267,7 @@ Optimization options (experts only): --disable-amd3dnow disable 3DNow! optimizations --disable-amd3dnowext disable 3DNow! extended optimizations --disable-mmx disable MMX optimizations - --disable-mmx2 disable MMX2 optimizations + --disable-mmxext disable MMXEXT optimizations --disable-sse disable SSE optimizations --disable-ssse3 disable SSSE3 optimizations --disable-avx disable AVX optimizations @@ -1182,7 +1182,7 @@ ARCH_EXT_LIST=' fma4 mmi mmx - mmx2 + mmxext neon ppc4xx sse @@ -1459,7 +1459,7 @@ x86_64_suggest="cmov fast_cmov" amd3dnow_deps="mmx" amd3dnowext_deps="amd3dnow" mmx_deps="x86" -mmx2_deps="mmx" +mmxext_deps="mmx" sse_deps="mmx" ssse3_deps="sse" avx_deps="ssse3" @@ -3194,9 +3194,9 @@ EOF # check whether xmm clobbers are supported check_asm xmm_clobbers '"":::"%xmm0"' - # check whether binutils is new enough to compile SSSE3/MMX2 + # check whether binutils is new enough to compile SSSE3/MMXEXT enabled ssse3 && check_asm ssse3 '"pabsw %xmm0, %xmm0"' - enabled mmx2 && check_asm mmx2 '"pmaxub %mm0, %mm1"' + enabled mmxext && check_asm mmxext '"pmaxub %mm0, %mm1"' if ! disabled_any asm mmx yasm; then if check_cmd $yasmexe --version; then @@ -3748,7 +3748,7 @@ echo "runtime cpu detection ${runtime_cpudetect-no}" if enabled x86; then echo "${yasmexe} ${yasm-no}" echo "MMX enabled ${mmx-no}" - echo "MMX2 enabled ${mmx2-no}" + echo "MMXEXT enabled ${mmxext-no}" echo "3DNow! enabled ${amd3dnow-no}" echo "3DNow! extended enabled ${amd3dnowext-no}" echo "SSE enabled ${sse-no}" @@ -4019,6 +4019,7 @@ cat > $TMPH < AV_CPU_FLAG_MMXEXT. + Rename SWS_CPU_CAPS_MMX2 ---> SWS_CPU_CAPS_MMXEXT. + 2012-07-xx - xxxxxxx - lavf 54.13.0 - avformat.h Add AVFMT_FLAG_NOBUFFER for low latency use cases. diff --git a/libavcodec/arm/vp56_arith.h b/libavcodec/arm/vp56_arith.h index ece9ac2a6c..29a222844a 100644 --- a/libavcodec/arm/vp56_arith.h +++ b/libavcodec/arm/vp56_arith.h @@ -29,6 +29,14 @@ # define T(x) #endif +#if CONFIG_THUMB || defined __clang__ +# define L(x) +# define U(x) x +#else +# define L(x) x +# define U(x) +#endif + #if HAVE_ARMV6 && HAVE_INLINE_ASM #define vp56_rac_get_prob vp56_rac_get_prob_armv6 @@ -42,8 +50,8 @@ static inline int vp56_rac_get_prob_armv6(VP56RangeCoder *c, int pr) __asm__ ("adds %3, %3, %0 \n" "itt cs \n" "cmpcs %7, %4 \n" - A("ldrcsh %2, [%4], #2 \n") - T("ldrhcs %2, [%4], #2 \n") + L("ldrcsh %2, [%4], #2 \n") + U("ldrhcs %2, [%4], #2 \n") "rsb %0, %6, #256 \n" "smlabb %0, %5, %6, %0 \n" T("itttt cs \n") @@ -80,8 +88,8 @@ static inline int vp56_rac_get_prob_branchy_armv6(VP56RangeCoder *c, int pr) __asm__ ("adds %3, %3, %0 \n" "itt cs \n" "cmpcs %7, %4 \n" - A("ldrcsh %2, [%4], #2 \n") - T("ldrhcs %2, [%4], #2 \n") + L("ldrcsh %2, [%4], #2 \n") + U("ldrhcs %2, [%4], #2 \n") "rsb %0, %6, #256 \n" "smlabb %0, %5, %6, %0 \n" T("itttt cs \n") diff --git a/libavcodec/arm/vp8dsp_armv6.S b/libavcodec/arm/vp8dsp_armv6.S index a26a2a9813..40be926f9f 100644 --- a/libavcodec/arm/vp8dsp_armv6.S +++ b/libavcodec/arm/vp8dsp_armv6.S @@ -1226,7 +1226,13 @@ vp8_mc_1 bilin, 8, v vp8_mc_1 bilin, 4, h vp8_mc_1 bilin, 4, v -#define TMPSIZE \size * (8 - 8*(\size > 4) + \ytaps - 1) +/* True relational expressions have the value -1 in the GNU assembler, + +1 in Apple's. */ +#ifdef __APPLE__ +# define TMPSIZE \size * (8 + 8*(\size > 4) + \ytaps - 1) +#else +# define TMPSIZE \size * (8 - 8*(\size > 4) + \ytaps - 1) +#endif .macro vp8_mc_hv name, size, h, v, ytaps function ff_put_vp8_\name\size\()_\h\v\()_armv6, export=1 diff --git a/libavcodec/dct-test.c b/libavcodec/dct-test.c index b32d34f1ad..e37b23c59e 100644 --- a/libavcodec/dct-test.c +++ b/libavcodec/dct-test.c @@ -87,7 +87,7 @@ static const struct algo fdct_tab[] = { #if HAVE_MMX && HAVE_INLINE_ASM { "MMX", ff_fdct_mmx, NO_PERM, AV_CPU_FLAG_MMX }, - { "MMX2", ff_fdct_mmx2, NO_PERM, AV_CPU_FLAG_MMX2 }, + { "MMXEXT", ff_fdct_mmx2, NO_PERM, AV_CPU_FLAG_MMXEXT }, { "SSE2", ff_fdct_sse2, NO_PERM, AV_CPU_FLAG_SSE2 }, #endif @@ -132,7 +132,7 @@ static const struct algo idct_tab[] = { #endif { "SIMPLE-MMX", ff_simple_idct_mmx, MMX_SIMPLE_PERM, AV_CPU_FLAG_MMX }, { "XVID-MMX", ff_idct_xvid_mmx, NO_PERM, AV_CPU_FLAG_MMX, 1 }, - { "XVID-MMX2", ff_idct_xvid_mmx2, NO_PERM, AV_CPU_FLAG_MMX2, 1 }, + { "XVID-MMXEXT", ff_idct_xvid_mmx2, NO_PERM, AV_CPU_FLAG_MMXEXT, 1 }, { "XVID-SSE2", ff_idct_xvid_sse2, SSE2_PERM, AV_CPU_FLAG_SSE2, 1 }, #if ARCH_X86_64 && HAVE_YASM { "PR-SSE2", ff_prores_idct_put_10_sse2_wrap, TRANSPOSE_PERM, AV_CPU_FLAG_SSE2, 1 }, diff --git a/libavcodec/motion-test.c b/libavcodec/motion-test.c index 6b954ebc9b..ab53f19ed5 100644 --- a/libavcodec/motion-test.c +++ b/libavcodec/motion-test.c @@ -116,8 +116,8 @@ int main(int argc, char **argv) AVCodecContext *ctx; int c; DSPContext cctx, mmxctx; - int flags[2] = { AV_CPU_FLAG_MMX, AV_CPU_FLAG_MMX2 }; - int flags_size = HAVE_MMX2 ? 2 : 1; + int flags[2] = { AV_CPU_FLAG_MMX, AV_CPU_FLAG_MMXEXT }; + int flags_size = HAVE_MMXEXT ? 2 : 1; if (argc > 1) { help(); diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm index 4e0fce35d1..acbab35cae 100644 --- a/libavcodec/x86/ac3dsp.asm +++ b/libavcodec/x86/ac3dsp.asm @@ -68,7 +68,7 @@ cglobal ac3_exponent_min_%1, 3,4,2, exp, reuse_blks, expn, offset %define LOOP_ALIGN INIT_MMX AC3_EXPONENT_MIN mmx -%if HAVE_MMX2 +%if HAVE_MMXEXT %define PMINUB PMINUB_MMXEXT %define LOOP_ALIGN ALIGN 16 AC3_EXPONENT_MIN mmxext diff --git a/libavcodec/x86/ac3dsp_mmx.c b/libavcodec/x86/ac3dsp_mmx.c index 54fa380130..5549f3e550 100644 --- a/libavcodec/x86/ac3dsp_mmx.c +++ b/libavcodec/x86/ac3dsp_mmx.c @@ -65,7 +65,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact) c->float_to_fixed24 = ff_float_to_fixed24_3dnow; } } - if (mm_flags & AV_CPU_FLAG_MMX2 && HAVE_MMX2) { + if (mm_flags & AV_CPU_FLAG_MMXEXT && HAVE_MMXEXT) { c->ac3_exponent_min = ff_ac3_exponent_min_mmxext; c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx2; } diff --git a/libavcodec/x86/cavsdsp_mmx.c b/libavcodec/x86/cavsdsp_mmx.c index eae6fea536..05f192fc71 100644 --- a/libavcodec/x86/cavsdsp_mmx.c +++ b/libavcodec/x86/cavsdsp_mmx.c @@ -486,7 +486,7 @@ void ff_cavsdsp_init_mmx(CAVSDSPContext *c, AVCodecContext *avctx) int mm_flags = av_get_cpu_flags(); #if HAVE_INLINE_ASM - if (mm_flags & AV_CPU_FLAG_MMX2) ff_cavsdsp_init_mmx2 (c, avctx); + if (mm_flags & AV_CPU_FLAG_MMXEXT) ff_cavsdsp_init_mmx2(c, avctx); if (mm_flags & AV_CPU_FLAG_3DNOW) ff_cavsdsp_init_3dnow(c, avctx); #endif /* HAVE_INLINE_ASM */ } diff --git a/libavcodec/x86/dct32_sse.asm b/libavcodec/x86/dct32_sse.asm index ca44106433..02b5f3fc89 100644 --- a/libavcodec/x86/dct32_sse.asm +++ b/libavcodec/x86/dct32_sse.asm @@ -42,39 +42,24 @@ ps_cos_vec: dd 0.500603, 0.505471, 0.515447, 0.531043 align 32 ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000 -%macro BUTTERFLY_SSE 4 - movaps %4, %1 - subps %1, %2 - addps %2, %4 - mulps %1, %3 +%macro BUTTERFLY 4 + subps %4, %1, %2 + addps %2, %2, %1 + mulps %1, %4, %3 %endmacro -%macro BUTTERFLY_AVX 4 - vsubps %4, %1, %2 - vaddps %2, %2, %1 - vmulps %1, %4, %3 -%endmacro - -%macro BUTTERFLY0_SSE 5 - movaps %4, %1 - shufps %1, %1, %5 - xorps %4, %2 - addps %1, %4 - mulps %1, %3 -%endmacro - -%macro BUTTERFLY0_SSE2 5 +%macro BUTTERFLY0 5 +%if cpuflag(sse2) && notcpuflag(avx) pshufd %4, %1, %5 xorps %1, %2 addps %1, %4 mulps %1, %3 -%endmacro - -%macro BUTTERFLY0_AVX 5 - vshufps %4, %1, %1, %5 - vxorps %1, %1, %2 - vaddps %4, %4, %1 - vmulps %1, %4, %3 +%else + shufps %4, %1, %1, %5 + xorps %1, %1, %2 + addps %4, %4, %1 + mulps %1, %4, %3 +%endif %endmacro %macro BUTTERFLY2 4 @@ -206,14 +191,11 @@ ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000 movss [outq+116], m6 %endmacro -%define BUTTERFLY BUTTERFLY_AVX -%define BUTTERFLY0 BUTTERFLY0_AVX - -INIT_YMM +INIT_YMM avx SECTION_TEXT %if HAVE_AVX ; void ff_dct32_float_avx(FFTSample *out, const FFTSample *in) -cglobal dct32_float_avx, 2,3,8, out, in, tmp +cglobal dct32_float, 2,3,8, out, in, tmp ; pass 1 vmovaps m4, [inq+0] vinsertf128 m5, m5, [inq+96], 1 @@ -286,9 +268,6 @@ INIT_XMM RET %endif -%define BUTTERFLY BUTTERFLY_SSE -%define BUTTERFLY0 BUTTERFLY0_SSE - %if ARCH_X86_64 %define SPILL SWAP %define UNSPILL SWAP @@ -411,10 +390,9 @@ INIT_XMM %endif -INIT_XMM -%macro DCT32_FUNC 1 ; void ff_dct32_float_sse(FFTSample *out, const FFTSample *in) -cglobal dct32_float_%1, 2,3,16, out, in, tmp +%macro DCT32_FUNC 0 +cglobal dct32_float, 2, 3, 16, out, in, tmp ; pass 1 movaps m0, [inq+0] @@ -498,18 +476,16 @@ cglobal dct32_float_%1, 2,3,16, out, in, tmp RET %endmacro -%macro LOAD_INV_SSE 2 +%macro LOAD_INV 2 +%if cpuflag(sse2) + pshufd %1, %2, 0x1b +%elif cpuflag(sse) movaps %1, %2 shufps %1, %1, 0x1b +%endif %endmacro -%define LOAD_INV LOAD_INV_SSE -DCT32_FUNC sse - -%macro LOAD_INV_SSE2 2 - pshufd %1, %2, 0x1b -%endmacro - -%define LOAD_INV LOAD_INV_SSE2 -%define BUTTERFLY0 BUTTERFLY0_SSE2 -DCT32_FUNC sse2 +INIT_XMM sse +DCT32_FUNC +INIT_XMM sse2 +DCT32_FUNC diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 47b8ef1fc3..f4ed7565e7 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -3171,7 +3171,7 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx) c->idct_add = ff_idct_xvid_sse2_add; c->idct = ff_idct_xvid_sse2; c->idct_permutation_type = FF_SSE2_IDCT_PERM; - } else if (mm_flags & AV_CPU_FLAG_MMX2) { + } else if (mm_flags & AV_CPU_FLAG_MMXEXT) { c->idct_put = ff_idct_xvid_mmx2_put; c->idct_add = ff_idct_xvid_mmx2_add; c->idct = ff_idct_xvid_mmx2; @@ -3187,7 +3187,7 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx) dsputil_init_mmx(c, avctx, mm_flags); } - if (mm_flags & AV_CPU_FLAG_MMX2) + if (mm_flags & AV_CPU_FLAG_MMXEXT) dsputil_init_mmx2(c, avctx, mm_flags); if (mm_flags & AV_CPU_FLAG_3DNOW && HAVE_AMD3DNOW) diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm index 5a6c3d1eae..06d2027c69 100644 --- a/libavcodec/x86/dsputil_yasm.asm +++ b/libavcodec/x86/dsputil_yasm.asm @@ -388,12 +388,16 @@ cglobal add_hfyu_median_prediction_mmx2, 6,6,0, dst, top, diff, w, left, left_to RET -%macro ADD_HFYU_LEFT_LOOP 1 ; %1 = is_aligned +%macro ADD_HFYU_LEFT_LOOP 2 ; %1 = dst_is_aligned, %2 = src_is_aligned add srcq, wq add dstq, wq neg wq %%.loop: +%if %2 mova m1, [srcq+wq] +%else + movu m1, [srcq+wq] +%endif mova m2, m1 psllw m1, 8 paddb m1, m2 @@ -435,7 +439,7 @@ cglobal add_hfyu_left_prediction_ssse3, 3,3,7, dst, src, w, left mova m3, [pb_zz11zz55zz99zzdd] movd m0, leftm psllq m0, 56 - ADD_HFYU_LEFT_LOOP 1 + ADD_HFYU_LEFT_LOOP 1, 1 INIT_XMM cglobal add_hfyu_left_prediction_sse4, 3,3,7, dst, src, w, left @@ -446,12 +450,14 @@ cglobal add_hfyu_left_prediction_sse4, 3,3,7, dst, src, w, left movd m0, leftm pslldq m0, 15 test srcq, 15 - jnz add_hfyu_left_prediction_ssse3.skip_prologue + jnz .src_unaligned test dstq, 15 - jnz .unaligned - ADD_HFYU_LEFT_LOOP 1 -.unaligned: - ADD_HFYU_LEFT_LOOP 0 + jnz .dst_unaligned + ADD_HFYU_LEFT_LOOP 1, 1 +.dst_unaligned: + ADD_HFYU_LEFT_LOOP 0, 1 +.src_unaligned: + ADD_HFYU_LEFT_LOOP 0, 0 ; float scalarproduct_float_sse(const float *v1, const float *v2, int len) diff --git a/libavcodec/x86/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c index cdc384ca9d..77b7b7620e 100644 --- a/libavcodec/x86/dsputilenc_mmx.c +++ b/libavcodec/x86/dsputilenc_mmx.c @@ -1112,7 +1112,7 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx) (dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX)) { if(mm_flags & AV_CPU_FLAG_SSE2){ c->fdct = ff_fdct_sse2; - }else if(mm_flags & AV_CPU_FLAG_MMX2){ + } else if (mm_flags & AV_CPU_FLAG_MMXEXT) { c->fdct = ff_fdct_mmx2; }else{ c->fdct = ff_fdct_mmx; @@ -1145,8 +1145,7 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx) c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx; - - if (mm_flags & AV_CPU_FLAG_MMX2) { + if (mm_flags & AV_CPU_FLAG_MMXEXT) { c->sum_abs_dctelem= sum_abs_dctelem_mmx2; c->vsad[4]= vsad_intra16_mmx2; @@ -1187,7 +1186,7 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx) c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx; c->hadamard8_diff[1] = ff_hadamard8_diff_mmx; - if (mm_flags & AV_CPU_FLAG_MMX2) { + if (mm_flags & AV_CPU_FLAG_MMXEXT) { c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx2; c->hadamard8_diff[1] = ff_hadamard8_diff_mmx2; } diff --git a/libavcodec/x86/fft_mmx.asm b/libavcodec/x86/fft_mmx.asm index f41381760d..faa27a01c6 100644 --- a/libavcodec/x86/fft_mmx.asm +++ b/libavcodec/x86/fft_mmx.asm @@ -1041,7 +1041,7 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i mova [r1+r5*8], m0 mova [r1+r6*8], m2 add r4, 2 - sub r4, 2 + sub r3, 2 %else %if ARCH_X86_64 movzx r5, word [rrevtab+r4-4] diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c index 5975d07706..59ab3ea27e 100644 --- a/libavcodec/x86/h264_intrapred_init.c +++ b/libavcodec/x86/h264_intrapred_init.c @@ -198,7 +198,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth } } - if (mm_flags & AV_CPU_FLAG_MMX2) { + if (mm_flags & AV_CPU_FLAG_MMXEXT) { h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmx2; h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmx2; if (chroma_format_idc == 1) @@ -308,7 +308,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth } } } else if (bit_depth == 10) { - if (mm_flags & AV_CPU_FLAG_MMX2) { + if (mm_flags & AV_CPU_FLAG_MMXEXT) { h->pred4x4[DC_PRED ] = ff_pred4x4_dc_10_mmxext; h->pred4x4[HOR_UP_PRED ] = ff_pred4x4_horizontal_up_10_mmxext; diff --git a/libavcodec/x86/h264dsp_mmx.c b/libavcodec/x86/h264dsp_mmx.c index bb77a96999..87b9452501 100644 --- a/libavcodec/x86/h264dsp_mmx.c +++ b/libavcodec/x86/h264dsp_mmx.c @@ -218,7 +218,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, #if HAVE_YASM int mm_flags = av_get_cpu_flags(); - if (chroma_format_idc == 1 && mm_flags & AV_CPU_FLAG_MMX2) + if (chroma_format_idc == 1 && mm_flags & AV_CPU_FLAG_MMXEXT) c->h264_loop_filter_strength = ff_h264_loop_filter_strength_mmx2; if (bit_depth == 8) { @@ -236,7 +236,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, if (mm_flags & AV_CPU_FLAG_CMOV) c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_mmx; - if (mm_flags & AV_CPU_FLAG_MMX2) { + if (mm_flags & AV_CPU_FLAG_MMXEXT) { c->h264_idct_dc_add = ff_h264_idct_dc_add_8_mmx2; c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmx2; c->h264_idct_add16 = ff_h264_idct_add16_8_mmx2; @@ -304,7 +304,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, } } else if (bit_depth == 10) { if (mm_flags & AV_CPU_FLAG_MMX) { - if (mm_flags & AV_CPU_FLAG_MMX2) { + if (mm_flags & AV_CPU_FLAG_MMXEXT) { #if ARCH_X86_32 c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_10_mmx2; c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_mmx2; diff --git a/libavcodec/x86/motion_est_mmx.c b/libavcodec/x86/motion_est_mmx.c index a903c96950..1d3545a5e8 100644 --- a/libavcodec/x86/motion_est_mmx.c +++ b/libavcodec/x86/motion_est_mmx.c @@ -444,7 +444,7 @@ void ff_dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx) c->sad[0]= sad16_mmx; c->sad[1]= sad8_mmx; } - if (mm_flags & AV_CPU_FLAG_MMX2) { + if (mm_flags & AV_CPU_FLAG_MMXEXT) { c->pix_abs[0][0] = sad16_mmx2; c->pix_abs[1][0] = sad8_mmx2; diff --git a/libavcodec/x86/mpegvideo_mmx.c b/libavcodec/x86/mpegvideo_mmx.c index 8e72852b0a..44d4cd3a8a 100644 --- a/libavcodec/x86/mpegvideo_mmx.c +++ b/libavcodec/x86/mpegvideo_mmx.c @@ -595,15 +595,15 @@ static void denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){ #define HAVE_SSSE3 0 #undef HAVE_SSE2 -#undef HAVE_MMX2 +#undef HAVE_MMXEXT #define HAVE_SSE2 0 -#define HAVE_MMX2 0 +#define HAVE_MMXEXT 0 #define RENAME(a) a ## _MMX #define RENAMEl(a) a ## _mmx #include "mpegvideo_mmx_template.c" -#undef HAVE_MMX2 -#define HAVE_MMX2 1 +#undef HAVE_MMXEXT +#define HAVE_MMXEXT 1 #undef RENAME #undef RENAMEl #define RENAME(a) a ## _MMX2 @@ -660,7 +660,7 @@ void ff_MPV_common_init_mmx(MpegEncContext *s) #endif if(mm_flags & AV_CPU_FLAG_SSE2){ s->dct_quantize= dct_quantize_SSE2; - } else if(mm_flags & AV_CPU_FLAG_MMX2){ + } else if (mm_flags & AV_CPU_FLAG_MMXEXT) { s->dct_quantize= dct_quantize_MMX2; } else { s->dct_quantize= dct_quantize_MMX; diff --git a/libavcodec/x86/mpegvideo_mmx_template.c b/libavcodec/x86/mpegvideo_mmx_template.c index 9119476d36..82e4ffa558 100644 --- a/libavcodec/x86/mpegvideo_mmx_template.c +++ b/libavcodec/x86/mpegvideo_mmx_template.c @@ -48,7 +48,7 @@ #define MMREG_WIDTH "8" #define MM "%%mm" #define MOVQ "movq" -#if HAVE_MMX2 +#if HAVE_MMXEXT #define SPREADW(a) "pshufw $0, "a", "a" \n\t" #define PMAXW(a,b) "pmaxsw "a", "b" \n\t" #define PMAX(a,b) \ diff --git a/libavcodec/x86/pngdsp-init.c b/libavcodec/x86/pngdsp-init.c index f122b242fb..7a12730620 100644 --- a/libavcodec/x86/pngdsp-init.c +++ b/libavcodec/x86/pngdsp-init.c @@ -41,7 +41,7 @@ void ff_pngdsp_init_x86(PNGDSPContext *dsp) if (flags & AV_CPU_FLAG_MMX) dsp->add_bytes_l2 = ff_add_bytes_l2_mmx; #endif - if (flags & AV_CPU_FLAG_MMX2) + if (flags & AV_CPU_FLAG_MMXEXT) dsp->add_paeth_prediction = ff_add_png_paeth_prediction_mmx2; if (flags & AV_CPU_FLAG_SSE2) dsp->add_bytes_l2 = ff_add_bytes_l2_sse2; diff --git a/libavcodec/x86/rv34dsp_init.c b/libavcodec/x86/rv34dsp_init.c index d91818c375..7284a9beaf 100644 --- a/libavcodec/x86/rv34dsp_init.c +++ b/libavcodec/x86/rv34dsp_init.c @@ -37,7 +37,7 @@ av_cold void ff_rv34dsp_init_x86(RV34DSPContext* c, DSPContext *dsp) if (mm_flags & AV_CPU_FLAG_MMX) c->rv34_idct_dc_add = ff_rv34_idct_dc_add_mmx; - if (mm_flags & AV_CPU_FLAG_MMX2) { + if (mm_flags & AV_CPU_FLAG_MMXEXT) { c->rv34_inv_transform_dc = ff_rv34_idct_dc_noround_mmx2; c->rv34_idct_add = ff_rv34_idct_add_mmx2; } diff --git a/libavcodec/x86/rv40dsp_init.c b/libavcodec/x86/rv40dsp_init.c index bbb1c8eb57..c508ac9328 100644 --- a/libavcodec/x86/rv40dsp_init.c +++ b/libavcodec/x86/rv40dsp_init.c @@ -204,7 +204,7 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp) QPEL_MC_SET(put_, _mmx) #endif } - if (mm_flags & AV_CPU_FLAG_MMX2) { + if (mm_flags & AV_CPU_FLAG_MMXEXT) { c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_mmx2; c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_mmx2; c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_mmx2; diff --git a/libavcodec/x86/snowdsp_mmx.c b/libavcodec/x86/snowdsp_mmx.c index eac43364e5..5d47206e81 100644 --- a/libavcodec/x86/snowdsp_mmx.c +++ b/libavcodec/x86/snowdsp_mmx.c @@ -889,7 +889,7 @@ void ff_dwt_init_x86(DWTContext *c) c->inner_add_yblock = ff_snow_inner_add_yblock_sse2; } else{ - if(mm_flags & AV_CPU_FLAG_MMX2){ + if (mm_flags & AV_CPU_FLAG_MMXEXT) { c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx; #if HAVE_7REGS c->vertical_compose97i = ff_snow_vertical_compose97i_mmx; diff --git a/libavcodec/x86/vc1dsp_mmx.c b/libavcodec/x86/vc1dsp_mmx.c index bddac5ec77..5922a6dbf7 100644 --- a/libavcodec/x86/vc1dsp_mmx.c +++ b/libavcodec/x86/vc1dsp_mmx.c @@ -760,7 +760,7 @@ void ff_vc1dsp_init_mmx(VC1DSPContext *dsp) dsp->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_mmx_nornd; } - if (mm_flags & AV_CPU_FLAG_MMX2){ + if (mm_flags & AV_CPU_FLAG_MMXEXT) { dsp->avg_vc1_mspel_pixels_tab[ 0] = ff_avg_vc1_mspel_mc00_mmx2; dsp->avg_vc1_mspel_pixels_tab[ 4] = avg_vc1_mspel_mc01_mmx2; dsp->avg_vc1_mspel_pixels_tab[ 8] = avg_vc1_mspel_mc02_mmx2; @@ -810,7 +810,7 @@ void ff_vc1dsp_init_mmx(VC1DSPContext *dsp) if (mm_flags & AV_CPU_FLAG_MMX) { } - if (mm_flags & AV_CPU_FLAG_MMX2) { + if (mm_flags & AV_CPU_FLAG_MMXEXT) { ASSIGN_LF(mmx2); } diff --git a/libavcodec/x86/vp3dsp_init.c b/libavcodec/x86/vp3dsp_init.c index e0366d0f06..8905452dcf 100644 --- a/libavcodec/x86/vp3dsp_init.c +++ b/libavcodec/x86/vp3dsp_init.c @@ -49,7 +49,7 @@ av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags) } #endif - if (HAVE_MMX2 && cpuflags & AV_CPU_FLAG_MMX2) { + if (HAVE_MMXEXT && cpuflags & AV_CPU_FLAG_MMXEXT) { c->idct_dc_add = ff_vp3_idct_dc_add_mmx2; if (!(flags & CODEC_FLAG_BITEXACT)) { diff --git a/libavcodec/x86/vp8dsp-init.c b/libavcodec/x86/vp8dsp-init.c index 33a908ed76..8c17fa0382 100644 --- a/libavcodec/x86/vp8dsp-init.c +++ b/libavcodec/x86/vp8dsp-init.c @@ -350,7 +350,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c) /* note that 4-tap width=16 functions are missing because w=16 * is only used for luma, and luma is always a copy or sixtap. */ - if (mm_flags & AV_CPU_FLAG_MMX2) { + if (mm_flags & AV_CPU_FLAG_MMXEXT) { VP8_MC_FUNC(2, 4, mmx2); VP8_BILINEAR_MC_FUNC(2, 4, mmx2); #if ARCH_X86_32 diff --git a/libavfilter/x86/gradfun.c b/libavfilter/x86/gradfun.c index b61cdbaf75..230b9f1258 100644 --- a/libavfilter/x86/gradfun.c +++ b/libavfilter/x86/gradfun.c @@ -28,7 +28,7 @@ DECLARE_ALIGNED(16, static const uint16_t, pw_7f)[8] = {0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F}; DECLARE_ALIGNED(16, static const uint16_t, pw_ff)[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF}; -#if HAVE_MMX2 +#if HAVE_MMXEXT static void gradfun_filter_line_mmx2(uint8_t *dst, const uint8_t *src, const uint16_t *dc, int width, int thresh, const uint16_t *dithers) { intptr_t x; @@ -173,8 +173,8 @@ av_cold void ff_gradfun_init_x86(GradFunContext *gf) int cpu_flags = av_get_cpu_flags(); #if HAVE_INLINE_ASM -#if HAVE_MMX2 - if (cpu_flags & AV_CPU_FLAG_MMX2) +#if HAVE_MMXEXT + if (cpu_flags & AV_CPU_FLAG_MMXEXT) gf->filter_line = gradfun_filter_line_mmx2; #endif #if HAVE_SSSE3 diff --git a/libavfilter/x86/yadif.c b/libavfilter/x86/yadif.c index 68aa249ccb..18d7278ab1 100644 --- a/libavfilter/x86/yadif.c +++ b/libavfilter/x86/yadif.c @@ -45,7 +45,7 @@ DECLARE_ASM_CONST(16, const xmm_reg, pw_1) = {0x0001000100010001ULL, 0x000100010 #undef COMPILE_TEMPLATE_SSE #endif -#if HAVE_MMX2 +#if HAVE_MMXEXT #undef RENAME #define RENAME(a) a ## _mmx2 #include "yadif_template.c" @@ -58,8 +58,8 @@ av_cold void ff_yadif_init_x86(YADIFContext *yadif) int cpu_flags = av_get_cpu_flags(); #if HAVE_INLINE_ASM -#if HAVE_MMX2 - if (cpu_flags & AV_CPU_FLAG_MMX2) +#if HAVE_MMXEXT + if (cpu_flags & AV_CPU_FLAG_MMXEXT) yadif->filter_line = yadif_filter_line_mmx2; #endif #if HAVE_SSE diff --git a/libavresample/audio_mix.c b/libavresample/audio_mix.c index 2c2a356844..e8ab2e3ee7 100644 --- a/libavresample/audio_mix.c +++ b/libavresample/audio_mix.c @@ -314,7 +314,15 @@ int ff_audio_mix_init(AVAudioResampleContext *avr) } /* build matrix if the user did not already set one */ - if (!avr->am->matrix) { + if (avr->am->matrix) { + if (avr->am->coeff_type != avr->mix_coeff_type || + avr->am->in_layout != avr->in_channel_layout || + avr->am->out_layout != avr->out_channel_layout) { + av_log(avr, AV_LOG_ERROR, + "Custom matrix does not match current parameters\n"); + return AVERROR(EINVAL); + } + } else { int i, j; char in_layout_name[128]; char out_layout_name[128]; diff --git a/libavresample/audio_mix_matrix.c b/libavresample/audio_mix_matrix.c index f7121c846d..522a177f20 100644 --- a/libavresample/audio_mix_matrix.c +++ b/libavresample/audio_mix_matrix.c @@ -294,8 +294,8 @@ int avresample_get_matrix(AVAudioResampleContext *avr, double *matrix, in_channels = av_get_channel_layout_nb_channels(avr->in_channel_layout); out_channels = av_get_channel_layout_nb_channels(avr->out_channel_layout); - if ( in_channels < 0 || in_channels > AVRESAMPLE_MAX_CHANNELS || - out_channels < 0 || out_channels > AVRESAMPLE_MAX_CHANNELS) { + if ( in_channels <= 0 || in_channels > AVRESAMPLE_MAX_CHANNELS || + out_channels <= 0 || out_channels > AVRESAMPLE_MAX_CHANNELS) { av_log(avr, AV_LOG_ERROR, "Invalid channel layouts\n"); return AVERROR(EINVAL); } @@ -332,6 +332,7 @@ int avresample_get_matrix(AVAudioResampleContext *avr, double *matrix, av_log(avr, AV_LOG_ERROR, "Invalid mix coeff type\n"); return AVERROR(EINVAL); } + return 0; } @@ -343,14 +344,16 @@ int avresample_set_matrix(AVAudioResampleContext *avr, const double *matrix, in_channels = av_get_channel_layout_nb_channels(avr->in_channel_layout); out_channels = av_get_channel_layout_nb_channels(avr->out_channel_layout); - if ( in_channels < 0 || in_channels > AVRESAMPLE_MAX_CHANNELS || - out_channels < 0 || out_channels > AVRESAMPLE_MAX_CHANNELS) { + if ( in_channels <= 0 || in_channels > AVRESAMPLE_MAX_CHANNELS || + out_channels <= 0 || out_channels > AVRESAMPLE_MAX_CHANNELS) { av_log(avr, AV_LOG_ERROR, "Invalid channel layouts\n"); return AVERROR(EINVAL); } - if (avr->am->matrix) - av_freep(avr->am->matrix); + if (avr->am->matrix) { + av_free(avr->am->matrix[0]); + avr->am->matrix = NULL; + } #define CONVERT_MATRIX(type, expr) \ avr->am->matrix_## type[0] = av_mallocz(out_channels * in_channels * \ @@ -386,5 +389,11 @@ int avresample_set_matrix(AVAudioResampleContext *avr, const double *matrix, /* TODO: detect situations where we can just swap around pointers instead of doing matrix multiplications with 0.0 and 1.0 */ + /* set AudioMix params */ + avr->am->in_layout = avr->in_channel_layout; + avr->am->out_layout = avr->out_channel_layout; + avr->am->in_channels = in_channels; + avr->am->out_channels = out_channels; + return 0; } diff --git a/libavresample/utils.c b/libavresample/utils.c index 05ee65c68d..7588111003 100644 --- a/libavresample/utils.c +++ b/libavresample/utils.c @@ -48,9 +48,8 @@ int avresample_open(AVAudioResampleContext *avr) avr->resample_channels = FFMIN(avr->in_channels, avr->out_channels); avr->downmix_needed = avr->in_channels > avr->out_channels; avr->upmix_needed = avr->out_channels > avr->in_channels || - avr->am->matrix || - (avr->out_channels == avr->in_channels && - avr->in_channel_layout != avr->out_channel_layout); + (!avr->downmix_needed && (avr->am->matrix || + avr->in_channel_layout != avr->out_channel_layout)); avr->mixing_needed = avr->downmix_needed || avr->upmix_needed; /* set resampling parameters */ diff --git a/libavutil/cpu.c b/libavutil/cpu.c index 7a7a272c82..a31ed5e04b 100644 --- a/libavutil/cpu.c +++ b/libavutil/cpu.c @@ -49,10 +49,10 @@ void av_set_cpu_flags_mask(int mask) int av_parse_cpu_flags(const char *s) { -#define CPUFLAG_MMX2 (AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMX2 | AV_CPU_FLAG_CMOV) +#define CPUFLAG_MMXEXT (AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT | AV_CPU_FLAG_CMOV) #define CPUFLAG_3DNOW (AV_CPU_FLAG_3DNOW | AV_CPU_FLAG_MMX) #define CPUFLAG_3DNOWEXT (AV_CPU_FLAG_3DNOWEXT | CPUFLAG_3DNOW) -#define CPUFLAG_SSE (AV_CPU_FLAG_SSE | CPUFLAG_MMX2) +#define CPUFLAG_SSE (AV_CPU_FLAG_SSE | CPUFLAG_MMXEXT) #define CPUFLAG_SSE2 (AV_CPU_FLAG_SSE2 | CPUFLAG_SSE) #define CPUFLAG_SSE2SLOW (AV_CPU_FLAG_SSE2SLOW | CPUFLAG_SSE2) #define CPUFLAG_SSE3 (AV_CPU_FLAG_SSE3 | CPUFLAG_SSE2) @@ -69,7 +69,7 @@ int av_parse_cpu_flags(const char *s) { "altivec" , NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_ALTIVEC }, .unit = "flags" }, #elif ARCH_X86 { "mmx" , NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_MMX }, .unit = "flags" }, - { "mmx2" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_MMX2 }, .unit = "flags" }, + { "mmxext" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_MMXEXT }, .unit = "flags" }, { "sse" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE }, .unit = "flags" }, { "sse2" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE2 }, .unit = "flags" }, { "sse2slow", NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE2SLOW }, .unit = "flags" }, @@ -174,7 +174,7 @@ static const struct { { AV_CPU_FLAG_ALTIVEC, "altivec" }, #elif ARCH_X86 { AV_CPU_FLAG_MMX, "mmx" }, - { AV_CPU_FLAG_MMX2, "mmx2" }, + { AV_CPU_FLAG_MMXEXT, "mmxext" }, { AV_CPU_FLAG_SSE, "sse" }, { AV_CPU_FLAG_SSE2, "sse2" }, { AV_CPU_FLAG_SSE2SLOW, "sse2(slow)" }, diff --git a/libavutil/cpu.h b/libavutil/cpu.h index d8ec32cdc2..3adccbdd68 100644 --- a/libavutil/cpu.h +++ b/libavutil/cpu.h @@ -27,6 +27,7 @@ /* lower 16 bits - CPU features */ #define AV_CPU_FLAG_MMX 0x0001 ///< standard MMX +#define AV_CPU_FLAG_MMXEXT 0x0002 ///< SSE integer functions or AMD MMX ext #define AV_CPU_FLAG_MMX2 0x0002 ///< SSE integer functions or AMD MMX ext #define AV_CPU_FLAG_3DNOW 0x0004 ///< AMD 3DNOW #define AV_CPU_FLAG_SSE 0x0008 ///< SSE functions diff --git a/libavutil/utils.c b/libavutil/utils.c index 971b48bef4..127c4b1461 100644 --- a/libavutil/utils.c +++ b/libavutil/utils.c @@ -33,6 +33,7 @@ unsigned avutil_version(void) av_assert0(AVMEDIA_TYPE_ATTACHMENT == 4); av_assert0(AV_PICTURE_TYPE_BI == 7); av_assert0(LIBAVUTIL_VERSION_MICRO >= 100); + av_assert0(HAVE_MMX2 == HAVE_MMXEXT); return LIBAVUTIL_VERSION_INT; } diff --git a/libavutil/version.h b/libavutil/version.h index 04d111b74b..f9448d5781 100644 --- a/libavutil/version.h +++ b/libavutil/version.h @@ -40,7 +40,7 @@ #define LIBAVUTIL_VERSION_MAJOR 51 #define LIBAVUTIL_VERSION_MINOR 66 -#define LIBAVUTIL_VERSION_MICRO 100 +#define LIBAVUTIL_VERSION_MICRO 101 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ LIBAVUTIL_VERSION_MINOR, \ diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c index b1052247a0..7b31f2a88a 100644 --- a/libavutil/x86/cpu.c +++ b/libavutil/x86/cpu.c @@ -122,7 +122,7 @@ int ff_get_cpu_flags_x86(void) if (std_caps & (1 << 23)) rval |= AV_CPU_FLAG_MMX; if (std_caps & (1 << 25)) - rval |= AV_CPU_FLAG_MMX2; + rval |= AV_CPU_FLAG_MMXEXT; #if HAVE_SSE if (std_caps & (1 << 25)) rval |= AV_CPU_FLAG_SSE; @@ -159,7 +159,7 @@ int ff_get_cpu_flags_x86(void) if (ext_caps & (1 << 23)) rval |= AV_CPU_FLAG_MMX; if (ext_caps & (1 << 22)) - rval |= AV_CPU_FLAG_MMX2; + rval |= AV_CPU_FLAG_MMXEXT; /* Allow for selectively disabling SSE2 functions on AMD processors with SSE2 support but not SSE4a. This includes Athlon64, some diff --git a/libswscale/swscale.c b/libswscale/swscale.c index 5a28ce1d58..ae79eb66e8 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -663,8 +663,8 @@ static int swScale(SwsContext *c, const uint8_t *src[], if (isPlanar(dstFormat) && isALPHA(dstFormat) && !alpPixBuf) fillPlane(dst[3], dstStride[3], dstW, dstY - lastDstY, lastDstY, 255); -#if HAVE_MMX2 && HAVE_INLINE_ASM - if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2) +#if HAVE_MMXEXT && HAVE_INLINE_ASM + if (av_get_cpu_flags() & AV_CPU_FLAG_MMXEXT) __asm__ volatile ("sfence" ::: "memory"); #endif emms_c(); diff --git a/libswscale/swscale.h b/libswscale/swscale.h index ba781803dd..da29d47e3a 100644 --- a/libswscale/swscale.h +++ b/libswscale/swscale.h @@ -82,7 +82,10 @@ const char *swscale_license(void); * are only provided for API compatibility. */ #define SWS_CPU_CAPS_MMX 0x80000000 +#define SWS_CPU_CAPS_MMXEXT 0x20000000 +#if LIBSWSCALE_VERSION_MAJOR < 3 #define SWS_CPU_CAPS_MMX2 0x20000000 +#endif #define SWS_CPU_CAPS_3DNOW 0x40000000 #define SWS_CPU_CAPS_ALTIVEC 0x10000000 #define SWS_CPU_CAPS_BFIN 0x01000000 diff --git a/libswscale/utils.c b/libswscale/utils.c index f4d3546845..2e387b52eb 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -599,7 +599,7 @@ fail: return ret; } -#if HAVE_MMX2 && HAVE_INLINE_ASM +#if HAVE_MMXEXT && HAVE_INLINE_ASM static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, int16_t *filter, int32_t *filterPos, int numSplits) { @@ -762,7 +762,7 @@ static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, return fragmentPos + 1; } -#endif /* HAVE_MMX2 && HAVE_INLINE_ASM */ +#endif /* HAVE_MMXEXT && HAVE_INLINE_ASM */ static void getSubSampleFactors(int *h, int *v, enum PixelFormat format) { @@ -1024,7 +1024,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, c->srcBpc = 16; if (c->dstBpc == 16) dst_stride <<= 1; - if (HAVE_MMX2 && HAVE_INLINE_ASM && cpu_flags & AV_CPU_FLAG_MMX2 && + if (HAVE_MMXEXT && HAVE_INLINE_ASM && cpu_flags & AV_CPU_FLAG_MMXEXT && c->srcBpc == 8 && c->dstBpc <= 14) { c->canMMX2BeUsed = (dstW >= srcW && (dstW & 31) == 0 && (srcW & 15) == 0) ? 1 : 0; @@ -1063,7 +1063,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, /* precalculate horizontal scaler filter coefficients */ { -#if HAVE_MMX2 && HAVE_INLINE_ASM +#if HAVE_MMXEXT && HAVE_INLINE_ASM // can't downscale !!! if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR)) { c->lumMmx2FilterCodeSize = initMMX2HScaler(dstW, c->lumXInc, NULL, @@ -1107,7 +1107,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, mprotect(c->chrMmx2FilterCode, c->chrMmx2FilterCodeSize, PROT_EXEC | PROT_READ); #endif } else -#endif /* HAVE_MMX2 && HAVE_INLINE_ASM */ +#endif /* HAVE_MMXEXT && HAVE_INLINE_ASM */ { const int filterAlign = (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? 4 : @@ -1273,7 +1273,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, #endif av_get_pix_fmt_name(dstFormat)); - if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2) + if (HAVE_MMXEXT && cpu_flags & AV_CPU_FLAG_MMXEXT) av_log(c, AV_LOG_INFO, "using MMX2\n"); else if (HAVE_AMD3DNOW && cpu_flags & AV_CPU_FLAG_3DNOW) av_log(c, AV_LOG_INFO, "using 3DNOW\n"); diff --git a/libswscale/version.h b/libswscale/version.h index ef0e0298f6..37dcc96572 100644 --- a/libswscale/version.h +++ b/libswscale/version.h @@ -28,7 +28,7 @@ #define LIBSWSCALE_VERSION_MAJOR 2 #define LIBSWSCALE_VERSION_MINOR 1 -#define LIBSWSCALE_VERSION_MICRO 100 +#define LIBSWSCALE_VERSION_MICRO 101 #define LIBSWSCALE_VERSION_INT AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \ LIBSWSCALE_VERSION_MINOR, \ diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c index cd46df719d..9e4938a81a 100644 --- a/libswscale/x86/rgb2rgb.c +++ b/libswscale/x86/rgb2rgb.c @@ -88,7 +88,7 @@ DECLARE_ASM_CONST(8, uint64_t, mul16_mid) = 0x2080208020802080ULL; //Note: We have C, MMX, MMX2, 3DNOW versions, there is no 3DNOW + MMX2 one. -#define COMPILE_TEMPLATE_MMX2 0 +#define COMPILE_TEMPLATE_MMXEXT 0 #define COMPILE_TEMPLATE_AMD3DNOW 0 #define COMPILE_TEMPLATE_SSE2 0 @@ -99,8 +99,8 @@ DECLARE_ASM_CONST(8, uint64_t, mul16_mid) = 0x2080208020802080ULL; //MMX2 versions #undef RENAME -#undef COMPILE_TEMPLATE_MMX2 -#define COMPILE_TEMPLATE_MMX2 1 +#undef COMPILE_TEMPLATE_MMXEXT +#define COMPILE_TEMPLATE_MMXEXT 1 #define RENAME(a) a ## _MMX2 #include "rgb2rgb_template.c" @@ -113,10 +113,10 @@ DECLARE_ASM_CONST(8, uint64_t, mul16_mid) = 0x2080208020802080ULL; //3DNOW versions #undef RENAME -#undef COMPILE_TEMPLATE_MMX2 +#undef COMPILE_TEMPLATE_MMXEXT #undef COMPILE_TEMPLATE_SSE2 #undef COMPILE_TEMPLATE_AMD3DNOW -#define COMPILE_TEMPLATE_MMX2 0 +#define COMPILE_TEMPLATE_MMXEXT 0 #define COMPILE_TEMPLATE_SSE2 0 #define COMPILE_TEMPLATE_AMD3DNOW 1 #define RENAME(a) a ## _3DNOW @@ -140,7 +140,7 @@ av_cold void rgb2rgb_init_x86(void) rgb2rgb_init_MMX(); if (HAVE_AMD3DNOW && cpu_flags & AV_CPU_FLAG_3DNOW) rgb2rgb_init_3DNOW(); - if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2) + if (HAVE_MMXEXT && cpu_flags & AV_CPU_FLAG_MMXEXT) rgb2rgb_init_MMX2(); if (HAVE_SSE && cpu_flags & AV_CPU_FLAG_SSE2) rgb2rgb_init_SSE2(); diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c index bb97a0b4a1..594524d9ed 100644 --- a/libswscale/x86/rgb2rgb_template.c +++ b/libswscale/x86/rgb2rgb_template.c @@ -35,7 +35,7 @@ #if COMPILE_TEMPLATE_AMD3DNOW #define PREFETCH "prefetch" #define PAVGB "pavgusb" -#elif COMPILE_TEMPLATE_MMX2 +#elif COMPILE_TEMPLATE_MMXEXT #define PREFETCH "prefetchnta" #define PAVGB "pavgb" #else @@ -49,7 +49,7 @@ #define EMMS "emms" #endif -#if COMPILE_TEMPLATE_MMX2 +#if COMPILE_TEMPLATE_MMXEXT #define MOVNTQ "movntq" #define SFENCE "sfence" #else @@ -1136,7 +1136,7 @@ static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, PREFETCH" 32(%1, %0) \n\t" "movq (%1, %0), %%mm0 \n\t" "movq 8(%1, %0), %%mm1 \n\t" -# if COMPILE_TEMPLATE_MMX2 +# if COMPILE_TEMPLATE_MMXEXT "pshufw $177, %%mm0, %%mm3 \n\t" "pshufw $177, %%mm1, %%mm5 \n\t" "pand %%mm7, %%mm0 \n\t" @@ -1500,7 +1500,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t } #endif /* !COMPILE_TEMPLATE_AMD3DNOW */ -#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW +#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWidth, int srcHeight, int srcStride, int dstStride) { int x,y; @@ -1590,7 +1590,7 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWid SFENCE" \n\t" :::"memory"); } -#endif /* COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW */ +#endif /* COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW */ #if !COMPILE_TEMPLATE_AMD3DNOW /** @@ -1798,7 +1798,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ "1: \n\t" PREFETCH" 64(%0, %%"REG_d") \n\t" PREFETCH" 64(%1, %%"REG_d") \n\t" -#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW +#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW "movq (%0, %%"REG_d"), %%mm0 \n\t" "movq (%1, %%"REG_d"), %%mm1 \n\t" "movq 6(%0, %%"REG_d"), %%mm2 \n\t" @@ -1859,7 +1859,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ "packssdw %%mm1, %%mm0 \n\t" // V1 V0 U1 U0 "psraw $7, %%mm0 \n\t" -#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW +#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW "movq 12(%0, %%"REG_d"), %%mm4 \n\t" "movq 12(%1, %%"REG_d"), %%mm1 \n\t" "movq 18(%0, %%"REG_d"), %%mm2 \n\t" @@ -2580,9 +2580,9 @@ static inline void RENAME(rgb2rgb_init)(void) yuyvtoyuv422 = RENAME(yuyvtoyuv422); #endif /* !COMPILE_TEMPLATE_SSE2 */ -#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW +#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW planar2x = RENAME(planar2x); -#endif /* COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW */ +#endif /* COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW */ rgb24toyv12 = RENAME(rgb24toyv12); yuyvtoyuv420 = RENAME(yuyvtoyuv420); diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c index eb33788168..132ec3e570 100644 --- a/libswscale/x86/swscale.c +++ b/libswscale/x86/swscale.c @@ -74,16 +74,16 @@ DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL; //MMX versions #if HAVE_MMX #undef RENAME -#define COMPILE_TEMPLATE_MMX2 0 +#define COMPILE_TEMPLATE_MMXEXT 0 #define RENAME(a) a ## _MMX #include "swscale_template.c" #endif //MMX2 versions -#if HAVE_MMX2 +#if HAVE_MMXEXT #undef RENAME -#undef COMPILE_TEMPLATE_MMX2 -#define COMPILE_TEMPLATE_MMX2 1 +#undef COMPILE_TEMPLATE_MMXEXT +#define COMPILE_TEMPLATE_MMXEXT 1 #define RENAME(a) a ## _MMX2 #include "swscale_template.c" #endif @@ -375,8 +375,8 @@ av_cold void ff_sws_init_swScale_mmx(SwsContext *c) #if HAVE_INLINE_ASM if (cpu_flags & AV_CPU_FLAG_MMX) sws_init_swScale_MMX(c); -#if HAVE_MMX2 - if (cpu_flags & AV_CPU_FLAG_MMX2) +#if HAVE_MMXEXT + if (cpu_flags & AV_CPU_FLAG_MMXEXT) sws_init_swScale_MMX2(c); if (cpu_flags & AV_CPU_FLAG_SSE3){ if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) @@ -439,7 +439,7 @@ switch(c->dstBpc){ \ if (cpu_flags & AV_CPU_FLAG_MMX) { ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx); ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx); - ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmx2, cpu_flags & AV_CPU_FLAG_MMX2); + ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmx2, cpu_flags & AV_CPU_FLAG_MMXEXT); switch (c->srcFormat) { case PIX_FMT_Y400A: @@ -471,7 +471,7 @@ switch(c->dstBpc){ \ break; } } - if (cpu_flags & AV_CPU_FLAG_MMX2) { + if (cpu_flags & AV_CPU_FLAG_MMXEXT) { ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmx2, , 1); } #endif diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c index 245bfdeadd..370a0ebe1b 100644 --- a/libswscale/x86/swscale_template.c +++ b/libswscale/x86/swscale_template.c @@ -23,13 +23,13 @@ #undef MOVNTQ2 #undef PREFETCH -#if COMPILE_TEMPLATE_MMX2 +#if COMPILE_TEMPLATE_MMXEXT #define PREFETCH "prefetchnta" #else #define PREFETCH " # nop" #endif -#if COMPILE_TEMPLATE_MMX2 +#if COMPILE_TEMPLATE_MMXEXT #define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t" #define MOVNTQ2 "movntq " #else @@ -38,7 +38,7 @@ #endif #define MOVNTQ(a,b) REAL_MOVNTQ(a,b) -#if !COMPILE_TEMPLATE_MMX2 +#if !COMPILE_TEMPLATE_MMXEXT static av_always_inline void dither_8to16(const uint8_t *srcDither, int rot) { @@ -641,7 +641,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter, "cmp "#dstw", "#index" \n\t"\ " jb 1b \n\t" -#if COMPILE_TEMPLATE_MMX2 +#if COMPILE_TEMPLATE_MMXEXT #undef WRITEBGR24 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX2(dst, dstw, index) #else @@ -1445,7 +1445,7 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0, } } -#if COMPILE_TEMPLATE_MMX2 +#if COMPILE_TEMPLATE_MMXEXT static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, int dstWidth, const uint8_t *src, int srcW, int xInc) @@ -1627,7 +1627,7 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2, dst2[i] = src2[srcW-1]*128; } } -#endif /* COMPILE_TEMPLATE_MMX2 */ +#endif /* COMPILE_TEMPLATE_MMXEXT */ static av_cold void RENAME(sws_init_swScale)(SwsContext *c) { @@ -1691,17 +1691,17 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c) if (c->srcBpc == 8 && c->dstBpc <= 14) { // Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one). -#if COMPILE_TEMPLATE_MMX2 +#if COMPILE_TEMPLATE_MMXEXT if (c->flags & SWS_FAST_BILINEAR && c->canMMX2BeUsed) { c->hyscale_fast = RENAME(hyscale_fast); c->hcscale_fast = RENAME(hcscale_fast); } else { -#endif /* COMPILE_TEMPLATE_MMX2 */ +#endif /* COMPILE_TEMPLATE_MMXEXT */ c->hyscale_fast = NULL; c->hcscale_fast = NULL; -#if COMPILE_TEMPLATE_MMX2 +#if COMPILE_TEMPLATE_MMXEXT } -#endif /* COMPILE_TEMPLATE_MMX2 */ +#endif /* COMPILE_TEMPLATE_MMXEXT */ } } diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c index 2a76716335..8cb7f1ae0d 100644 --- a/libswscale/x86/yuv2rgb.c +++ b/libswscale/x86/yuv2rgb.c @@ -52,20 +52,20 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL; //MMX versions #if HAVE_MMX #undef RENAME -#undef COMPILE_TEMPLATE_MMX2 -#define COMPILE_TEMPLATE_MMX2 0 +#undef COMPILE_TEMPLATE_MMXEXT +#define COMPILE_TEMPLATE_MMXEXT 0 #define RENAME(a) a ## _MMX #include "yuv2rgb_template.c" #endif /* HAVE_MMX */ //MMX2 versions -#if HAVE_MMX2 +#if HAVE_MMXEXT #undef RENAME -#undef COMPILE_TEMPLATE_MMX2 -#define COMPILE_TEMPLATE_MMX2 1 +#undef COMPILE_TEMPLATE_MMXEXT +#define COMPILE_TEMPLATE_MMXEXT 1 #define RENAME(a) a ## _MMX2 #include "yuv2rgb_template.c" -#endif /* HAVE_MMX2 */ +#endif /* HAVE_MMXEXT */ #endif /* HAVE_INLINE_ASM */ @@ -74,8 +74,8 @@ av_cold SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c) #if HAVE_INLINE_ASM int cpu_flags = av_get_cpu_flags(); -#if HAVE_MMX2 - if (cpu_flags & AV_CPU_FLAG_MMX2) { +#if HAVE_MMXEXT + if (cpu_flags & AV_CPU_FLAG_MMXEXT) { switch (c->dstFormat) { case PIX_FMT_RGB24: return yuv420_rgb24_MMX2; case PIX_FMT_BGR24: return yuv420_bgr24_MMX2; diff --git a/libswscale/x86/yuv2rgb_template.c b/libswscale/x86/yuv2rgb_template.c index 624de14252..79e48fd7f0 100644 --- a/libswscale/x86/yuv2rgb_template.c +++ b/libswscale/x86/yuv2rgb_template.c @@ -25,7 +25,7 @@ #undef EMMS #undef SFENCE -#if COMPILE_TEMPLATE_MMX2 +#if COMPILE_TEMPLATE_MMXEXT #define MOVNTQ "movntq" #define SFENCE "sfence" #else @@ -181,7 +181,7 @@ "paddusb "GREEN_DITHER"(%4), %%mm2\n\t" \ "paddusb "RED_DITHER"(%4), %%mm1\n\t" \ -#if !COMPILE_TEMPLATE_MMX2 +#if !COMPILE_TEMPLATE_MMXEXT static inline int RENAME(yuv420_rgb15)(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, @@ -237,7 +237,7 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[], YUV2RGB_OPERANDS YUV2RGB_ENDFUNC } -#endif /* !COMPILE_TEMPLATE_MMX2 */ +#endif /* !COMPILE_TEMPLATE_MMXEXT */ #define RGB_PACK24(blue, red)\ "packuswb %%mm3, %%mm0 \n" /* R0 R2 R4 R6 R1 R3 R5 R7 */\ @@ -254,7 +254,7 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[], "punpckhwd %%mm6, %%mm5 \n" /* R4 G4 B4 R5 R6 G6 B6 R7 */\ RGB_PACK24_B -#if COMPILE_TEMPLATE_MMX2 +#if COMPILE_TEMPLATE_MMXEXT DECLARE_ASM_CONST(8, int16_t, mask1101[4]) = {-1,-1, 0,-1}; DECLARE_ASM_CONST(8, int16_t, mask0010[4]) = { 0, 0,-1, 0}; DECLARE_ASM_CONST(8, int16_t, mask0110[4]) = { 0,-1,-1, 0}; @@ -361,7 +361,7 @@ static inline int RENAME(yuv420_bgr24)(SwsContext *c, const uint8_t *src[], MOVNTQ " %%mm5, 16(%1)\n\t" \ MOVNTQ " %%mm"alpha", 24(%1)\n\t" \ -#if !COMPILE_TEMPLATE_MMX2 +#if !COMPILE_TEMPLATE_MMXEXT static inline int RENAME(yuv420_rgb32)(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, @@ -448,4 +448,4 @@ static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[], } #endif -#endif /* !COMPILE_TEMPLATE_MMX2 */ +#endif /* !COMPILE_TEMPLATE_MMXEXT */ diff --git a/tests/fate/lossless-video.mak b/tests/fate/lossless-video.mak index d7d192db9a..e7fe388454 100644 --- a/tests/fate/lossless-video.mak +++ b/tests/fate/lossless-video.mak @@ -1,3 +1,18 @@ +FATE_LAGARITH += fate-lagarith-rgb24 +fate-lagarith-rgb24: CMD = framecrc -i $(SAMPLES)/lagarith/lag-rgb24.avi + +FATE_LAGARITH += fate-lagarith-rgb32 +fate-lagarith-rgb32: CMD = framecrc -i $(SAMPLES)/lagarith/lag-rgb32.avi -pix_fmt bgra + +FATE_LAGARITH += fate-lagarith-yuy2 +fate-lagarith-yuy2: CMD = framecrc -i $(SAMPLES)/lagarith/lag-yuy2.avi + +FATE_LAGARITH += fate-lagarith-yv12 +fate-lagarith-yv12: CMD = framecrc -i $(SAMPLES)/lagarith/lag-yv12.avi + +FATE_SAMPLES_AVCONV += $(FATE_LAGARITH) +fate-lagarith: $(FATE_LAGARITH) + FATE_LOCO += fate-loco-rgb fate-loco-rgb: CMD = framecrc -i $(SAMPLES)/loco/pig-loco-rgb.avi diff --git a/tests/ref/fate/lagarith-rgb24 b/tests/ref/fate/lagarith-rgb24 new file mode 100644 index 0000000000..1eb2bc441c --- /dev/null +++ b/tests/ref/fate/lagarith-rgb24 @@ -0,0 +1,5 @@ +#tb 0: 100/2997 +0, 0, 0, 1, 368640, 0x26f74db2 +0, 1, 1, 1, 368640, 0x63b29ea4 +0, 2, 2, 1, 368640, 0x19467f03 +0, 3, 3, 1, 368640, 0x5fdc3575 diff --git a/tests/ref/fate/lagarith-rgb32 b/tests/ref/fate/lagarith-rgb32 new file mode 100644 index 0000000000..490e2e5c7d --- /dev/null +++ b/tests/ref/fate/lagarith-rgb32 @@ -0,0 +1,26 @@ +#tb 0: 1001/24000 +0, 0, 0, 1, 1382400, 0x00000000 +0, 1, 1, 1, 1382400, 0x00000000 +0, 2, 2, 1, 1382400, 0x00000000 +0, 3, 3, 1, 1382400, 0x00000000 +0, 4, 4, 1, 1382400, 0x00000000 +0, 5, 5, 1, 1382400, 0xf95bde46 +0, 6, 6, 1, 1382400, 0x4f4c0393 +0, 7, 7, 1, 1382400, 0xe5aa40db +0, 8, 8, 1, 1382400, 0xc25a8ba2 +0, 9, 9, 1, 1382400, 0x9db3150d +0, 10, 10, 1, 1382400, 0x730e64b3 +0, 11, 11, 1, 1382400, 0xf8fd7edf +0, 12, 12, 1, 1382400, 0x0114798a +0, 13, 13, 1, 1382400, 0x7571210f +0, 14, 14, 1, 1382400, 0x552ae59d +0, 15, 15, 1, 1382400, 0x7ae0c946 +0, 16, 16, 1, 1382400, 0x0818c3ef +0, 17, 17, 1, 1382400, 0x8257cac4 +0, 18, 18, 1, 1382400, 0x7762a979 +0, 19, 19, 1, 1382400, 0x282af57a +0, 20, 20, 1, 1382400, 0x3f42de50 +0, 21, 21, 1, 1382400, 0xc42d5f93 +0, 22, 22, 1, 1382400, 0x18775c90 +0, 23, 23, 1, 1382400, 0x34befa90 +0, 24, 24, 1, 1382400, 0xd33d5f53 diff --git a/tests/ref/fate/lagarith-yuy2 b/tests/ref/fate/lagarith-yuy2 new file mode 100644 index 0000000000..c5aed92d61 --- /dev/null +++ b/tests/ref/fate/lagarith-yuy2 @@ -0,0 +1,2 @@ +#tb 0: 1/10 +0, 0, 0, 1, 1572864, 0xeed76a7d diff --git a/tests/ref/fate/lagarith-yv12 b/tests/ref/fate/lagarith-yv12 new file mode 100644 index 0000000000..c9c9ff372f --- /dev/null +++ b/tests/ref/fate/lagarith-yv12 @@ -0,0 +1,3 @@ +#tb 0: 1/60 +0, 0, 0, 1, 92160, 0x1dfdf5c1 +0, 1, 1, 1, 92160, 0x6965884f