You've already forked FFmpeg
							
							
				mirror of
				https://github.com/FFmpeg/FFmpeg.git
				synced 2025-10-30 23:18:11 +02:00 
			
		
		
		
	Merge remote-tracking branch 'qatar/master'
* qatar/master:
  lavr: fix handling of custom mix matrices
  fate: force pix_fmt in lagarith-rgb32 test
  fate: add tests for lagarith lossless video codec.
  ARMv6: vp8: fix stack allocation with Apple's assembler
  ARM: vp56: allow inline asm to build with clang
  fft: 3dnow: fix register name typo in DECL_IMDCT macro
  x86: dct32: port to cpuflags
  x86: build: replace mmx2 by mmxext
  Revert "wmapro: prevent division by zero when sample rate is unspecified"
  wmapro: prevent division by zero when sample rate is unspecified
  lagarith: fix color plane inversion for YUY2 output.
  lagarith: pad RGB buffer by 1 byte.
  dsputil: make add_hfyu_left_prediction_sse4() support unaligned src.

Conflicts:
	doc/APIchanges
	libavcodec/lagarith.c
	libavfilter/x86/gradfun.c
	libavutil/cpu.h
	libavutil/version.h
	libswscale/utils.c
	libswscale/version.h
	libswscale/x86/yuv2rgb.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
		
							
								
								
									
										2
									
								
								Doxyfile
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								Doxyfile
									
									
									
									
									
								
							| @@ -1378,7 +1378,7 @@ PREDEFINED             = "__attribute__(x)=" \ | ||||
|                          "DEF(x)=x ## _TMPL" \ | ||||
|                          HAVE_AV_CONFIG_H \ | ||||
|                          HAVE_MMX \ | ||||
|                          HAVE_MMX2 \ | ||||
|                          HAVE_MMXEXT \ | ||||
|                          HAVE_AMD3DNOW \ | ||||
|                          "DECLARE_ALIGNED(a,t,n)=t n" \ | ||||
|                          "offsetof(x,y)=0x42" | ||||
|   | ||||
							
								
								
									
										13
									
								
								configure
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										13
									
								
								configure
									
									
									
									
										vendored
									
									
								
							| @@ -267,7 +267,7 @@ Optimization options (experts only): | ||||
|   --disable-amd3dnow       disable 3DNow! optimizations | ||||
|   --disable-amd3dnowext    disable 3DNow! extended optimizations | ||||
|   --disable-mmx            disable MMX optimizations | ||||
|   --disable-mmx2           disable MMX2 optimizations | ||||
|   --disable-mmxext         disable MMXEXT optimizations | ||||
|   --disable-sse            disable SSE optimizations | ||||
|   --disable-ssse3          disable SSSE3 optimizations | ||||
|   --disable-avx            disable AVX optimizations | ||||
| @@ -1182,7 +1182,7 @@ ARCH_EXT_LIST=' | ||||
|     fma4 | ||||
|     mmi | ||||
|     mmx | ||||
|     mmx2 | ||||
|     mmxext | ||||
|     neon | ||||
|     ppc4xx | ||||
|     sse | ||||
| @@ -1459,7 +1459,7 @@ x86_64_suggest="cmov fast_cmov" | ||||
| amd3dnow_deps="mmx" | ||||
| amd3dnowext_deps="amd3dnow" | ||||
| mmx_deps="x86" | ||||
| mmx2_deps="mmx" | ||||
| mmxext_deps="mmx" | ||||
| sse_deps="mmx" | ||||
| ssse3_deps="sse" | ||||
| avx_deps="ssse3" | ||||
| @@ -3194,9 +3194,9 @@ EOF | ||||
|     # check whether xmm clobbers are supported | ||||
|     check_asm xmm_clobbers '"":::"%xmm0"' | ||||
|  | ||||
|     # check whether binutils is new enough to compile SSSE3/MMX2 | ||||
|     # check whether binutils is new enough to compile SSSE3/MMXEXT | ||||
|     enabled ssse3 && check_asm ssse3 '"pabsw %xmm0, %xmm0"' | ||||
|     enabled mmx2  && check_asm mmx2  '"pmaxub %mm0, %mm1"' | ||||
|     enabled mmxext && check_asm mmxext '"pmaxub %mm0, %mm1"' | ||||
|  | ||||
|     if ! disabled_any asm mmx yasm; then | ||||
|         if check_cmd $yasmexe --version; then | ||||
| @@ -3748,7 +3748,7 @@ echo "runtime cpu detection     ${runtime_cpudetect-no}" | ||||
| if enabled x86; then | ||||
|     echo "${yasmexe}                      ${yasm-no}" | ||||
|     echo "MMX enabled               ${mmx-no}" | ||||
|     echo "MMX2 enabled              ${mmx2-no}" | ||||
|     echo "MMXEXT enabled            ${mmxext-no}" | ||||
|     echo "3DNow! enabled            ${amd3dnow-no}" | ||||
|     echo "3DNow! extended enabled   ${amd3dnowext-no}" | ||||
|     echo "SSE enabled               ${sse-no}" | ||||
| @@ -4019,6 +4019,7 @@ cat > $TMPH <<EOF | ||||
| #define EXTERN_PREFIX "${extern_prefix}" | ||||
| #define EXTERN_ASM ${extern_prefix} | ||||
| #define SLIBSUF "$SLIBSUF" | ||||
| #define HAVE_MMX2 HAVE_MMXEXT | ||||
| EOF | ||||
|  | ||||
| test -n "$assert_level" && | ||||
|   | ||||
| @@ -70,6 +70,11 @@ API changes, most recent first: | ||||
| 2012-03-26 - a67d9cf - lavfi 2.66.100 | ||||
|   Add avfilter_fill_frame_from_{audio_,}buffer_ref() functions. | ||||
|  | ||||
| 2012-08-03 - xxxxxxx - lavu 51.37.1 - cpu.h | ||||
|                        lsws 2.1.1   - swscale.h | ||||
|   Rename AV_CPU_FLAG_MMX2  ---> AV_CPU_FLAG_MMXEXT. | ||||
|   Rename SWS_CPU_CAPS_MMX2 ---> SWS_CPU_CAPS_MMXEXT. | ||||
|  | ||||
| 2012-07-xx - xxxxxxx - lavf 54.13.0 - avformat.h | ||||
|   Add AVFMT_FLAG_NOBUFFER for low latency use cases. | ||||
|  | ||||
|   | ||||
| @@ -29,6 +29,14 @@ | ||||
| #   define T(x) | ||||
| #endif | ||||
|  | ||||
| #if CONFIG_THUMB || defined __clang__ | ||||
| #   define L(x) | ||||
| #   define U(x) x | ||||
| #else | ||||
| #   define L(x) x | ||||
| #   define U(x) | ||||
| #endif | ||||
|  | ||||
| #if HAVE_ARMV6 && HAVE_INLINE_ASM | ||||
|  | ||||
| #define vp56_rac_get_prob vp56_rac_get_prob_armv6 | ||||
| @@ -42,8 +50,8 @@ static inline int vp56_rac_get_prob_armv6(VP56RangeCoder *c, int pr) | ||||
|     __asm__ ("adds    %3,  %3,  %0           \n" | ||||
|              "itt     cs                     \n" | ||||
|              "cmpcs   %7,  %4                \n" | ||||
|            A("ldrcsh  %2,  [%4], #2          \n") | ||||
|            T("ldrhcs  %2,  [%4], #2          \n") | ||||
|            L("ldrcsh  %2,  [%4], #2          \n") | ||||
|            U("ldrhcs  %2,  [%4], #2          \n") | ||||
|              "rsb     %0,  %6,  #256         \n" | ||||
|              "smlabb  %0,  %5,  %6,  %0      \n" | ||||
|            T("itttt   cs                     \n") | ||||
| @@ -80,8 +88,8 @@ static inline int vp56_rac_get_prob_branchy_armv6(VP56RangeCoder *c, int pr) | ||||
|     __asm__ ("adds    %3,  %3,  %0           \n" | ||||
|              "itt     cs                     \n" | ||||
|              "cmpcs   %7,  %4                \n" | ||||
|            A("ldrcsh  %2,  [%4], #2          \n") | ||||
|            T("ldrhcs  %2,  [%4], #2          \n") | ||||
|            L("ldrcsh  %2,  [%4], #2          \n") | ||||
|            U("ldrhcs  %2,  [%4], #2          \n") | ||||
|              "rsb     %0,  %6,  #256         \n" | ||||
|              "smlabb  %0,  %5,  %6,  %0      \n" | ||||
|            T("itttt   cs                     \n") | ||||
|   | ||||
| @@ -1226,7 +1226,13 @@ vp8_mc_1                bilin,  8, v | ||||
| vp8_mc_1                bilin,  4, h | ||||
| vp8_mc_1                bilin,  4, v | ||||
|  | ||||
| #define TMPSIZE \size * (8 - 8*(\size > 4) + \ytaps - 1) | ||||
| /* True relational expressions have the value -1 in the GNU assembler, | ||||
|    +1 in Apple's. */ | ||||
| #ifdef __APPLE__ | ||||
| #   define TMPSIZE \size * (8 + 8*(\size > 4) + \ytaps - 1) | ||||
| #else | ||||
| #   define TMPSIZE \size * (8 - 8*(\size > 4) + \ytaps - 1) | ||||
| #endif | ||||
|  | ||||
| .macro  vp8_mc_hv       name, size, h, v, ytaps | ||||
| function ff_put_vp8_\name\size\()_\h\v\()_armv6, export=1 | ||||
|   | ||||
| @@ -87,7 +87,7 @@ static const struct algo fdct_tab[] = { | ||||
|  | ||||
| #if HAVE_MMX && HAVE_INLINE_ASM | ||||
|     { "MMX",            ff_fdct_mmx,           NO_PERM,   AV_CPU_FLAG_MMX     }, | ||||
|     { "MMX2",           ff_fdct_mmx2,          NO_PERM,   AV_CPU_FLAG_MMX2    }, | ||||
|     { "MMXEXT",         ff_fdct_mmx2,          NO_PERM,   AV_CPU_FLAG_MMXEXT  }, | ||||
|     { "SSE2",           ff_fdct_sse2,          NO_PERM,   AV_CPU_FLAG_SSE2    }, | ||||
| #endif | ||||
|  | ||||
| @@ -132,7 +132,7 @@ static const struct algo idct_tab[] = { | ||||
| #endif | ||||
|     { "SIMPLE-MMX",     ff_simple_idct_mmx,  MMX_SIMPLE_PERM, AV_CPU_FLAG_MMX }, | ||||
|     { "XVID-MMX",       ff_idct_xvid_mmx,      NO_PERM,   AV_CPU_FLAG_MMX,  1 }, | ||||
|     { "XVID-MMX2",      ff_idct_xvid_mmx2,     NO_PERM,   AV_CPU_FLAG_MMX2, 1 }, | ||||
|     { "XVID-MMXEXT",    ff_idct_xvid_mmx2,     NO_PERM,   AV_CPU_FLAG_MMXEXT, 1 }, | ||||
|     { "XVID-SSE2",      ff_idct_xvid_sse2,     SSE2_PERM, AV_CPU_FLAG_SSE2, 1 }, | ||||
| #if ARCH_X86_64 && HAVE_YASM | ||||
|     { "PR-SSE2",        ff_prores_idct_put_10_sse2_wrap,     TRANSPOSE_PERM, AV_CPU_FLAG_SSE2, 1 }, | ||||
|   | ||||
| @@ -116,8 +116,8 @@ int main(int argc, char **argv) | ||||
|     AVCodecContext *ctx; | ||||
|     int c; | ||||
|     DSPContext cctx, mmxctx; | ||||
|     int flags[2] = { AV_CPU_FLAG_MMX, AV_CPU_FLAG_MMX2 }; | ||||
|     int flags_size = HAVE_MMX2 ? 2 : 1; | ||||
|     int flags[2] = { AV_CPU_FLAG_MMX, AV_CPU_FLAG_MMXEXT }; | ||||
|     int flags_size = HAVE_MMXEXT ? 2 : 1; | ||||
|  | ||||
|     if (argc > 1) { | ||||
|         help(); | ||||
|   | ||||
| @@ -68,7 +68,7 @@ cglobal ac3_exponent_min_%1, 3,4,2, exp, reuse_blks, expn, offset | ||||
| %define LOOP_ALIGN | ||||
| INIT_MMX | ||||
| AC3_EXPONENT_MIN mmx | ||||
| %if HAVE_MMX2 | ||||
| %if HAVE_MMXEXT | ||||
| %define PMINUB PMINUB_MMXEXT | ||||
| %define LOOP_ALIGN ALIGN 16 | ||||
| AC3_EXPONENT_MIN mmxext | ||||
|   | ||||
| @@ -65,7 +65,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact) | ||||
|             c->float_to_fixed24 = ff_float_to_fixed24_3dnow; | ||||
|         } | ||||
|     } | ||||
|     if (mm_flags & AV_CPU_FLAG_MMX2 && HAVE_MMX2) { | ||||
|     if (mm_flags & AV_CPU_FLAG_MMXEXT && HAVE_MMXEXT) { | ||||
|         c->ac3_exponent_min = ff_ac3_exponent_min_mmxext; | ||||
|         c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx2; | ||||
|     } | ||||
|   | ||||
| @@ -486,7 +486,7 @@ void ff_cavsdsp_init_mmx(CAVSDSPContext *c, AVCodecContext *avctx) | ||||
|     int mm_flags = av_get_cpu_flags(); | ||||
|  | ||||
| #if HAVE_INLINE_ASM | ||||
|     if (mm_flags & AV_CPU_FLAG_MMX2)  ff_cavsdsp_init_mmx2 (c, avctx); | ||||
|     if (mm_flags & AV_CPU_FLAG_MMXEXT) ff_cavsdsp_init_mmx2(c, avctx); | ||||
|     if (mm_flags & AV_CPU_FLAG_3DNOW) ff_cavsdsp_init_3dnow(c, avctx); | ||||
| #endif /* HAVE_INLINE_ASM */ | ||||
| } | ||||
|   | ||||
| @@ -42,39 +42,24 @@ ps_cos_vec: dd   0.500603,  0.505471,  0.515447,  0.531043 | ||||
| align 32 | ||||
| ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000 | ||||
|  | ||||
| %macro BUTTERFLY_SSE 4 | ||||
|     movaps %4, %1 | ||||
|     subps  %1, %2 | ||||
|     addps  %2, %4 | ||||
|     mulps  %1, %3 | ||||
| %macro BUTTERFLY 4 | ||||
|     subps  %4, %1, %2 | ||||
|     addps  %2, %2, %1 | ||||
|     mulps  %1, %4, %3 | ||||
| %endmacro | ||||
|  | ||||
| %macro BUTTERFLY_AVX 4 | ||||
|     vsubps  %4, %1, %2 | ||||
|     vaddps  %2, %2, %1 | ||||
|     vmulps  %1, %4, %3 | ||||
| %endmacro | ||||
|  | ||||
| %macro BUTTERFLY0_SSE 5 | ||||
|     movaps %4, %1 | ||||
|     shufps %1, %1, %5 | ||||
|     xorps  %4, %2 | ||||
|     addps  %1, %4 | ||||
|     mulps  %1, %3 | ||||
| %endmacro | ||||
|  | ||||
| %macro BUTTERFLY0_SSE2 5 | ||||
| %macro BUTTERFLY0 5 | ||||
| %if cpuflag(sse2) && notcpuflag(avx) | ||||
|     pshufd %4, %1, %5 | ||||
|     xorps  %1, %2 | ||||
|     addps  %1, %4 | ||||
|     mulps  %1, %3 | ||||
| %endmacro | ||||
|  | ||||
| %macro BUTTERFLY0_AVX 5 | ||||
|     vshufps %4, %1, %1, %5 | ||||
|     vxorps  %1, %1, %2 | ||||
|     vaddps  %4, %4, %1 | ||||
|     vmulps  %1, %4, %3 | ||||
| %else | ||||
|     shufps %4, %1, %1, %5 | ||||
|     xorps  %1, %1, %2 | ||||
|     addps  %4, %4, %1 | ||||
|     mulps  %1, %4, %3 | ||||
| %endif | ||||
| %endmacro | ||||
|  | ||||
| %macro BUTTERFLY2 4 | ||||
| @@ -206,14 +191,11 @@ ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000 | ||||
|     movss [outq+116], m6 | ||||
| %endmacro | ||||
|  | ||||
| %define BUTTERFLY  BUTTERFLY_AVX | ||||
| %define BUTTERFLY0 BUTTERFLY0_AVX | ||||
|  | ||||
| INIT_YMM | ||||
| INIT_YMM avx | ||||
| SECTION_TEXT | ||||
| %if HAVE_AVX | ||||
| ; void ff_dct32_float_avx(FFTSample *out, const FFTSample *in) | ||||
| cglobal dct32_float_avx, 2,3,8, out, in, tmp | ||||
| cglobal dct32_float, 2,3,8, out, in, tmp | ||||
|     ; pass 1 | ||||
|     vmovaps     m4, [inq+0] | ||||
|     vinsertf128 m5, m5, [inq+96], 1 | ||||
| @@ -286,9 +268,6 @@ INIT_XMM | ||||
|     RET | ||||
| %endif | ||||
|  | ||||
| %define BUTTERFLY  BUTTERFLY_SSE | ||||
| %define BUTTERFLY0 BUTTERFLY0_SSE | ||||
|  | ||||
| %if ARCH_X86_64 | ||||
| %define SPILL SWAP | ||||
| %define UNSPILL SWAP | ||||
| @@ -411,10 +390,9 @@ INIT_XMM | ||||
| %endif | ||||
|  | ||||
|  | ||||
| INIT_XMM | ||||
| %macro DCT32_FUNC 1 | ||||
| ; void ff_dct32_float_sse(FFTSample *out, const FFTSample *in) | ||||
| cglobal dct32_float_%1, 2,3,16, out, in, tmp | ||||
| %macro DCT32_FUNC 0 | ||||
| cglobal dct32_float, 2, 3, 16, out, in, tmp | ||||
|     ; pass 1 | ||||
|  | ||||
|     movaps      m0, [inq+0] | ||||
| @@ -498,18 +476,16 @@ cglobal dct32_float_%1, 2,3,16, out, in, tmp | ||||
|     RET | ||||
| %endmacro | ||||
|  | ||||
| %macro LOAD_INV_SSE 2 | ||||
| %macro LOAD_INV 2 | ||||
| %if cpuflag(sse2) | ||||
|     pshufd      %1, %2, 0x1b | ||||
| %elif cpuflag(sse) | ||||
|     movaps      %1, %2 | ||||
|     shufps      %1, %1, 0x1b | ||||
| %endif | ||||
| %endmacro | ||||
|  | ||||
| %define LOAD_INV LOAD_INV_SSE | ||||
| DCT32_FUNC sse | ||||
|  | ||||
| %macro LOAD_INV_SSE2 2 | ||||
|     pshufd      %1, %2, 0x1b | ||||
| %endmacro | ||||
|  | ||||
| %define LOAD_INV LOAD_INV_SSE2 | ||||
| %define BUTTERFLY0 BUTTERFLY0_SSE2 | ||||
| DCT32_FUNC sse2 | ||||
| INIT_XMM sse | ||||
| DCT32_FUNC | ||||
| INIT_XMM sse2 | ||||
| DCT32_FUNC | ||||
|   | ||||
| @@ -3171,7 +3171,7 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx) | ||||
|                     c->idct_add              = ff_idct_xvid_sse2_add; | ||||
|                     c->idct                  = ff_idct_xvid_sse2; | ||||
|                     c->idct_permutation_type = FF_SSE2_IDCT_PERM; | ||||
|                 } else if (mm_flags & AV_CPU_FLAG_MMX2) { | ||||
|                 } else if (mm_flags & AV_CPU_FLAG_MMXEXT) { | ||||
|                     c->idct_put              = ff_idct_xvid_mmx2_put; | ||||
|                     c->idct_add              = ff_idct_xvid_mmx2_add; | ||||
|                     c->idct                  = ff_idct_xvid_mmx2; | ||||
| @@ -3187,7 +3187,7 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx) | ||||
|         dsputil_init_mmx(c, avctx, mm_flags); | ||||
|     } | ||||
|  | ||||
|     if (mm_flags & AV_CPU_FLAG_MMX2) | ||||
|     if (mm_flags & AV_CPU_FLAG_MMXEXT) | ||||
|         dsputil_init_mmx2(c, avctx, mm_flags); | ||||
|  | ||||
|     if (mm_flags & AV_CPU_FLAG_3DNOW && HAVE_AMD3DNOW) | ||||
|   | ||||
| @@ -388,12 +388,16 @@ cglobal add_hfyu_median_prediction_mmx2, 6,6,0, dst, top, diff, w, left, left_to | ||||
|     RET | ||||
|  | ||||
|  | ||||
| %macro ADD_HFYU_LEFT_LOOP 1 ; %1 = is_aligned | ||||
| %macro ADD_HFYU_LEFT_LOOP 2 ; %1 = dst_is_aligned, %2 = src_is_aligned | ||||
|     add     srcq, wq | ||||
|     add     dstq, wq | ||||
|     neg     wq | ||||
| %%.loop: | ||||
| %if %2 | ||||
|     mova    m1, [srcq+wq] | ||||
| %else | ||||
|     movu    m1, [srcq+wq] | ||||
| %endif | ||||
|     mova    m2, m1 | ||||
|     psllw   m1, 8 | ||||
|     paddb   m1, m2 | ||||
| @@ -435,7 +439,7 @@ cglobal add_hfyu_left_prediction_ssse3, 3,3,7, dst, src, w, left | ||||
|     mova    m3, [pb_zz11zz55zz99zzdd] | ||||
|     movd    m0, leftm | ||||
|     psllq   m0, 56 | ||||
|     ADD_HFYU_LEFT_LOOP 1 | ||||
|     ADD_HFYU_LEFT_LOOP 1, 1 | ||||
|  | ||||
| INIT_XMM | ||||
| cglobal add_hfyu_left_prediction_sse4, 3,3,7, dst, src, w, left | ||||
| @@ -446,12 +450,14 @@ cglobal add_hfyu_left_prediction_sse4, 3,3,7, dst, src, w, left | ||||
|     movd    m0, leftm | ||||
|     pslldq  m0, 15 | ||||
|     test    srcq, 15 | ||||
|     jnz add_hfyu_left_prediction_ssse3.skip_prologue | ||||
|     jnz .src_unaligned | ||||
|     test    dstq, 15 | ||||
|     jnz .unaligned | ||||
|     ADD_HFYU_LEFT_LOOP 1 | ||||
| .unaligned: | ||||
|     ADD_HFYU_LEFT_LOOP 0 | ||||
|     jnz .dst_unaligned | ||||
|     ADD_HFYU_LEFT_LOOP 1, 1 | ||||
| .dst_unaligned: | ||||
|     ADD_HFYU_LEFT_LOOP 0, 1 | ||||
| .src_unaligned: | ||||
|     ADD_HFYU_LEFT_LOOP 0, 0 | ||||
|  | ||||
|  | ||||
| ; float scalarproduct_float_sse(const float *v1, const float *v2, int len) | ||||
|   | ||||
| @@ -1112,7 +1112,7 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx) | ||||
|             (dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX)) { | ||||
|             if(mm_flags & AV_CPU_FLAG_SSE2){ | ||||
|                 c->fdct = ff_fdct_sse2; | ||||
|             }else if(mm_flags & AV_CPU_FLAG_MMX2){ | ||||
|             } else if (mm_flags & AV_CPU_FLAG_MMXEXT) { | ||||
|                 c->fdct = ff_fdct_mmx2; | ||||
|             }else{ | ||||
|                 c->fdct = ff_fdct_mmx; | ||||
| @@ -1145,8 +1145,7 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx) | ||||
|  | ||||
|         c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx; | ||||
|  | ||||
|  | ||||
|         if (mm_flags & AV_CPU_FLAG_MMX2) { | ||||
|         if (mm_flags & AV_CPU_FLAG_MMXEXT) { | ||||
|             c->sum_abs_dctelem= sum_abs_dctelem_mmx2; | ||||
|             c->vsad[4]= vsad_intra16_mmx2; | ||||
|  | ||||
| @@ -1187,7 +1186,7 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx) | ||||
|         c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx; | ||||
|         c->hadamard8_diff[1] = ff_hadamard8_diff_mmx; | ||||
|  | ||||
|         if (mm_flags & AV_CPU_FLAG_MMX2) { | ||||
|         if (mm_flags & AV_CPU_FLAG_MMXEXT) { | ||||
|             c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx2; | ||||
|             c->hadamard8_diff[1] = ff_hadamard8_diff_mmx2; | ||||
|         } | ||||
|   | ||||
| @@ -1041,7 +1041,7 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i | ||||
|     mova [r1+r5*8], m0 | ||||
|     mova [r1+r6*8], m2 | ||||
|     add    r4, 2 | ||||
|     sub    r4, 2 | ||||
|     sub    r3, 2 | ||||
| %else | ||||
| %if ARCH_X86_64 | ||||
|     movzx  r5,  word [rrevtab+r4-4] | ||||
|   | ||||
| @@ -198,7 +198,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         if (mm_flags & AV_CPU_FLAG_MMX2) { | ||||
|         if (mm_flags & AV_CPU_FLAG_MMXEXT) { | ||||
|             h->pred16x16[HOR_PRED8x8            ] = ff_pred16x16_horizontal_mmx2; | ||||
|             h->pred16x16[DC_PRED8x8             ] = ff_pred16x16_dc_mmx2; | ||||
|             if (chroma_format_idc == 1) | ||||
| @@ -308,7 +308,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth | ||||
|             } | ||||
|         } | ||||
|     } else if (bit_depth == 10) { | ||||
|         if (mm_flags & AV_CPU_FLAG_MMX2) { | ||||
|         if (mm_flags & AV_CPU_FLAG_MMXEXT) { | ||||
|             h->pred4x4[DC_PRED             ] = ff_pred4x4_dc_10_mmxext; | ||||
|             h->pred4x4[HOR_UP_PRED         ] = ff_pred4x4_horizontal_up_10_mmxext; | ||||
|  | ||||
|   | ||||
| @@ -218,7 +218,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, | ||||
| #if HAVE_YASM | ||||
|     int mm_flags = av_get_cpu_flags(); | ||||
|  | ||||
|     if (chroma_format_idc == 1 && mm_flags & AV_CPU_FLAG_MMX2) | ||||
|     if (chroma_format_idc == 1 && mm_flags & AV_CPU_FLAG_MMXEXT) | ||||
|         c->h264_loop_filter_strength = ff_h264_loop_filter_strength_mmx2; | ||||
|  | ||||
|     if (bit_depth == 8) { | ||||
| @@ -236,7 +236,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, | ||||
|             if (mm_flags & AV_CPU_FLAG_CMOV) | ||||
|                 c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_mmx; | ||||
|  | ||||
|             if (mm_flags & AV_CPU_FLAG_MMX2) { | ||||
|             if (mm_flags & AV_CPU_FLAG_MMXEXT) { | ||||
|                 c->h264_idct_dc_add  = ff_h264_idct_dc_add_8_mmx2; | ||||
|                 c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmx2; | ||||
|                 c->h264_idct_add16   = ff_h264_idct_add16_8_mmx2; | ||||
| @@ -304,7 +304,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, | ||||
|         } | ||||
|     } else if (bit_depth == 10) { | ||||
|         if (mm_flags & AV_CPU_FLAG_MMX) { | ||||
|             if (mm_flags & AV_CPU_FLAG_MMX2) { | ||||
|             if (mm_flags & AV_CPU_FLAG_MMXEXT) { | ||||
| #if ARCH_X86_32 | ||||
|                 c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_mmx2; | ||||
|                 c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_mmx2; | ||||
|   | ||||
| @@ -444,7 +444,7 @@ void ff_dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx) | ||||
|         c->sad[0]= sad16_mmx; | ||||
|         c->sad[1]= sad8_mmx; | ||||
|     } | ||||
|     if (mm_flags & AV_CPU_FLAG_MMX2) { | ||||
|     if (mm_flags & AV_CPU_FLAG_MMXEXT) { | ||||
|         c->pix_abs[0][0] = sad16_mmx2; | ||||
|         c->pix_abs[1][0] = sad8_mmx2; | ||||
|  | ||||
|   | ||||
| @@ -595,15 +595,15 @@ static void  denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){ | ||||
| #define HAVE_SSSE3 0 | ||||
|  | ||||
| #undef HAVE_SSE2 | ||||
| #undef HAVE_MMX2 | ||||
| #undef HAVE_MMXEXT | ||||
| #define HAVE_SSE2 0 | ||||
| #define HAVE_MMX2 0 | ||||
| #define HAVE_MMXEXT 0 | ||||
| #define RENAME(a) a ## _MMX | ||||
| #define RENAMEl(a) a ## _mmx | ||||
| #include "mpegvideo_mmx_template.c" | ||||
|  | ||||
| #undef HAVE_MMX2 | ||||
| #define HAVE_MMX2 1 | ||||
| #undef HAVE_MMXEXT | ||||
| #define HAVE_MMXEXT 1 | ||||
| #undef RENAME | ||||
| #undef RENAMEl | ||||
| #define RENAME(a) a ## _MMX2 | ||||
| @@ -660,7 +660,7 @@ void ff_MPV_common_init_mmx(MpegEncContext *s) | ||||
| #endif | ||||
|             if(mm_flags & AV_CPU_FLAG_SSE2){ | ||||
|                 s->dct_quantize= dct_quantize_SSE2; | ||||
|             } else if(mm_flags & AV_CPU_FLAG_MMX2){ | ||||
|             } else if (mm_flags & AV_CPU_FLAG_MMXEXT) { | ||||
|                 s->dct_quantize= dct_quantize_MMX2; | ||||
|             } else { | ||||
|                 s->dct_quantize= dct_quantize_MMX; | ||||
|   | ||||
| @@ -48,7 +48,7 @@ | ||||
| #define MMREG_WIDTH "8" | ||||
| #define MM "%%mm" | ||||
| #define MOVQ "movq" | ||||
| #if HAVE_MMX2 | ||||
| #if HAVE_MMXEXT | ||||
| #define SPREADW(a) "pshufw $0, "a", "a" \n\t" | ||||
| #define PMAXW(a,b) "pmaxsw "a", "b"     \n\t" | ||||
| #define PMAX(a,b) \ | ||||
|   | ||||
| @@ -41,7 +41,7 @@ void ff_pngdsp_init_x86(PNGDSPContext *dsp) | ||||
|     if (flags & AV_CPU_FLAG_MMX) | ||||
|         dsp->add_bytes_l2         = ff_add_bytes_l2_mmx; | ||||
| #endif | ||||
|     if (flags & AV_CPU_FLAG_MMX2) | ||||
|     if (flags & AV_CPU_FLAG_MMXEXT) | ||||
|         dsp->add_paeth_prediction = ff_add_png_paeth_prediction_mmx2; | ||||
|     if (flags & AV_CPU_FLAG_SSE2) | ||||
|         dsp->add_bytes_l2         = ff_add_bytes_l2_sse2; | ||||
|   | ||||
| @@ -37,7 +37,7 @@ av_cold void ff_rv34dsp_init_x86(RV34DSPContext* c, DSPContext *dsp) | ||||
|  | ||||
|     if (mm_flags & AV_CPU_FLAG_MMX) | ||||
|         c->rv34_idct_dc_add = ff_rv34_idct_dc_add_mmx; | ||||
|     if (mm_flags & AV_CPU_FLAG_MMX2) { | ||||
|     if (mm_flags & AV_CPU_FLAG_MMXEXT) { | ||||
|         c->rv34_inv_transform_dc = ff_rv34_idct_dc_noround_mmx2; | ||||
|         c->rv34_idct_add         = ff_rv34_idct_add_mmx2; | ||||
|     } | ||||
|   | ||||
| @@ -204,7 +204,7 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp) | ||||
|         QPEL_MC_SET(put_, _mmx) | ||||
| #endif | ||||
|     } | ||||
|     if (mm_flags & AV_CPU_FLAG_MMX2) { | ||||
|     if (mm_flags & AV_CPU_FLAG_MMXEXT) { | ||||
|         c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_mmx2; | ||||
|         c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_mmx2; | ||||
|         c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_mmx2; | ||||
|   | ||||
| @@ -889,7 +889,7 @@ void ff_dwt_init_x86(DWTContext *c) | ||||
|             c->inner_add_yblock = ff_snow_inner_add_yblock_sse2; | ||||
|         } | ||||
|         else{ | ||||
|             if(mm_flags & AV_CPU_FLAG_MMX2){ | ||||
|             if (mm_flags & AV_CPU_FLAG_MMXEXT) { | ||||
|             c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx; | ||||
| #if HAVE_7REGS | ||||
|             c->vertical_compose97i = ff_snow_vertical_compose97i_mmx; | ||||
|   | ||||
| @@ -760,7 +760,7 @@ void ff_vc1dsp_init_mmx(VC1DSPContext *dsp) | ||||
|             dsp->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_mmx_nornd; | ||||
|     } | ||||
|  | ||||
|     if (mm_flags & AV_CPU_FLAG_MMX2){ | ||||
|     if (mm_flags & AV_CPU_FLAG_MMXEXT) { | ||||
|         dsp->avg_vc1_mspel_pixels_tab[ 0] = ff_avg_vc1_mspel_mc00_mmx2; | ||||
|         dsp->avg_vc1_mspel_pixels_tab[ 4] = avg_vc1_mspel_mc01_mmx2; | ||||
|         dsp->avg_vc1_mspel_pixels_tab[ 8] = avg_vc1_mspel_mc02_mmx2; | ||||
| @@ -810,7 +810,7 @@ void ff_vc1dsp_init_mmx(VC1DSPContext *dsp) | ||||
|     if (mm_flags & AV_CPU_FLAG_MMX) { | ||||
|     } | ||||
|  | ||||
|     if (mm_flags & AV_CPU_FLAG_MMX2) { | ||||
|     if (mm_flags & AV_CPU_FLAG_MMXEXT) { | ||||
|         ASSIGN_LF(mmx2); | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -49,7 +49,7 @@ av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags) | ||||
|     } | ||||
| #endif | ||||
|  | ||||
|     if (HAVE_MMX2 && cpuflags & AV_CPU_FLAG_MMX2) { | ||||
|     if (HAVE_MMXEXT && cpuflags & AV_CPU_FLAG_MMXEXT) { | ||||
|         c->idct_dc_add = ff_vp3_idct_dc_add_mmx2; | ||||
|  | ||||
|         if (!(flags & CODEC_FLAG_BITEXACT)) { | ||||
|   | ||||
| @@ -350,7 +350,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c) | ||||
|  | ||||
|     /* note that 4-tap width=16 functions are missing because w=16 | ||||
|      * is only used for luma, and luma is always a copy or sixtap. */ | ||||
|     if (mm_flags & AV_CPU_FLAG_MMX2) { | ||||
|     if (mm_flags & AV_CPU_FLAG_MMXEXT) { | ||||
|         VP8_MC_FUNC(2, 4, mmx2); | ||||
|         VP8_BILINEAR_MC_FUNC(2, 4, mmx2); | ||||
| #if ARCH_X86_32 | ||||
|   | ||||
| @@ -28,7 +28,7 @@ | ||||
| DECLARE_ALIGNED(16, static const uint16_t, pw_7f)[8] = {0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F}; | ||||
| DECLARE_ALIGNED(16, static const uint16_t, pw_ff)[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF}; | ||||
|  | ||||
| #if HAVE_MMX2 | ||||
| #if HAVE_MMXEXT | ||||
| static void gradfun_filter_line_mmx2(uint8_t *dst, const uint8_t *src, const uint16_t *dc, int width, int thresh, const uint16_t *dithers) | ||||
| { | ||||
|     intptr_t x; | ||||
| @@ -173,8 +173,8 @@ av_cold void ff_gradfun_init_x86(GradFunContext *gf) | ||||
|     int cpu_flags = av_get_cpu_flags(); | ||||
|  | ||||
| #if HAVE_INLINE_ASM | ||||
| #if HAVE_MMX2 | ||||
|     if (cpu_flags & AV_CPU_FLAG_MMX2) | ||||
| #if HAVE_MMXEXT | ||||
|     if (cpu_flags & AV_CPU_FLAG_MMXEXT) | ||||
|         gf->filter_line = gradfun_filter_line_mmx2; | ||||
| #endif | ||||
| #if HAVE_SSSE3 | ||||
|   | ||||
| @@ -45,7 +45,7 @@ DECLARE_ASM_CONST(16, const xmm_reg, pw_1) = {0x0001000100010001ULL, 0x000100010 | ||||
| #undef COMPILE_TEMPLATE_SSE | ||||
| #endif | ||||
|  | ||||
| #if HAVE_MMX2 | ||||
| #if HAVE_MMXEXT | ||||
| #undef RENAME | ||||
| #define RENAME(a) a ## _mmx2 | ||||
| #include "yadif_template.c" | ||||
| @@ -58,8 +58,8 @@ av_cold void ff_yadif_init_x86(YADIFContext *yadif) | ||||
|     int cpu_flags = av_get_cpu_flags(); | ||||
|  | ||||
| #if HAVE_INLINE_ASM | ||||
| #if HAVE_MMX2 | ||||
|     if (cpu_flags & AV_CPU_FLAG_MMX2) | ||||
| #if HAVE_MMXEXT | ||||
|     if (cpu_flags & AV_CPU_FLAG_MMXEXT) | ||||
|         yadif->filter_line = yadif_filter_line_mmx2; | ||||
| #endif | ||||
| #if HAVE_SSE | ||||
|   | ||||
| @@ -314,7 +314,15 @@ int ff_audio_mix_init(AVAudioResampleContext *avr) | ||||
|     } | ||||
|  | ||||
|     /* build matrix if the user did not already set one */ | ||||
|     if (!avr->am->matrix) { | ||||
|     if (avr->am->matrix) { | ||||
|         if (avr->am->coeff_type != avr->mix_coeff_type      || | ||||
|             avr->am->in_layout  != avr->in_channel_layout   || | ||||
|             avr->am->out_layout != avr->out_channel_layout) { | ||||
|             av_log(avr, AV_LOG_ERROR, | ||||
|                    "Custom matrix does not match current parameters\n"); | ||||
|             return AVERROR(EINVAL); | ||||
|         } | ||||
|     } else { | ||||
|         int i, j; | ||||
|         char in_layout_name[128]; | ||||
|         char out_layout_name[128]; | ||||
|   | ||||
| @@ -294,8 +294,8 @@ int avresample_get_matrix(AVAudioResampleContext *avr, double *matrix, | ||||
|     in_channels  = av_get_channel_layout_nb_channels(avr->in_channel_layout); | ||||
|     out_channels = av_get_channel_layout_nb_channels(avr->out_channel_layout); | ||||
|  | ||||
|     if ( in_channels < 0 ||  in_channels > AVRESAMPLE_MAX_CHANNELS || | ||||
|         out_channels < 0 || out_channels > AVRESAMPLE_MAX_CHANNELS) { | ||||
|     if ( in_channels <= 0 ||  in_channels > AVRESAMPLE_MAX_CHANNELS || | ||||
|         out_channels <= 0 || out_channels > AVRESAMPLE_MAX_CHANNELS) { | ||||
|         av_log(avr, AV_LOG_ERROR, "Invalid channel layouts\n"); | ||||
|         return AVERROR(EINVAL); | ||||
|     } | ||||
| @@ -332,6 +332,7 @@ int avresample_get_matrix(AVAudioResampleContext *avr, double *matrix, | ||||
|         av_log(avr, AV_LOG_ERROR, "Invalid mix coeff type\n"); | ||||
|         return AVERROR(EINVAL); | ||||
|     } | ||||
|  | ||||
|     return 0; | ||||
| } | ||||
|  | ||||
| @@ -343,14 +344,16 @@ int avresample_set_matrix(AVAudioResampleContext *avr, const double *matrix, | ||||
|     in_channels  = av_get_channel_layout_nb_channels(avr->in_channel_layout); | ||||
|     out_channels = av_get_channel_layout_nb_channels(avr->out_channel_layout); | ||||
|  | ||||
|     if ( in_channels < 0 ||  in_channels > AVRESAMPLE_MAX_CHANNELS || | ||||
|         out_channels < 0 || out_channels > AVRESAMPLE_MAX_CHANNELS) { | ||||
|     if ( in_channels <= 0 ||  in_channels > AVRESAMPLE_MAX_CHANNELS || | ||||
|         out_channels <= 0 || out_channels > AVRESAMPLE_MAX_CHANNELS) { | ||||
|         av_log(avr, AV_LOG_ERROR, "Invalid channel layouts\n"); | ||||
|         return AVERROR(EINVAL); | ||||
|     } | ||||
|  | ||||
|     if (avr->am->matrix) | ||||
|         av_freep(avr->am->matrix); | ||||
|     if (avr->am->matrix) { | ||||
|         av_free(avr->am->matrix[0]); | ||||
|         avr->am->matrix = NULL; | ||||
|     } | ||||
|  | ||||
| #define CONVERT_MATRIX(type, expr)                                          \ | ||||
|     avr->am->matrix_## type[0] = av_mallocz(out_channels * in_channels *    \ | ||||
| @@ -386,5 +389,11 @@ int avresample_set_matrix(AVAudioResampleContext *avr, const double *matrix, | ||||
|     /* TODO: detect situations where we can just swap around pointers | ||||
|              instead of doing matrix multiplications with 0.0 and 1.0 */ | ||||
|  | ||||
|     /* set AudioMix params */ | ||||
|     avr->am->in_layout    = avr->in_channel_layout; | ||||
|     avr->am->out_layout   = avr->out_channel_layout; | ||||
|     avr->am->in_channels  = in_channels; | ||||
|     avr->am->out_channels = out_channels; | ||||
|  | ||||
|     return 0; | ||||
| } | ||||
|   | ||||
| @@ -48,9 +48,8 @@ int avresample_open(AVAudioResampleContext *avr) | ||||
|     avr->resample_channels = FFMIN(avr->in_channels, avr->out_channels); | ||||
|     avr->downmix_needed    = avr->in_channels  > avr->out_channels; | ||||
|     avr->upmix_needed      = avr->out_channels > avr->in_channels || | ||||
|                              avr->am->matrix                      || | ||||
|                              (avr->out_channels == avr->in_channels && | ||||
|                               avr->in_channel_layout != avr->out_channel_layout); | ||||
|                              (!avr->downmix_needed && (avr->am->matrix || | ||||
|                               avr->in_channel_layout != avr->out_channel_layout)); | ||||
|     avr->mixing_needed     = avr->downmix_needed || avr->upmix_needed; | ||||
|  | ||||
|     /* set resampling parameters */ | ||||
|   | ||||
| @@ -49,10 +49,10 @@ void av_set_cpu_flags_mask(int mask) | ||||
|  | ||||
| int av_parse_cpu_flags(const char *s) | ||||
| { | ||||
| #define CPUFLAG_MMX2     (AV_CPU_FLAG_MMX      | AV_CPU_FLAG_MMX2 | AV_CPU_FLAG_CMOV) | ||||
| #define CPUFLAG_MMXEXT   (AV_CPU_FLAG_MMX      | AV_CPU_FLAG_MMXEXT | AV_CPU_FLAG_CMOV) | ||||
| #define CPUFLAG_3DNOW    (AV_CPU_FLAG_3DNOW    | AV_CPU_FLAG_MMX) | ||||
| #define CPUFLAG_3DNOWEXT (AV_CPU_FLAG_3DNOWEXT | CPUFLAG_3DNOW) | ||||
| #define CPUFLAG_SSE      (AV_CPU_FLAG_SSE      | CPUFLAG_MMX2) | ||||
| #define CPUFLAG_SSE      (AV_CPU_FLAG_SSE      | CPUFLAG_MMXEXT) | ||||
| #define CPUFLAG_SSE2     (AV_CPU_FLAG_SSE2     | CPUFLAG_SSE) | ||||
| #define CPUFLAG_SSE2SLOW (AV_CPU_FLAG_SSE2SLOW | CPUFLAG_SSE2) | ||||
| #define CPUFLAG_SSE3     (AV_CPU_FLAG_SSE3     | CPUFLAG_SSE2) | ||||
| @@ -69,7 +69,7 @@ int av_parse_cpu_flags(const char *s) | ||||
|         { "altivec" , NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_ALTIVEC  },    .unit = "flags" }, | ||||
| #elif ARCH_X86 | ||||
|         { "mmx"     , NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_MMX      },    .unit = "flags" }, | ||||
|         { "mmx2"    , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_MMX2         },    .unit = "flags" }, | ||||
|         { "mmxext"  , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_MMXEXT       },    .unit = "flags" }, | ||||
|         { "sse"     , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE          },    .unit = "flags" }, | ||||
|         { "sse2"    , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE2         },    .unit = "flags" }, | ||||
|         { "sse2slow", NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE2SLOW     },    .unit = "flags" }, | ||||
| @@ -174,7 +174,7 @@ static const struct { | ||||
|     { AV_CPU_FLAG_ALTIVEC,   "altivec"    }, | ||||
| #elif ARCH_X86 | ||||
|     { AV_CPU_FLAG_MMX,       "mmx"        }, | ||||
|     { AV_CPU_FLAG_MMX2,      "mmx2"       }, | ||||
|     { AV_CPU_FLAG_MMXEXT,    "mmxext"     }, | ||||
|     { AV_CPU_FLAG_SSE,       "sse"        }, | ||||
|     { AV_CPU_FLAG_SSE2,      "sse2"       }, | ||||
|     { AV_CPU_FLAG_SSE2SLOW,  "sse2(slow)" }, | ||||
|   | ||||
| @@ -27,6 +27,7 @@ | ||||
|  | ||||
|     /* lower 16 bits - CPU features */ | ||||
| #define AV_CPU_FLAG_MMX          0x0001 ///< standard MMX | ||||
| #define AV_CPU_FLAG_MMXEXT       0x0002 ///< SSE integer functions or AMD MMX ext | ||||
| #define AV_CPU_FLAG_MMX2         0x0002 ///< SSE integer functions or AMD MMX ext | ||||
| #define AV_CPU_FLAG_3DNOW        0x0004 ///< AMD 3DNOW | ||||
| #define AV_CPU_FLAG_SSE          0x0008 ///< SSE functions | ||||
|   | ||||
| @@ -33,6 +33,7 @@ unsigned avutil_version(void) | ||||
|     av_assert0(AVMEDIA_TYPE_ATTACHMENT == 4); | ||||
|     av_assert0(AV_PICTURE_TYPE_BI == 7); | ||||
|     av_assert0(LIBAVUTIL_VERSION_MICRO >= 100); | ||||
|     av_assert0(HAVE_MMX2 == HAVE_MMXEXT); | ||||
|  | ||||
|     return LIBAVUTIL_VERSION_INT; | ||||
| } | ||||
|   | ||||
| @@ -40,7 +40,7 @@ | ||||
|  | ||||
| #define LIBAVUTIL_VERSION_MAJOR 51 | ||||
| #define LIBAVUTIL_VERSION_MINOR 66 | ||||
| #define LIBAVUTIL_VERSION_MICRO 100 | ||||
| #define LIBAVUTIL_VERSION_MICRO 101 | ||||
|  | ||||
| #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ | ||||
|                                                LIBAVUTIL_VERSION_MINOR, \ | ||||
|   | ||||
| @@ -122,7 +122,7 @@ int ff_get_cpu_flags_x86(void) | ||||
|         if (std_caps & (1 << 23)) | ||||
|             rval |= AV_CPU_FLAG_MMX; | ||||
|         if (std_caps & (1 << 25)) | ||||
|             rval |= AV_CPU_FLAG_MMX2; | ||||
|             rval |= AV_CPU_FLAG_MMXEXT; | ||||
| #if HAVE_SSE | ||||
|         if (std_caps & (1 << 25)) | ||||
|             rval |= AV_CPU_FLAG_SSE; | ||||
| @@ -159,7 +159,7 @@ int ff_get_cpu_flags_x86(void) | ||||
|         if (ext_caps & (1 << 23)) | ||||
|             rval |= AV_CPU_FLAG_MMX; | ||||
|         if (ext_caps & (1 << 22)) | ||||
|             rval |= AV_CPU_FLAG_MMX2; | ||||
|             rval |= AV_CPU_FLAG_MMXEXT; | ||||
|  | ||||
|         /* Allow for selectively disabling SSE2 functions on AMD processors | ||||
|            with SSE2 support but not SSE4a. This includes Athlon64, some | ||||
|   | ||||
| @@ -663,8 +663,8 @@ static int swScale(SwsContext *c, const uint8_t *src[], | ||||
|     if (isPlanar(dstFormat) && isALPHA(dstFormat) && !alpPixBuf) | ||||
|         fillPlane(dst[3], dstStride[3], dstW, dstY - lastDstY, lastDstY, 255); | ||||
|  | ||||
| #if HAVE_MMX2 && HAVE_INLINE_ASM | ||||
|     if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2) | ||||
| #if HAVE_MMXEXT && HAVE_INLINE_ASM | ||||
|     if (av_get_cpu_flags() & AV_CPU_FLAG_MMXEXT) | ||||
|         __asm__ volatile ("sfence" ::: "memory"); | ||||
| #endif | ||||
|     emms_c(); | ||||
|   | ||||
| @@ -82,7 +82,10 @@ const char *swscale_license(void); | ||||
|  * are only provided for API compatibility. | ||||
|  */ | ||||
| #define SWS_CPU_CAPS_MMX      0x80000000 | ||||
| #define SWS_CPU_CAPS_MMXEXT   0x20000000 | ||||
| #if LIBSWSCALE_VERSION_MAJOR < 3 | ||||
| #define SWS_CPU_CAPS_MMX2     0x20000000 | ||||
| #endif | ||||
| #define SWS_CPU_CAPS_3DNOW    0x40000000 | ||||
| #define SWS_CPU_CAPS_ALTIVEC  0x10000000 | ||||
| #define SWS_CPU_CAPS_BFIN     0x01000000 | ||||
|   | ||||
| @@ -599,7 +599,7 @@ fail: | ||||
|     return ret; | ||||
| } | ||||
|  | ||||
| #if HAVE_MMX2 && HAVE_INLINE_ASM | ||||
| #if HAVE_MMXEXT && HAVE_INLINE_ASM | ||||
| static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, | ||||
|                            int16_t *filter, int32_t *filterPos, int numSplits) | ||||
| { | ||||
| @@ -762,7 +762,7 @@ static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, | ||||
|  | ||||
|     return fragmentPos + 1; | ||||
| } | ||||
| #endif /* HAVE_MMX2 && HAVE_INLINE_ASM */ | ||||
| #endif /* HAVE_MMXEXT && HAVE_INLINE_ASM */ | ||||
|  | ||||
| static void getSubSampleFactors(int *h, int *v, enum PixelFormat format) | ||||
| { | ||||
| @@ -1024,7 +1024,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, | ||||
|         c->srcBpc = 16; | ||||
|     if (c->dstBpc == 16) | ||||
|         dst_stride <<= 1; | ||||
|     if (HAVE_MMX2 && HAVE_INLINE_ASM && cpu_flags & AV_CPU_FLAG_MMX2 && | ||||
|     if (HAVE_MMXEXT && HAVE_INLINE_ASM && cpu_flags & AV_CPU_FLAG_MMXEXT && | ||||
|         c->srcBpc == 8 && c->dstBpc <= 14) { | ||||
|         c->canMMX2BeUsed = (dstW >= srcW && (dstW & 31) == 0 && | ||||
|                             (srcW & 15) == 0) ? 1 : 0; | ||||
| @@ -1063,7 +1063,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, | ||||
|  | ||||
|     /* precalculate horizontal scaler filter coefficients */ | ||||
|     { | ||||
| #if HAVE_MMX2 && HAVE_INLINE_ASM | ||||
| #if HAVE_MMXEXT && HAVE_INLINE_ASM | ||||
| // can't downscale !!! | ||||
|         if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR)) { | ||||
|             c->lumMmx2FilterCodeSize = initMMX2HScaler(dstW, c->lumXInc, NULL, | ||||
| @@ -1107,7 +1107,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, | ||||
|             mprotect(c->chrMmx2FilterCode, c->chrMmx2FilterCodeSize, PROT_EXEC | PROT_READ); | ||||
| #endif | ||||
|         } else | ||||
| #endif /* HAVE_MMX2 && HAVE_INLINE_ASM */ | ||||
| #endif /* HAVE_MMXEXT && HAVE_INLINE_ASM */ | ||||
|         { | ||||
|             const int filterAlign = | ||||
|                 (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? 4 : | ||||
| @@ -1273,7 +1273,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, | ||||
| #endif | ||||
|                av_get_pix_fmt_name(dstFormat)); | ||||
|  | ||||
|         if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2) | ||||
|         if (HAVE_MMXEXT && cpu_flags & AV_CPU_FLAG_MMXEXT) | ||||
|             av_log(c, AV_LOG_INFO, "using MMX2\n"); | ||||
|         else if (HAVE_AMD3DNOW && cpu_flags & AV_CPU_FLAG_3DNOW) | ||||
|             av_log(c, AV_LOG_INFO, "using 3DNOW\n"); | ||||
|   | ||||
| @@ -28,7 +28,7 @@ | ||||
|  | ||||
| #define LIBSWSCALE_VERSION_MAJOR 2 | ||||
| #define LIBSWSCALE_VERSION_MINOR 1 | ||||
| #define LIBSWSCALE_VERSION_MICRO 100 | ||||
| #define LIBSWSCALE_VERSION_MICRO 101 | ||||
|  | ||||
| #define LIBSWSCALE_VERSION_INT  AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \ | ||||
|                                                LIBSWSCALE_VERSION_MINOR, \ | ||||
|   | ||||
| @@ -88,7 +88,7 @@ DECLARE_ASM_CONST(8, uint64_t, mul16_mid)    = 0x2080208020802080ULL; | ||||
|  | ||||
| //Note: We have C, MMX, MMX2, 3DNOW versions, there is no 3DNOW + MMX2 one. | ||||
|  | ||||
| #define COMPILE_TEMPLATE_MMX2 0 | ||||
| #define COMPILE_TEMPLATE_MMXEXT 0 | ||||
| #define COMPILE_TEMPLATE_AMD3DNOW 0 | ||||
| #define COMPILE_TEMPLATE_SSE2 0 | ||||
|  | ||||
| @@ -99,8 +99,8 @@ DECLARE_ASM_CONST(8, uint64_t, mul16_mid)    = 0x2080208020802080ULL; | ||||
|  | ||||
| //MMX2 versions | ||||
| #undef RENAME | ||||
| #undef COMPILE_TEMPLATE_MMX2 | ||||
| #define COMPILE_TEMPLATE_MMX2 1 | ||||
| #undef COMPILE_TEMPLATE_MMXEXT | ||||
| #define COMPILE_TEMPLATE_MMXEXT 1 | ||||
| #define RENAME(a) a ## _MMX2 | ||||
| #include "rgb2rgb_template.c" | ||||
|  | ||||
| @@ -113,10 +113,10 @@ DECLARE_ASM_CONST(8, uint64_t, mul16_mid)    = 0x2080208020802080ULL; | ||||
|  | ||||
| //3DNOW versions | ||||
| #undef RENAME | ||||
| #undef COMPILE_TEMPLATE_MMX2 | ||||
| #undef COMPILE_TEMPLATE_MMXEXT | ||||
| #undef COMPILE_TEMPLATE_SSE2 | ||||
| #undef COMPILE_TEMPLATE_AMD3DNOW | ||||
| #define COMPILE_TEMPLATE_MMX2 0 | ||||
| #define COMPILE_TEMPLATE_MMXEXT 0 | ||||
| #define COMPILE_TEMPLATE_SSE2 0 | ||||
| #define COMPILE_TEMPLATE_AMD3DNOW 1 | ||||
| #define RENAME(a) a ## _3DNOW | ||||
| @@ -140,7 +140,7 @@ av_cold void rgb2rgb_init_x86(void) | ||||
|         rgb2rgb_init_MMX(); | ||||
|     if (HAVE_AMD3DNOW && cpu_flags & AV_CPU_FLAG_3DNOW) | ||||
|         rgb2rgb_init_3DNOW(); | ||||
|     if (HAVE_MMX2     && cpu_flags & AV_CPU_FLAG_MMX2) | ||||
|     if (HAVE_MMXEXT   && cpu_flags & AV_CPU_FLAG_MMXEXT) | ||||
|         rgb2rgb_init_MMX2(); | ||||
|     if (HAVE_SSE      && cpu_flags & AV_CPU_FLAG_SSE2) | ||||
|         rgb2rgb_init_SSE2(); | ||||
|   | ||||
| @@ -35,7 +35,7 @@ | ||||
| #if COMPILE_TEMPLATE_AMD3DNOW | ||||
| #define PREFETCH  "prefetch" | ||||
| #define PAVGB     "pavgusb" | ||||
| #elif COMPILE_TEMPLATE_MMX2 | ||||
| #elif COMPILE_TEMPLATE_MMXEXT | ||||
| #define PREFETCH "prefetchnta" | ||||
| #define PAVGB     "pavgb" | ||||
| #else | ||||
| @@ -49,7 +49,7 @@ | ||||
| #define EMMS     "emms" | ||||
| #endif | ||||
|  | ||||
| #if COMPILE_TEMPLATE_MMX2 | ||||
| #if COMPILE_TEMPLATE_MMXEXT | ||||
| #define MOVNTQ "movntq" | ||||
| #define SFENCE "sfence" | ||||
| #else | ||||
| @@ -1136,7 +1136,7 @@ static inline void RENAME(shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, | ||||
|         PREFETCH"     32(%1, %0)        \n\t" | ||||
|         "movq           (%1, %0), %%mm0 \n\t" | ||||
|         "movq          8(%1, %0), %%mm1 \n\t" | ||||
| # if COMPILE_TEMPLATE_MMX2 | ||||
| # if COMPILE_TEMPLATE_MMXEXT | ||||
|         "pshufw      $177, %%mm0, %%mm3 \n\t" | ||||
|         "pshufw      $177, %%mm1, %%mm5 \n\t" | ||||
|         "pand       %%mm7, %%mm0        \n\t" | ||||
| @@ -1500,7 +1500,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t | ||||
| } | ||||
| #endif /* !COMPILE_TEMPLATE_AMD3DNOW */ | ||||
|  | ||||
| #if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW | ||||
| #if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW | ||||
| static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWidth, int srcHeight, int srcStride, int dstStride) | ||||
| { | ||||
|     int x,y; | ||||
| @@ -1590,7 +1590,7 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWid | ||||
|                      SFENCE"     \n\t" | ||||
|                      :::"memory"); | ||||
| } | ||||
| #endif /* COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW */ | ||||
| #endif /* COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW */ | ||||
|  | ||||
| #if !COMPILE_TEMPLATE_AMD3DNOW | ||||
| /** | ||||
| @@ -1798,7 +1798,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | ||||
|             "1:                                         \n\t" | ||||
|             PREFETCH"    64(%0, %%"REG_d")              \n\t" | ||||
|             PREFETCH"    64(%1, %%"REG_d")              \n\t" | ||||
| #if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW | ||||
| #if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW | ||||
|             "movq          (%0, %%"REG_d"), %%mm0       \n\t" | ||||
|             "movq          (%1, %%"REG_d"), %%mm1       \n\t" | ||||
|             "movq         6(%0, %%"REG_d"), %%mm2       \n\t" | ||||
| @@ -1859,7 +1859,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | ||||
|             "packssdw                %%mm1, %%mm0       \n\t" // V1 V0 U1 U0 | ||||
|             "psraw                      $7, %%mm0       \n\t" | ||||
|  | ||||
| #if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW | ||||
| #if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW | ||||
|             "movq        12(%0, %%"REG_d"), %%mm4       \n\t" | ||||
|             "movq        12(%1, %%"REG_d"), %%mm1       \n\t" | ||||
|             "movq        18(%0, %%"REG_d"), %%mm2       \n\t" | ||||
| @@ -2580,9 +2580,9 @@ static inline void RENAME(rgb2rgb_init)(void) | ||||
|     yuyvtoyuv422       = RENAME(yuyvtoyuv422); | ||||
| #endif /* !COMPILE_TEMPLATE_SSE2 */ | ||||
|  | ||||
| #if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW | ||||
| #if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW | ||||
|     planar2x           = RENAME(planar2x); | ||||
| #endif /* COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW */ | ||||
| #endif /* COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW */ | ||||
|     rgb24toyv12        = RENAME(rgb24toyv12); | ||||
|  | ||||
|     yuyvtoyuv420       = RENAME(yuyvtoyuv420); | ||||
|   | ||||
| @@ -74,16 +74,16 @@ DECLARE_ALIGNED(8, const uint64_t, ff_w1111)        = 0x0001000100010001ULL; | ||||
| //MMX versions | ||||
| #if HAVE_MMX | ||||
| #undef RENAME | ||||
| #define COMPILE_TEMPLATE_MMX2 0 | ||||
| #define COMPILE_TEMPLATE_MMXEXT 0 | ||||
| #define RENAME(a) a ## _MMX | ||||
| #include "swscale_template.c" | ||||
| #endif | ||||
|  | ||||
| //MMX2 versions | ||||
| #if HAVE_MMX2 | ||||
| #if HAVE_MMXEXT | ||||
| #undef RENAME | ||||
| #undef COMPILE_TEMPLATE_MMX2 | ||||
| #define COMPILE_TEMPLATE_MMX2 1 | ||||
| #undef COMPILE_TEMPLATE_MMXEXT | ||||
| #define COMPILE_TEMPLATE_MMXEXT 1 | ||||
| #define RENAME(a) a ## _MMX2 | ||||
| #include "swscale_template.c" | ||||
| #endif | ||||
| @@ -375,8 +375,8 @@ av_cold void ff_sws_init_swScale_mmx(SwsContext *c) | ||||
| #if HAVE_INLINE_ASM | ||||
|     if (cpu_flags & AV_CPU_FLAG_MMX) | ||||
|         sws_init_swScale_MMX(c); | ||||
| #if HAVE_MMX2 | ||||
|     if (cpu_flags & AV_CPU_FLAG_MMX2) | ||||
| #if HAVE_MMXEXT | ||||
|     if (cpu_flags & AV_CPU_FLAG_MMXEXT) | ||||
|         sws_init_swScale_MMX2(c); | ||||
|     if (cpu_flags & AV_CPU_FLAG_SSE3){ | ||||
|         if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) | ||||
| @@ -439,7 +439,7 @@ switch(c->dstBpc){ \ | ||||
|     if (cpu_flags & AV_CPU_FLAG_MMX) { | ||||
|         ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx); | ||||
|         ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx); | ||||
|         ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmx2, cpu_flags & AV_CPU_FLAG_MMX2); | ||||
|         ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmx2, cpu_flags & AV_CPU_FLAG_MMXEXT); | ||||
|  | ||||
|         switch (c->srcFormat) { | ||||
|         case PIX_FMT_Y400A: | ||||
| @@ -471,7 +471,7 @@ switch(c->dstBpc){ \ | ||||
|             break; | ||||
|         } | ||||
|     } | ||||
|     if (cpu_flags & AV_CPU_FLAG_MMX2) { | ||||
|     if (cpu_flags & AV_CPU_FLAG_MMXEXT) { | ||||
|         ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmx2, , 1); | ||||
|     } | ||||
| #endif | ||||
|   | ||||
| @@ -23,13 +23,13 @@ | ||||
| #undef MOVNTQ2 | ||||
| #undef PREFETCH | ||||
|  | ||||
| #if COMPILE_TEMPLATE_MMX2 | ||||
| #if COMPILE_TEMPLATE_MMXEXT | ||||
| #define PREFETCH "prefetchnta" | ||||
| #else | ||||
| #define PREFETCH  " # nop" | ||||
| #endif | ||||
|  | ||||
| #if COMPILE_TEMPLATE_MMX2 | ||||
| #if COMPILE_TEMPLATE_MMXEXT | ||||
| #define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t" | ||||
| #define MOVNTQ2 "movntq " | ||||
| #else | ||||
| @@ -38,7 +38,7 @@ | ||||
| #endif | ||||
| #define MOVNTQ(a,b)  REAL_MOVNTQ(a,b) | ||||
|  | ||||
| #if !COMPILE_TEMPLATE_MMX2 | ||||
| #if !COMPILE_TEMPLATE_MMXEXT | ||||
| static av_always_inline void | ||||
| dither_8to16(const uint8_t *srcDither, int rot) | ||||
| { | ||||
| @@ -641,7 +641,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter, | ||||
|     "cmp  "#dstw", "#index"     \n\t"\ | ||||
|     " jb       1b               \n\t" | ||||
|  | ||||
| #if COMPILE_TEMPLATE_MMX2 | ||||
| #if COMPILE_TEMPLATE_MMXEXT | ||||
| #undef WRITEBGR24 | ||||
| #define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMX2(dst, dstw, index) | ||||
| #else | ||||
| @@ -1445,7 +1445,7 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0, | ||||
|     } | ||||
| } | ||||
|  | ||||
| #if COMPILE_TEMPLATE_MMX2 | ||||
| #if COMPILE_TEMPLATE_MMXEXT | ||||
| static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, | ||||
|                                  int dstWidth, const uint8_t *src, | ||||
|                                  int srcW, int xInc) | ||||
| @@ -1627,7 +1627,7 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2, | ||||
|         dst2[i] = src2[srcW-1]*128; | ||||
|     } | ||||
| } | ||||
| #endif /* COMPILE_TEMPLATE_MMX2 */ | ||||
| #endif /* COMPILE_TEMPLATE_MMXEXT */ | ||||
|  | ||||
| static av_cold void RENAME(sws_init_swScale)(SwsContext *c) | ||||
| { | ||||
| @@ -1691,17 +1691,17 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c) | ||||
|  | ||||
|     if (c->srcBpc == 8 && c->dstBpc <= 14) { | ||||
|     // Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one). | ||||
| #if COMPILE_TEMPLATE_MMX2 | ||||
| #if COMPILE_TEMPLATE_MMXEXT | ||||
|     if (c->flags & SWS_FAST_BILINEAR && c->canMMX2BeUsed) | ||||
|     { | ||||
|         c->hyscale_fast = RENAME(hyscale_fast); | ||||
|         c->hcscale_fast = RENAME(hcscale_fast); | ||||
|     } else { | ||||
| #endif /* COMPILE_TEMPLATE_MMX2 */ | ||||
| #endif /* COMPILE_TEMPLATE_MMXEXT */ | ||||
|         c->hyscale_fast = NULL; | ||||
|         c->hcscale_fast = NULL; | ||||
| #if COMPILE_TEMPLATE_MMX2 | ||||
| #if COMPILE_TEMPLATE_MMXEXT | ||||
|     } | ||||
| #endif /* COMPILE_TEMPLATE_MMX2 */ | ||||
| #endif /* COMPILE_TEMPLATE_MMXEXT */ | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -52,20 +52,20 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL; | ||||
| //MMX versions | ||||
| #if HAVE_MMX | ||||
| #undef RENAME | ||||
| #undef COMPILE_TEMPLATE_MMX2 | ||||
| #define COMPILE_TEMPLATE_MMX2 0 | ||||
| #undef COMPILE_TEMPLATE_MMXEXT | ||||
| #define COMPILE_TEMPLATE_MMXEXT 0 | ||||
| #define RENAME(a) a ## _MMX | ||||
| #include "yuv2rgb_template.c" | ||||
| #endif /* HAVE_MMX */ | ||||
|  | ||||
| //MMX2 versions | ||||
| #if HAVE_MMX2 | ||||
| #if HAVE_MMXEXT | ||||
| #undef RENAME | ||||
| #undef COMPILE_TEMPLATE_MMX2 | ||||
| #define COMPILE_TEMPLATE_MMX2 1 | ||||
| #undef COMPILE_TEMPLATE_MMXEXT | ||||
| #define COMPILE_TEMPLATE_MMXEXT 1 | ||||
| #define RENAME(a) a ## _MMX2 | ||||
| #include "yuv2rgb_template.c" | ||||
| #endif /* HAVE_MMX2 */ | ||||
| #endif /* HAVE_MMXEXT */ | ||||
|  | ||||
| #endif /* HAVE_INLINE_ASM */ | ||||
|  | ||||
| @@ -74,8 +74,8 @@ av_cold SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c) | ||||
| #if HAVE_INLINE_ASM | ||||
|     int cpu_flags = av_get_cpu_flags(); | ||||
|  | ||||
| #if HAVE_MMX2 | ||||
|     if (cpu_flags & AV_CPU_FLAG_MMX2) { | ||||
| #if HAVE_MMXEXT | ||||
|     if (cpu_flags & AV_CPU_FLAG_MMXEXT) { | ||||
|         switch (c->dstFormat) { | ||||
|         case PIX_FMT_RGB24:  return yuv420_rgb24_MMX2; | ||||
|         case PIX_FMT_BGR24:  return yuv420_bgr24_MMX2; | ||||
|   | ||||
| @@ -25,7 +25,7 @@ | ||||
| #undef EMMS | ||||
| #undef SFENCE | ||||
|  | ||||
| #if COMPILE_TEMPLATE_MMX2 | ||||
| #if COMPILE_TEMPLATE_MMXEXT | ||||
| #define MOVNTQ "movntq" | ||||
| #define SFENCE "sfence" | ||||
| #else | ||||
| @@ -181,7 +181,7 @@ | ||||
|     "paddusb "GREEN_DITHER"(%4), %%mm2\n\t"      \ | ||||
|     "paddusb "RED_DITHER"(%4),   %%mm1\n\t"      \ | ||||
|  | ||||
| #if !COMPILE_TEMPLATE_MMX2 | ||||
| #if !COMPILE_TEMPLATE_MMXEXT | ||||
| static inline int RENAME(yuv420_rgb15)(SwsContext *c, const uint8_t *src[], | ||||
|                                        int srcStride[], | ||||
|                                        int srcSliceY, int srcSliceH, | ||||
| @@ -237,7 +237,7 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[], | ||||
|     YUV2RGB_OPERANDS | ||||
|     YUV2RGB_ENDFUNC | ||||
| } | ||||
| #endif /* !COMPILE_TEMPLATE_MMX2 */ | ||||
| #endif /* !COMPILE_TEMPLATE_MMXEXT */ | ||||
|  | ||||
| #define RGB_PACK24(blue, red)\ | ||||
|     "packuswb  %%mm3,      %%mm0 \n" /* R0 R2 R4 R6 R1 R3 R5 R7 */\ | ||||
| @@ -254,7 +254,7 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[], | ||||
|     "punpckhwd %%mm6,      %%mm5 \n" /* R4 G4 B4 R5 R6 G6 B6 R7 */\ | ||||
|     RGB_PACK24_B | ||||
|  | ||||
| #if COMPILE_TEMPLATE_MMX2 | ||||
| #if COMPILE_TEMPLATE_MMXEXT | ||||
| DECLARE_ASM_CONST(8, int16_t, mask1101[4]) = {-1,-1, 0,-1}; | ||||
| DECLARE_ASM_CONST(8, int16_t, mask0010[4]) = { 0, 0,-1, 0}; | ||||
| DECLARE_ASM_CONST(8, int16_t, mask0110[4]) = { 0,-1,-1, 0}; | ||||
| @@ -361,7 +361,7 @@ static inline int RENAME(yuv420_bgr24)(SwsContext *c, const uint8_t *src[], | ||||
|     MOVNTQ "   %%mm5,       16(%1)\n\t"      \ | ||||
|     MOVNTQ "   %%mm"alpha", 24(%1)\n\t"      \ | ||||
|  | ||||
| #if !COMPILE_TEMPLATE_MMX2 | ||||
| #if !COMPILE_TEMPLATE_MMXEXT | ||||
| static inline int RENAME(yuv420_rgb32)(SwsContext *c, const uint8_t *src[], | ||||
|                                        int srcStride[], | ||||
|                                        int srcSliceY, int srcSliceH, | ||||
| @@ -448,4 +448,4 @@ static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[], | ||||
| } | ||||
| #endif | ||||
|  | ||||
| #endif /* !COMPILE_TEMPLATE_MMX2 */ | ||||
| #endif /* !COMPILE_TEMPLATE_MMXEXT */ | ||||
|   | ||||
| @@ -1,3 +1,18 @@ | ||||
| FATE_LAGARITH += fate-lagarith-rgb24 | ||||
| fate-lagarith-rgb24: CMD = framecrc -i $(SAMPLES)/lagarith/lag-rgb24.avi | ||||
|  | ||||
| FATE_LAGARITH += fate-lagarith-rgb32 | ||||
| fate-lagarith-rgb32: CMD = framecrc -i $(SAMPLES)/lagarith/lag-rgb32.avi -pix_fmt bgra | ||||
|  | ||||
| FATE_LAGARITH += fate-lagarith-yuy2 | ||||
| fate-lagarith-yuy2: CMD = framecrc -i $(SAMPLES)/lagarith/lag-yuy2.avi | ||||
|  | ||||
| FATE_LAGARITH += fate-lagarith-yv12 | ||||
| fate-lagarith-yv12: CMD = framecrc -i $(SAMPLES)/lagarith/lag-yv12.avi | ||||
|  | ||||
| FATE_SAMPLES_AVCONV += $(FATE_LAGARITH) | ||||
| fate-lagarith: $(FATE_LAGARITH) | ||||
|  | ||||
| FATE_LOCO += fate-loco-rgb | ||||
| fate-loco-rgb: CMD = framecrc -i $(SAMPLES)/loco/pig-loco-rgb.avi | ||||
|  | ||||
|   | ||||
							
								
								
									
										5
									
								
								tests/ref/fate/lagarith-rgb24
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								tests/ref/fate/lagarith-rgb24
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,5 @@ | ||||
| #tb 0: 100/2997 | ||||
| 0,          0,          0,        1,   368640, 0x26f74db2 | ||||
| 0,          1,          1,        1,   368640, 0x63b29ea4 | ||||
| 0,          2,          2,        1,   368640, 0x19467f03 | ||||
| 0,          3,          3,        1,   368640, 0x5fdc3575 | ||||
							
								
								
									
										26
									
								
								tests/ref/fate/lagarith-rgb32
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								tests/ref/fate/lagarith-rgb32
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,26 @@ | ||||
| #tb 0: 1001/24000 | ||||
| 0,          0,          0,        1,  1382400, 0x00000000 | ||||
| 0,          1,          1,        1,  1382400, 0x00000000 | ||||
| 0,          2,          2,        1,  1382400, 0x00000000 | ||||
| 0,          3,          3,        1,  1382400, 0x00000000 | ||||
| 0,          4,          4,        1,  1382400, 0x00000000 | ||||
| 0,          5,          5,        1,  1382400, 0xf95bde46 | ||||
| 0,          6,          6,        1,  1382400, 0x4f4c0393 | ||||
| 0,          7,          7,        1,  1382400, 0xe5aa40db | ||||
| 0,          8,          8,        1,  1382400, 0xc25a8ba2 | ||||
| 0,          9,          9,        1,  1382400, 0x9db3150d | ||||
| 0,         10,         10,        1,  1382400, 0x730e64b3 | ||||
| 0,         11,         11,        1,  1382400, 0xf8fd7edf | ||||
| 0,         12,         12,        1,  1382400, 0x0114798a | ||||
| 0,         13,         13,        1,  1382400, 0x7571210f | ||||
| 0,         14,         14,        1,  1382400, 0x552ae59d | ||||
| 0,         15,         15,        1,  1382400, 0x7ae0c946 | ||||
| 0,         16,         16,        1,  1382400, 0x0818c3ef | ||||
| 0,         17,         17,        1,  1382400, 0x8257cac4 | ||||
| 0,         18,         18,        1,  1382400, 0x7762a979 | ||||
| 0,         19,         19,        1,  1382400, 0x282af57a | ||||
| 0,         20,         20,        1,  1382400, 0x3f42de50 | ||||
| 0,         21,         21,        1,  1382400, 0xc42d5f93 | ||||
| 0,         22,         22,        1,  1382400, 0x18775c90 | ||||
| 0,         23,         23,        1,  1382400, 0x34befa90 | ||||
| 0,         24,         24,        1,  1382400, 0xd33d5f53 | ||||
							
								
								
									
										2
									
								
								tests/ref/fate/lagarith-yuy2
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								tests/ref/fate/lagarith-yuy2
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,2 @@ | ||||
| #tb 0: 1/10 | ||||
| 0,          0,          0,        1,  1572864, 0xeed76a7d | ||||
							
								
								
									
										3
									
								
								tests/ref/fate/lagarith-yv12
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								tests/ref/fate/lagarith-yv12
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,3 @@ | ||||
| #tb 0: 1/60 | ||||
| 0,          0,          0,        1,    92160, 0x1dfdf5c1 | ||||
| 0,          1,          1,        1,    92160, 0x6965884f | ||||
		Reference in New Issue
	
	Block a user