From 823674751196e382c1d6334b8c92839f95d0ba9e Mon Sep 17 00:00:00 2001 From: Matt Oliver Date: Tue, 18 Mar 2014 15:53:26 +1100 Subject: [PATCH] Automatically change MANGLE() into named inline asm operands when direct symbol references in inline asm are not supported. This is part of the patch-set for Intel C inline asm support on Windows. Signed-off-by: Michael Niedermayer --- configure | 3 +++ libavcodec/x86/cabac.h | 3 ++- libavcodec/x86/cavsdsp.c | 2 ++ libavcodec/x86/dsputil_mmx.c | 1 + libavcodec/x86/h264_i386.h | 2 ++ libavcodec/x86/idct_sse2_xvid.c | 2 +- libavcodec/x86/lpc.c | 3 +++ libavcodec/x86/motion_est.c | 3 ++- libavcodec/x86/simple_idct.c | 1 + libavcodec/x86/vc1dsp_mmx.c | 6 +++++ libavutil/x86/asm.h | 36 +++++++++++++++++++++++++++++- libpostproc/postprocess_template.c | 7 ++++++ libswresample/x86/resample_mmx.h | 2 ++ libswscale/x86/rgb2rgb_template.c | 11 +++++++++ libswscale/x86/swscale_template.c | 12 ++++++++++ libswscale/x86/yuv2rgb_template.c | 9 ++++++++ 16 files changed, 99 insertions(+), 4 deletions(-) diff --git a/configure b/configure index 3939cfa63e..b41e0dc8d5 100755 --- a/configure +++ b/configure @@ -1691,6 +1691,7 @@ TOOLCHAIN_FEATURES=" ibm_asm inline_asm_labels inline_asm_nonlocal_labels + inline_asm_direct_symbol_refs pragma_deprecated rsync_contimeout symver_asm_label @@ -4306,6 +4307,8 @@ EOF # check whether xmm clobbers are supported check_inline_asm xmm_clobbers '"":::"%xmm0"' + check_inline_asm inline_asm_direct_symbol_refs '"movl test, %eax"' + # check whether binutils is new enough to compile SSSE3/MMXEXT enabled ssse3 && check_inline_asm ssse3_inline '"pabsw %xmm0, %xmm0"' enabled mmxext && check_inline_asm mmxext_inline '"pmaxub %mm0, %mm1"' diff --git a/libavcodec/x86/cabac.h b/libavcodec/x86/cabac.h index 3a82c1e4a4..0a68b7b7ef 100644 --- a/libavcodec/x86/cabac.h +++ b/libavcodec/x86/cabac.h @@ -110,7 +110,7 @@ "2: \n\t" #else /* BROKEN_RELOCATIONS */ -#define TABLES_ARG +#define TABLES_ARG 
NAMED_CONSTRAINTS_ADD(ff_h264_cabac_tables) #define RIP_ARG #if HAVE_FAST_CMOV @@ -184,6 +184,7 @@ static av_always_inline int get_cabac_inline_x86(CABACContext *c, __asm__ volatile( "lea "MANGLE(ff_h264_cabac_tables)", %0 \n\t" : "=&r"(tables) + : NAMED_CONSTRAINTS(ff_h264_cabac_tables) ); #endif diff --git a/libavcodec/x86/cavsdsp.c b/libavcodec/x86/cavsdsp.c index aaa09d1784..805e120db3 100644 --- a/libavcodec/x86/cavsdsp.c +++ b/libavcodec/x86/cavsdsp.c @@ -309,6 +309,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) \ : "+a"(src), "+c"(dst)\ : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride), "m"(ADD), "m"(MUL1)\ + NAMED_CONSTRAINTS_ADD(MUL2)\ : "memory"\ );\ if(h==16){\ @@ -324,6 +325,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) \ : "+a"(src), "+c"(dst)\ : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride), "m"(ADD), "m"(MUL1)\ + NAMED_CONSTRAINTS_ADD(MUL2)\ : "memory"\ );\ }\ diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 47835acf2b..420a4ef8b5 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -123,6 +123,7 @@ void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, put_signed_pixels_clamped_mmx_half(64) : "+&r" (pixels), "=&r" (line_skip3) : "r" (block), "r" (line_skip) + NAMED_CONSTRAINTS_ADD(ff_pb_80) : "memory"); } diff --git a/libavcodec/x86/h264_i386.h b/libavcodec/x86/h264_i386.h index 0dc0a7cb0f..9d811f07b7 100644 --- a/libavcodec/x86/h264_i386.h +++ b/libavcodec/x86/h264_i386.h @@ -55,6 +55,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, __asm__ volatile( "lea "MANGLE(ff_h264_cabac_tables)", %0 \n\t" : "=&r"(tables) + : NAMED_CONSTRAINTS(ff_h264_cabac_tables) ); #endif @@ -130,6 +131,7 @@ static int decode_significance_8x8_x86(CABACContext *c, __asm__ volatile( "lea "MANGLE(ff_h264_cabac_tables)", %0 \n\t" : "=&r"(tables) + : NAMED_CONSTRAINTS(ff_h264_cabac_tables) ); #endif diff --git 
a/libavcodec/x86/idct_sse2_xvid.c b/libavcodec/x86/idct_sse2_xvid.c index d9bc841825..e1878fa93d 100644 --- a/libavcodec/x86/idct_sse2_xvid.c +++ b/libavcodec/x86/idct_sse2_xvid.c @@ -381,7 +381,7 @@ inline void ff_idct_xvid_sse2(short *block) iLLM_PASS("%0") "6: \n\t" : "+r"(block) - : + : NAMED_CONSTRAINTS(m127,iTab1,walkenIdctRounders,iTab2,iTab3,iTab4,tan3,tan1,tan2,sqrt2) : XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" , "%xmm4" , "%xmm5" , "%xmm6" , "%xmm7" ,) #if ARCH_X86_64 diff --git a/libavcodec/x86/lpc.c b/libavcodec/x86/lpc.c index 9682733096..bff26359f4 100644 --- a/libavcodec/x86/lpc.c +++ b/libavcodec/x86/lpc.c @@ -72,6 +72,7 @@ static void lpc_apply_welch_window_sse2(const int32_t *data, int len, "3: \n\t" :"+&r"(i), "+&r"(j) :"r"(w_data+n2), "r"(data+n2), "m"(c), "r"(len) + NAMED_CONSTRAINTS_ADD(pd_1,pd_2) XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm5", "%xmm6", "%xmm7") ); @@ -116,6 +117,7 @@ static void lpc_compute_autocorr_sse2(const double *data, int len, int lag, "movsd %%xmm2, 16(%1) \n\t" :"+&r"(i) :"r"(autoc+j), "r"(data+len), "r"(data+len-j) + NAMED_CONSTRAINTS_ADD(pd_1) :"memory" ); } else { @@ -139,6 +141,7 @@ static void lpc_compute_autocorr_sse2(const double *data, int len, int lag, "movsd %%xmm1, %2 \n\t" :"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1]) :"r"(data+len), "r"(data+len-j) + NAMED_CONSTRAINTS_ADD(pd_1) ); } } diff --git a/libavcodec/x86/motion_est.c b/libavcodec/x86/motion_est.c index 2b11cf95d3..3df471ed78 100644 --- a/libavcodec/x86/motion_est.c +++ b/libavcodec/x86/motion_est.c @@ -193,7 +193,8 @@ static inline void sad8_4_mmxext(uint8_t *blk1, uint8_t *blk2, "sub $2, %0 \n\t" " jg 1b \n\t" : "+r" (h), "+r" (blk1), "+r" (blk2) - : "r" ((x86_reg) stride)); + : "r" ((x86_reg) stride) + NAMED_CONSTRAINTS_ADD(bone)); } static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, diff --git a/libavcodec/x86/simple_idct.c b/libavcodec/x86/simple_idct.c index e10dc7ba09..3ae30f3488 100644 --- 
a/libavcodec/x86/simple_idct.c +++ b/libavcodec/x86/simple_idct.c @@ -1143,6 +1143,7 @@ Temp "9: \n\t" :: "r" (block), "r" (temp), "r" (coeffs) + NAMED_CONSTRAINTS_ADD(wm1010,d40000) : "%eax" ); } diff --git a/libavcodec/x86/vc1dsp_mmx.c b/libavcodec/x86/vc1dsp_mmx.c index 5ceacd348e..942aa42b45 100644 --- a/libavcodec/x86/vc1dsp_mmx.c +++ b/libavcodec/x86/vc1dsp_mmx.c @@ -110,6 +110,7 @@ static void vc1_put_ver_16b_shift2_mmx(int16_t *dst, : "+r"(src), "+r"(dst) : "r"(stride), "r"(-2*stride), "m"(shift), "m"(rnd), "r"(9*stride-4) + NAMED_CONSTRAINTS_ADD(ff_pw_9) : "%"REG_c, "memory" ); } @@ -154,6 +155,7 @@ static void OPNAME ## vc1_hor_16b_shift2_mmx(uint8_t *dst, x86_reg stride,\ "jnz 1b \n\t"\ : "+r"(h), "+r" (src), "+r" (dst)\ : "r"(stride), "m"(rnd)\ + NAMED_CONSTRAINTS_ADD(ff_pw_128,ff_pw_9)\ : "memory"\ );\ } @@ -212,6 +214,7 @@ static void OPNAME ## vc1_shift2_mmx(uint8_t *dst, const uint8_t *src,\ : "+r"(src), "+r"(dst)\ : "r"(offset), "r"(-2*offset), "g"(stride), "m"(rnd),\ "g"(stride-offset)\ + NAMED_CONSTRAINTS_ADD(ff_pw_9)\ : "%"REG_c, "memory"\ );\ } @@ -314,6 +317,7 @@ vc1_put_ver_16b_ ## NAME ## _mmx(int16_t *dst, const uint8_t *src, \ : "+r"(h), "+r" (src), "+r" (dst) \ : "r"(src_stride), "r"(3*src_stride), \ "m"(rnd), "m"(shift) \ + NAMED_CONSTRAINTS_ADD(ff_pw_3,ff_pw_53,ff_pw_18) \ : "memory" \ ); \ } @@ -351,6 +355,7 @@ OPNAME ## vc1_hor_16b_ ## NAME ## _mmx(uint8_t *dst, x86_reg stride, \ "jnz 1b \n\t" \ : "+r"(h), "+r" (src), "+r" (dst) \ : "r"(stride), "m"(rnd) \ + NAMED_CONSTRAINTS_ADD(ff_pw_3,ff_pw_18,ff_pw_53,ff_pw_128) \ : "memory" \ ); \ } @@ -386,6 +391,7 @@ OPNAME ## vc1_## NAME ## _mmx(uint8_t *dst, const uint8_t *src, \ "jnz 1b \n\t" \ : "+r"(h), "+r" (src), "+r" (dst) \ : "r"(offset), "r"(3*offset), "g"(stride), "m"(rnd) \ + NAMED_CONSTRAINTS_ADD(ff_pw_53,ff_pw_18,ff_pw_3) \ : "memory" \ ); \ } diff --git a/libavutil/x86/asm.h b/libavutil/x86/asm.h index 70ccac7885..5328e2bfc9 100644 --- a/libavutil/x86/asm.h +++ 
b/libavutil/x86/asm.h @@ -107,6 +107,40 @@ typedef int x86_reg; # define LOCAL_MANGLE(a) #a #endif -#define MANGLE(a) EXTERN_PREFIX LOCAL_MANGLE(a) +#if HAVE_INLINE_ASM_DIRECT_SYMBOL_REFS +# define MANGLE(a) EXTERN_PREFIX LOCAL_MANGLE(a) +# define NAMED_CONSTRAINTS_ADD(...) +# define NAMED_CONSTRAINTS(...) +#else + /* When code containing direct symbol references is passed to a compiler that does not support them, + * these references need to be converted to named asm operands instead. + * Instead of returning a direct symbol, MANGLE() now returns a reference to a named operand for that specific symbol. + * For this to work there must also be a corresponding entry in the asm operand list. To add this + * entry use the macro NAMED_CONSTRAINTS() and pass in a list of every symbol referenced in the + * corresponding block of code (e.g. NAMED_CONSTRAINTS(var1,var2,var3) where var1 is the first symbol etc.). + * If the asm statement already has operands, use NAMED_CONSTRAINTS_ADD() to append to the existing operand list. + */ +# define MANGLE(a) "%["#a"]" + // Intel/MSVC does not correctly expand va-args so we need a rather ugly hack in order to get it to work +# define FE_0(P,X) P(X) +# define FE_1(P,X,X1) P(X), FE_0(P,X1) +# define FE_2(P,X,X1,X2) P(X), FE_1(P,X1,X2) +# define FE_3(P,X,X1,X2,X3) P(X), FE_2(P,X1,X2,X3) +# define FE_4(P,X,X1,X2,X3,X4) P(X), FE_3(P,X1,X2,X3,X4) +# define FE_5(P,X,X1,X2,X3,X4,X5) P(X), FE_4(P,X1,X2,X3,X4,X5) +# define FE_6(P,X,X1,X2,X3,X4,X5,X6) P(X), FE_5(P,X1,X2,X3,X4,X5,X6) +# define FE_7(P,X,X1,X2,X3,X4,X5,X6,X7) P(X), FE_6(P,X1,X2,X3,X4,X5,X6,X7) +# define FE_8(P,X,X1,X2,X3,X4,X5,X6,X7,X8) P(X), FE_7(P,X1,X2,X3,X4,X5,X6,X7,X8) +# define FE_9(P,X,X1,X2,X3,X4,X5,X6,X7,X8,X9) P(X), FE_8(P,X1,X2,X3,X4,X5,X6,X7,X8,X9) +# define GET_FE_IMPL(_0,_1,_2,_3,_4,_5,_6,_7,_8,_9,NAME,...) NAME +# define GET_FE(A) GET_FE_IMPL A +# define GET_FE_GLUE(x, y) x y +# define FOR_EACH_VA(P,...) 
GET_FE_GLUE(GET_FE((__VA_ARGS__,FE_9,FE_8,FE_7,FE_6,FE_5,FE_4,FE_3,FE_2,FE_1,FE_0)), (P,__VA_ARGS__)) +# define NAME_CONSTRAINT(x) [x] "m"(x) + // Parameters are a list of each symbol reference required +# define NAMED_CONSTRAINTS_ADD(...) , FOR_EACH_VA(NAME_CONSTRAINT,__VA_ARGS__) + // Same but without comma for when there are no previously defined constraints +# define NAMED_CONSTRAINTS(...) FOR_EACH_VA(NAME_CONSTRAINT,__VA_ARGS__) +#endif #endif /* AVUTIL_X86_ASM_H */ diff --git a/libpostproc/postprocess_template.c b/libpostproc/postprocess_template.c index 74b0ab4c46..3991ae9472 100644 --- a/libpostproc/postprocess_template.c +++ b/libpostproc/postprocess_template.c @@ -490,6 +490,7 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) : : "r" (src), "r" ((x86_reg)stride), "m" (co->pQPb) + NAMED_CONSTRAINTS_ADD(b01) : "%"REG_a, "%"REG_c ); #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW @@ -755,6 +756,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext : : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb) + NAMED_CONSTRAINTS_ADD(b80,b00,b01) : "%"REG_a, "%"REG_c ); @@ -1042,6 +1044,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext : "+r" (src) : "r" ((x86_reg)stride), "m" (c->pQPb), "r"(tmp) + NAMED_CONSTRAINTS_ADD(w05,w20) : "%"REG_a ); #else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW @@ -1313,6 +1316,7 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1, "1: \n\t" : : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2), "q"(tmp) + NAMED_CONSTRAINTS_ADD(deringThreshold,b00,b02,b08) : "%"REG_a, "%"REG_d, "%"REG_SP ); #else // HAVE_7REGS && (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) @@ -2446,6 +2450,7 @@ L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc)) "4: \n\t" :: "r" (src), "r" (tempBlurred), "r"((x86_reg)stride), "m" (tempBlurredPast) + NAMED_CONSTRAINTS_ADD(b80) : "%"REG_a, "%"REG_d, "%"REG_c, "memory" ); #else //TEMPLATE_PP_MMXEXT || 
TEMPLATE_PP_3DNOW @@ -2790,6 +2795,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st : "+&r"(src) : "r" ((x86_reg)step), "m" (c->pQPb), "r"(sums), "g"(src) + NAMED_CONSTRAINTS_ADD(w04) ); src+= step; // src points to begin of the 8x8 Block @@ -3061,6 +3067,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st : "+r" (temp_src) : "r" ((x86_reg)step), "m" (c->pQPb), "m"(eq_mask), "r"(tmp) + NAMED_CONSTRAINTS_ADD(w05,w20) : "%"REG_a ); } diff --git a/libswresample/x86/resample_mmx.h b/libswresample/x86/resample_mmx.h index fab52f704a..f366cc7f59 100644 --- a/libswresample/x86/resample_mmx.h +++ b/libswresample/x86/resample_mmx.h @@ -46,6 +46,7 @@ __asm__ volatile(\ : "r" (((uint8_t*)(src+sample_index))-len),\ "r" (((uint8_t*)filter)-len),\ "r" (dst+dst_index)\ + NAMED_CONSTRAINTS_ADD(ff_resample_int16_rounder)\ ); #define COMMON_CORE_INT16_SSE2 \ @@ -69,4 +70,5 @@ __asm__ volatile(\ : "r" (((uint8_t*)(src+sample_index))-len),\ "r" (((uint8_t*)filter)-len),\ "r" (dst+dst_index)\ + NAMED_CONSTRAINTS_ADD(ff_resample_int16_rounder)\ ); diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c index 455e7c25a8..b68824dcbe 100644 --- a/libswscale/x86/rgb2rgb_template.c +++ b/libswscale/x86/rgb2rgb_template.c @@ -163,6 +163,7 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int sr "movq %%mm5, %%mm7 \n\t" STORE_BGR24_MMX :: "r"(dest), "r"(s) + NAMED_CONSTRAINTS_ADD(mask24l,mask24h) :"memory"); dest += 24; s += 32; @@ -785,6 +786,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr :"=m"(*d) :"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null) + NAMED_CONSTRAINTS_ADD(mul15_mid,mul15_hi) :"memory"); /* borrowed 32 to 24 */ __asm__ volatile( @@ -801,6 +803,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr STORE_BGR24_MMX :: "r"(d), "m"(*s) + NAMED_CONSTRAINTS_ADD(mask24l,mask24h) 
:"memory"); d += 24; s += 8; @@ -890,6 +893,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr "por %%mm5, %%mm3 \n\t" :"=m"(*d) :"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) + NAMED_CONSTRAINTS_ADD(mul15_mid,mul16_mid,mul15_hi) :"memory"); /* borrowed 32 to 24 */ __asm__ volatile( @@ -906,6 +910,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr STORE_BGR24_MMX :: "r"(d), "m"(*s) + NAMED_CONSTRAINTS_ADD(mask24l,mask24h) :"memory"); d += 24; s += 8; @@ -966,6 +971,7 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" PACK_RGB32 ::"r"(d),"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r) ,"m"(mul15_mid) + NAMED_CONSTRAINTS_ADD(mul15_hi) :"memory"); d += 16; s += 4; @@ -1009,6 +1015,7 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" PACK_RGB32 ::"r"(d),"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mul15_mid) + NAMED_CONSTRAINTS_ADD(mul16_mid,mul15_hi) :"memory"); d += 16; s += 4; @@ -1133,6 +1140,7 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int sr "2: \n\t" : "+a" (mmx_size) : "r" (src-mmx_size), "r"(dst-mmx_size) + NAMED_CONSTRAINTS_ADD(mask24r,mask24g,mask24b) ); __asm__ volatile(SFENCE:::"memory"); @@ -1468,6 +1476,7 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWid :: "r" (src + mmxSize ), "r" (src + srcStride + mmxSize ), "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2), "g" (-mmxSize) + NAMED_CONSTRAINTS_ADD(mmx_ff) : "%"REG_a ); @@ -1689,6 +1698,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ "add $8, %%"REG_a" \n\t" " js 1b \n\t" : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width), "r"(rgb2yuv) + NAMED_CONSTRAINTS_ADD(ff_w1111,ff_bgr2YOffset) : "%"REG_a, "%"REG_d ); ydst += lumStride; @@ -1837,6 +1847,7 @@ 
static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ "add $4, %%"REG_a" \n\t" " js 1b \n\t" : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth), "r"(rgb2yuv) + NAMED_CONSTRAINTS_ADD(ff_w1111,ff_bgr2UVOffset) : "%"REG_a, "%"REG_d ); diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c index c7a1bb46d9..71a60bc738 100644 --- a/libswscale/x86/swscale_template.c +++ b/libswscale/x86/swscale_template.c @@ -172,6 +172,7 @@ static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize, :: "r" (&c->redDither), \ "m" (dummy), "m" (dummy), "m" (dummy),\ "r" (dest), "m" (dstW_reg), "m"(uv_off) \ + NAMED_CONSTRAINTS_ADD(bF8,bFC) \ : "%"REG_a, "%"REG_d, "%"REG_S \ ); @@ -680,6 +681,7 @@ static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter, :: "r" (&c->redDither), "m" (dummy), "m" (dummy), "m" (dummy), "r" (dest), "m" (dstW_reg), "m"(uv_off) + NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B) : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S ); } @@ -704,6 +706,7 @@ static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter, :: "r" (&c->redDither), "m" (dummy), "m" (dummy), "m" (dummy), "r" (dest), "m" (dstW_reg), "m"(uv_off) + NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B) : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S ); } @@ -931,6 +934,7 @@ static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2], "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) + NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B) ); } @@ -960,6 +964,7 @@ static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2], "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) + NAMED_CONSTRAINTS_ADD(bF8) ); } @@ -989,6 +994,7 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2], "mov 
"ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) + NAMED_CONSTRAINTS_ADD(bF8,bFC) ); } @@ -1262,6 +1268,7 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0, "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) + NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B) ); } else { const int16_t *ubuf1 = ubuf[1]; @@ -1276,6 +1283,7 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0, "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) + NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B) ); } } @@ -1307,6 +1315,7 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0, "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) + NAMED_CONSTRAINTS_ADD(bF8) ); } else { const int16_t *ubuf1 = ubuf[1]; @@ -1327,6 +1336,7 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0, "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) + NAMED_CONSTRAINTS_ADD(bF8) ); } } @@ -1358,6 +1368,7 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0, "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) + NAMED_CONSTRAINTS_ADD(bF8,bFC) ); } else { const int16_t *ubuf1 = ubuf[1]; @@ -1378,6 +1389,7 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0, "mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), "a" (&c->redDither) + NAMED_CONSTRAINTS_ADD(bF8,bFC) ); } } diff --git a/libswscale/x86/yuv2rgb_template.c b/libswscale/x86/yuv2rgb_template.c index c879102cc4..d29e3a424d 100644 --- a/libswscale/x86/yuv2rgb_template.c +++ 
b/libswscale/x86/yuv2rgb_template.c @@ -134,10 +134,18 @@ "add $4, %0\n\t" \ "js 1b\n\t" \ +#if COMPILE_TEMPLATE_MMXEXT +#define RGB_PACK24_B_OPERANDS NAMED_CONSTRAINTS_ADD(mask1101,mask0110,mask0100,mask0010,mask1001) +#else +#define RGB_PACK24_B_OPERANDS +#endif + #define YUV2RGB_OPERANDS \ : "+r" (index), "+r" (image) \ : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \ "r" (py - 2*index) \ + NAMED_CONSTRAINTS_ADD(mmx_00ffw,pb_03,pb_07,mmx_redmask,pb_e0) \ + RGB_PACK24_B_OPERANDS \ : "memory" \ ); \ } \ @@ -146,6 +154,7 @@ : "+r" (index), "+r" (image) \ : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \ "r" (py - 2*index), "r" (pa - 2*index) \ + NAMED_CONSTRAINTS_ADD(mmx_00ffw) \ : "memory" \ ); \ } \