1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-02-09 14:14:39 +02:00

x86: dsputil: prettyprint gcc inline asm

This commit is contained in:
Diego Biurrun 2012-03-16 18:42:01 +01:00
parent 3b54912113
commit 62ce9defb8

View File

@ -261,7 +261,8 @@ void ff_put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels,
"movq %%mm2, (%0, %1) \n\t" "movq %%mm2, (%0, %1) \n\t"
"movq %%mm4, (%0, %1, 2) \n\t" "movq %%mm4, (%0, %1, 2) \n\t"
"movq %%mm6, (%0, %2) \n\t" "movq %%mm6, (%0, %2) \n\t"
::"r" (pix), "r" ((x86_reg)line_size), "r" ((x86_reg)line_size*3), "m"(*p) :: "r"(pix), "r"((x86_reg)line_size), "r"((x86_reg)line_size * 3),
"m"(*p)
: "memory"); : "memory");
pix += line_size * 4; pix += line_size * 4;
p += 32; p += 32;
@ -706,7 +707,6 @@ static void h263_v_loop_filter_mmx(uint8_t *src, int stride, int qscale)
const int strength = ff_h263_loop_filter_strength[qscale]; const int strength = ff_h263_loop_filter_strength[qscale];
__asm__ volatile ( __asm__ volatile (
H263_LOOP_FILTER H263_LOOP_FILTER
"movq %%mm3, %1 \n\t" "movq %%mm3, %1 \n\t"
@ -844,7 +844,8 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
"cmp %4, %0 \n\t" "cmp %4, %0 \n\t"
"jb 1b \n\t" "jb 1b \n\t"
: "+r"(ptr) : "+r"(ptr)
: "r" ((x86_reg)buf - (x86_reg)ptr - w), "r" ((x86_reg)-wrap), "r" ((x86_reg)-wrap*3), "r" (ptr+width+2*w) : "r"((x86_reg)buf - (x86_reg)ptr - w), "r"((x86_reg) -wrap),
"r"((x86_reg) -wrap * 3), "r"(ptr + width + 2 * w)
); );
} }
} }
@ -863,13 +864,16 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
"cmp %4, %0 \n\t" "cmp %4, %0 \n\t"
"jb 1b \n\t" "jb 1b \n\t"
: "+r"(ptr) : "+r"(ptr)
: "r" ((x86_reg)last_line - (x86_reg)ptr - w), "r" ((x86_reg)wrap), "r" ((x86_reg)wrap*3), "r" (ptr+width+2*w) : "r"((x86_reg)last_line - (x86_reg)ptr - w),
"r"((x86_reg)wrap), "r"((x86_reg)wrap * 3),
"r"(ptr + width + 2 * w)
); );
} }
} }
} }
#define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\ #define QPEL_V_LOW(m3, m4, m5, m6, pw_20, pw_3, rnd, \
in0, in1, in2, in7, out, OP) \
"paddw "#m4", "#m3" \n\t" /* x1 */ \ "paddw "#m4", "#m3" \n\t" /* x1 */ \
"movq "MANGLE(ff_pw_20)", %%mm4 \n\t" /* 20 */ \ "movq "MANGLE(ff_pw_20)", %%mm4 \n\t" /* 20 */ \
"pmullw "#m3", %%mm4 \n\t" /* 20x1 */ \ "pmullw "#m3", %%mm4 \n\t" /* 20x1 */ \
@ -986,7 +990,8 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, \
"paddw %6, %%mm0 \n\t" \ "paddw %6, %%mm0 \n\t" \
"paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */ \ "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */ \
"psraw $5, %%mm0 \n\t" \ "psraw $5, %%mm0 \n\t" \
/* mm1=KLMN, mm2=JKLM, mm3=MNOP, mm4=LMNO, mm5=NOPQ mm7=0 */\ /* mm1 = KLMN, mm2 = JKLM, mm3 = MNOP, */ \
/* mm4 = LMNO, mm5 = NOPQ mm7 = 0 */ \
\ \
"paddw %%mm5, %%mm3 \n\t" /* a */ \ "paddw %%mm5, %%mm3 \n\t" /* a */ \
"pshufw $0xF9, %%mm5, %%mm6 \n\t" /* 0O0P0Q0Q */ \ "pshufw $0xF9, %%mm5, %%mm6 \n\t" /* 0O0P0Q0Q */ \
@ -1011,7 +1016,8 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, \
"decl %2 \n\t" \ "decl %2 \n\t" \
"jnz 1b \n\t" \ "jnz 1b \n\t" \
: "+a"(src), "+c"(dst), "+D"(h) \ : "+a"(src), "+c"(dst), "+D"(h) \
: "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\ : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), \
/* "m"(ff_pw_20), "m"(ff_pw_3), */ "m"(temp), "m"(ROUNDER) \
: "memory" \ : "memory" \
); \ ); \
} \ } \
@ -1147,7 +1153,8 @@ static void OPNAME ## mpeg4_qpel8_h_lowpass_mmx2(uint8_t *dst, \
"decl %2 \n\t" \ "decl %2 \n\t" \
"jnz 1b \n\t" \ "jnz 1b \n\t" \
: "+a"(src), "+c"(dst), "+d"(h) \ : "+a"(src), "+c"(dst), "+d"(h) \
: "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER)\ : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), \
/* "m"(ff_pw_20), "m"(ff_pw_3), */ "m"(ROUNDER) \
: "memory" \ : "memory" \
); \ ); \
} \ } \
@ -1273,7 +1280,9 @@ static void OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(uint8_t *dst, \
"jnz 1b \n\t" \ "jnz 1b \n\t" \
\ \
: "+r"(temp_ptr), "+r"(dst), "+g"(count) \ : "+r"(temp_ptr), "+r"(dst), "+g"(count) \
: "r"((x86_reg)dstStride), "r"(2*(x86_reg)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-14*(x86_reg)dstStride)\ : "r"((x86_reg)dstStride), "r"(2 * (x86_reg)dstStride), \
/* "m"(ff_pw_20), "m"(ff_pw_3), */ "m"(ROUNDER), \
"g"(4 - 14 * (x86_reg)dstStride) \
: "memory" \ : "memory" \
); \ ); \
} \ } \
@ -1337,7 +1346,9 @@ static void OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(uint8_t *dst, \
"jnz 1b \n\t" \ "jnz 1b \n\t" \
\ \
: "+r"(temp_ptr), "+r"(dst), "+g"(count) \ : "+r"(temp_ptr), "+r"(dst), "+g"(count) \
: "r"((x86_reg)dstStride), "r"(2*(x86_reg)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-6*(x86_reg)dstStride)\ : "r"((x86_reg)dstStride), "r"(2 * (x86_reg)dstStride), \
/* "m"(ff_pw_20), "m"(ff_pw_3), */ "m"(ROUNDER), \
"g"(4 - 6 * (x86_reg)dstStride) \
: "memory" \ : "memory" \
); \ ); \
} \ } \
@ -1696,11 +1707,14 @@ static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, \
OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16); \ OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16); \
} }
#define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t" #define PUT_OP(a, b, temp, size) \
"mov"#size" "#a", "#b" \n\t"
#define AVG_3DNOW_OP(a, b, temp, size) \ #define AVG_3DNOW_OP(a, b, temp, size) \
"mov"#size" "#b", "#temp" \n\t" \ "mov"#size" "#b", "#temp" \n\t" \
"pavgusb "#temp", "#a" \n\t" \ "pavgusb "#temp", "#a" \n\t" \
"mov"#size" "#a", "#b" \n\t" "mov"#size" "#a", "#b" \n\t"
#define AVG_MMX2_OP(a, b, temp, size) \ #define AVG_MMX2_OP(a, b, temp, size) \
"mov"#size" "#b", "#temp" \n\t" \ "mov"#size" "#b", "#temp" \n\t" \
"pavgb "#temp", "#a" \n\t" \ "pavgb "#temp", "#a" \n\t" \
@ -2271,7 +2285,8 @@ static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize)
"add $16, %0 \n" \ "add $16, %0 \n" \
"jl 1b \n" \ "jl 1b \n" \
: "+&r"(i), "=&r"(j), "=&r"(k) \ : "+&r"(i), "=&r"(j), "=&r"(k) \
:"r"(samples[0]+len), "r"(matrix_simd+in_ch), "g"((intptr_t)-32*(in_ch-1))\ : "r"(samples[0] + len), "r"(matrix_simd + in_ch), \
"g"((intptr_t) - 32 * (in_ch - 1)) \
: "memory" \ : "memory" \
); );
@ -2517,10 +2532,10 @@ static void vector_clipf_sse(float *dst, const float *src,
{ {
x86_reg i = (len - 16) * 4; x86_reg i = (len - 16) * 4;
__asm__ volatile ( __asm__ volatile (
"movss %3, %%xmm4 \n" "movss %3, %%xmm4 \n\t"
"movss %4, %%xmm5 \n" "movss %4, %%xmm5 \n\t"
"shufps $0, %%xmm4, %%xmm4 \n" "shufps $0, %%xmm4, %%xmm4 \n\t"
"shufps $0, %%xmm5, %%xmm5 \n" "shufps $0, %%xmm5, %%xmm5 \n\t"
"1: \n\t" "1: \n\t"
"movaps (%2, %0), %%xmm0 \n\t" // 3/1 on intel "movaps (%2, %0), %%xmm0 \n\t" // 3/1 on intel
"movaps 16(%2, %0), %%xmm1 \n\t" "movaps 16(%2, %0), %%xmm1 \n\t"