mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-02-09 14:14:39 +02:00
x86: dsputil: prettyprint gcc inline asm
This commit is contained in:
parent
3b54912113
commit
62ce9defb8
@ -261,7 +261,8 @@ void ff_put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels,
|
|||||||
"movq %%mm2, (%0, %1) \n\t"
|
"movq %%mm2, (%0, %1) \n\t"
|
||||||
"movq %%mm4, (%0, %1, 2) \n\t"
|
"movq %%mm4, (%0, %1, 2) \n\t"
|
||||||
"movq %%mm6, (%0, %2) \n\t"
|
"movq %%mm6, (%0, %2) \n\t"
|
||||||
::"r" (pix), "r" ((x86_reg)line_size), "r" ((x86_reg)line_size*3), "m"(*p)
|
:: "r"(pix), "r"((x86_reg)line_size), "r"((x86_reg)line_size * 3),
|
||||||
|
"m"(*p)
|
||||||
: "memory");
|
: "memory");
|
||||||
pix += line_size * 4;
|
pix += line_size * 4;
|
||||||
p += 32;
|
p += 32;
|
||||||
@ -706,7 +707,6 @@ static void h263_v_loop_filter_mmx(uint8_t *src, int stride, int qscale)
|
|||||||
const int strength = ff_h263_loop_filter_strength[qscale];
|
const int strength = ff_h263_loop_filter_strength[qscale];
|
||||||
|
|
||||||
__asm__ volatile (
|
__asm__ volatile (
|
||||||
|
|
||||||
H263_LOOP_FILTER
|
H263_LOOP_FILTER
|
||||||
|
|
||||||
"movq %%mm3, %1 \n\t"
|
"movq %%mm3, %1 \n\t"
|
||||||
@ -844,7 +844,8 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
|
|||||||
"cmp %4, %0 \n\t"
|
"cmp %4, %0 \n\t"
|
||||||
"jb 1b \n\t"
|
"jb 1b \n\t"
|
||||||
: "+r"(ptr)
|
: "+r"(ptr)
|
||||||
: "r" ((x86_reg)buf - (x86_reg)ptr - w), "r" ((x86_reg)-wrap), "r" ((x86_reg)-wrap*3), "r" (ptr+width+2*w)
|
: "r"((x86_reg)buf - (x86_reg)ptr - w), "r"((x86_reg) -wrap),
|
||||||
|
"r"((x86_reg) -wrap * 3), "r"(ptr + width + 2 * w)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -863,13 +864,16 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
|
|||||||
"cmp %4, %0 \n\t"
|
"cmp %4, %0 \n\t"
|
||||||
"jb 1b \n\t"
|
"jb 1b \n\t"
|
||||||
: "+r"(ptr)
|
: "+r"(ptr)
|
||||||
: "r" ((x86_reg)last_line - (x86_reg)ptr - w), "r" ((x86_reg)wrap), "r" ((x86_reg)wrap*3), "r" (ptr+width+2*w)
|
: "r"((x86_reg)last_line - (x86_reg)ptr - w),
|
||||||
|
"r"((x86_reg)wrap), "r"((x86_reg)wrap * 3),
|
||||||
|
"r"(ptr + width + 2 * w)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\
|
#define QPEL_V_LOW(m3, m4, m5, m6, pw_20, pw_3, rnd, \
|
||||||
|
in0, in1, in2, in7, out, OP) \
|
||||||
"paddw "#m4", "#m3" \n\t" /* x1 */ \
|
"paddw "#m4", "#m3" \n\t" /* x1 */ \
|
||||||
"movq "MANGLE(ff_pw_20)", %%mm4 \n\t" /* 20 */ \
|
"movq "MANGLE(ff_pw_20)", %%mm4 \n\t" /* 20 */ \
|
||||||
"pmullw "#m3", %%mm4 \n\t" /* 20x1 */ \
|
"pmullw "#m3", %%mm4 \n\t" /* 20x1 */ \
|
||||||
@ -986,7 +990,8 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, \
|
|||||||
"paddw %6, %%mm0 \n\t" \
|
"paddw %6, %%mm0 \n\t" \
|
||||||
"paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */ \
|
"paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */ \
|
||||||
"psraw $5, %%mm0 \n\t" \
|
"psraw $5, %%mm0 \n\t" \
|
||||||
/* mm1=KLMN, mm2=JKLM, mm3=MNOP, mm4=LMNO, mm5=NOPQ mm7=0 */\
|
/* mm1 = KLMN, mm2 = JKLM, mm3 = MNOP, */ \
|
||||||
|
/* mm4 = LMNO, mm5 = NOPQ mm7 = 0 */ \
|
||||||
\
|
\
|
||||||
"paddw %%mm5, %%mm3 \n\t" /* a */ \
|
"paddw %%mm5, %%mm3 \n\t" /* a */ \
|
||||||
"pshufw $0xF9, %%mm5, %%mm6 \n\t" /* 0O0P0Q0Q */ \
|
"pshufw $0xF9, %%mm5, %%mm6 \n\t" /* 0O0P0Q0Q */ \
|
||||||
@ -1011,7 +1016,8 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, \
|
|||||||
"decl %2 \n\t" \
|
"decl %2 \n\t" \
|
||||||
"jnz 1b \n\t" \
|
"jnz 1b \n\t" \
|
||||||
: "+a"(src), "+c"(dst), "+D"(h) \
|
: "+a"(src), "+c"(dst), "+D"(h) \
|
||||||
: "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\
|
: "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), \
|
||||||
|
/* "m"(ff_pw_20), "m"(ff_pw_3), */ "m"(temp), "m"(ROUNDER) \
|
||||||
: "memory" \
|
: "memory" \
|
||||||
); \
|
); \
|
||||||
} \
|
} \
|
||||||
@ -1147,7 +1153,8 @@ static void OPNAME ## mpeg4_qpel8_h_lowpass_mmx2(uint8_t *dst, \
|
|||||||
"decl %2 \n\t" \
|
"decl %2 \n\t" \
|
||||||
"jnz 1b \n\t" \
|
"jnz 1b \n\t" \
|
||||||
: "+a"(src), "+c"(dst), "+d"(h) \
|
: "+a"(src), "+c"(dst), "+d"(h) \
|
||||||
: "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER)\
|
: "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), \
|
||||||
|
/* "m"(ff_pw_20), "m"(ff_pw_3), */ "m"(ROUNDER) \
|
||||||
: "memory" \
|
: "memory" \
|
||||||
); \
|
); \
|
||||||
} \
|
} \
|
||||||
@ -1273,7 +1280,9 @@ static void OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(uint8_t *dst, \
|
|||||||
"jnz 1b \n\t" \
|
"jnz 1b \n\t" \
|
||||||
\
|
\
|
||||||
: "+r"(temp_ptr), "+r"(dst), "+g"(count) \
|
: "+r"(temp_ptr), "+r"(dst), "+g"(count) \
|
||||||
: "r"((x86_reg)dstStride), "r"(2*(x86_reg)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-14*(x86_reg)dstStride)\
|
: "r"((x86_reg)dstStride), "r"(2 * (x86_reg)dstStride), \
|
||||||
|
/* "m"(ff_pw_20), "m"(ff_pw_3), */ "m"(ROUNDER), \
|
||||||
|
"g"(4 - 14 * (x86_reg)dstStride) \
|
||||||
: "memory" \
|
: "memory" \
|
||||||
); \
|
); \
|
||||||
} \
|
} \
|
||||||
@ -1337,7 +1346,9 @@ static void OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(uint8_t *dst, \
|
|||||||
"jnz 1b \n\t" \
|
"jnz 1b \n\t" \
|
||||||
\
|
\
|
||||||
: "+r"(temp_ptr), "+r"(dst), "+g"(count) \
|
: "+r"(temp_ptr), "+r"(dst), "+g"(count) \
|
||||||
: "r"((x86_reg)dstStride), "r"(2*(x86_reg)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-6*(x86_reg)dstStride)\
|
: "r"((x86_reg)dstStride), "r"(2 * (x86_reg)dstStride), \
|
||||||
|
/* "m"(ff_pw_20), "m"(ff_pw_3), */ "m"(ROUNDER), \
|
||||||
|
"g"(4 - 6 * (x86_reg)dstStride) \
|
||||||
: "memory" \
|
: "memory" \
|
||||||
); \
|
); \
|
||||||
} \
|
} \
|
||||||
@ -1696,11 +1707,14 @@ static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, \
|
|||||||
OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16); \
|
OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16); \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t"
|
#define PUT_OP(a, b, temp, size) \
|
||||||
|
"mov"#size" "#a", "#b" \n\t"
|
||||||
|
|
||||||
#define AVG_3DNOW_OP(a, b, temp, size) \
|
#define AVG_3DNOW_OP(a, b, temp, size) \
|
||||||
"mov"#size" "#b", "#temp" \n\t" \
|
"mov"#size" "#b", "#temp" \n\t" \
|
||||||
"pavgusb "#temp", "#a" \n\t" \
|
"pavgusb "#temp", "#a" \n\t" \
|
||||||
"mov"#size" "#a", "#b" \n\t"
|
"mov"#size" "#a", "#b" \n\t"
|
||||||
|
|
||||||
#define AVG_MMX2_OP(a, b, temp, size) \
|
#define AVG_MMX2_OP(a, b, temp, size) \
|
||||||
"mov"#size" "#b", "#temp" \n\t" \
|
"mov"#size" "#b", "#temp" \n\t" \
|
||||||
"pavgb "#temp", "#a" \n\t" \
|
"pavgb "#temp", "#a" \n\t" \
|
||||||
@ -2271,7 +2285,8 @@ static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize)
|
|||||||
"add $16, %0 \n" \
|
"add $16, %0 \n" \
|
||||||
"jl 1b \n" \
|
"jl 1b \n" \
|
||||||
: "+&r"(i), "=&r"(j), "=&r"(k) \
|
: "+&r"(i), "=&r"(j), "=&r"(k) \
|
||||||
:"r"(samples[0]+len), "r"(matrix_simd+in_ch), "g"((intptr_t)-32*(in_ch-1))\
|
: "r"(samples[0] + len), "r"(matrix_simd + in_ch), \
|
||||||
|
"g"((intptr_t) - 32 * (in_ch - 1)) \
|
||||||
: "memory" \
|
: "memory" \
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -2517,10 +2532,10 @@ static void vector_clipf_sse(float *dst, const float *src,
|
|||||||
{
|
{
|
||||||
x86_reg i = (len - 16) * 4;
|
x86_reg i = (len - 16) * 4;
|
||||||
__asm__ volatile (
|
__asm__ volatile (
|
||||||
"movss %3, %%xmm4 \n"
|
"movss %3, %%xmm4 \n\t"
|
||||||
"movss %4, %%xmm5 \n"
|
"movss %4, %%xmm5 \n\t"
|
||||||
"shufps $0, %%xmm4, %%xmm4 \n"
|
"shufps $0, %%xmm4, %%xmm4 \n\t"
|
||||||
"shufps $0, %%xmm5, %%xmm5 \n"
|
"shufps $0, %%xmm5, %%xmm5 \n\t"
|
||||||
"1: \n\t"
|
"1: \n\t"
|
||||||
"movaps (%2, %0), %%xmm0 \n\t" // 3/1 on intel
|
"movaps (%2, %0), %%xmm0 \n\t" // 3/1 on intel
|
||||||
"movaps 16(%2, %0), %%xmm1 \n\t"
|
"movaps 16(%2, %0), %%xmm1 \n\t"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user