mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
H.264: optimize CABAC x86 asm for Atom
This commit is contained in:
parent
8dcf518430
commit
6c32576548
@@ -1649,7 +1649,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
|
|||||||
const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
|
const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
|
||||||
#if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
|
#if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
|
||||||
coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index,
|
coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index,
|
||||||
last_coeff_ctx_base-significant_coeff_ctx_base, sig_off);
|
last_coeff_ctx_base, sig_off);
|
||||||
} else {
|
} else {
|
||||||
coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index,
|
coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index,
|
||||||
last_coeff_ctx_base-significant_coeff_ctx_base);
|
last_coeff_ctx_base-significant_coeff_ctx_base);
|
||||||
|
@@ -34,8 +34,8 @@
|
|||||||
"cmova %%ecx , "range" \n\t"\
|
"cmova %%ecx , "range" \n\t"\
|
||||||
"sbb %%ecx , %%ecx \n\t"\
|
"sbb %%ecx , %%ecx \n\t"\
|
||||||
"and %%ecx , "tmp" \n\t"\
|
"and %%ecx , "tmp" \n\t"\
|
||||||
"sub "tmp" , "low" \n\t"\
|
"xor %%ecx , "ret" \n\t"\
|
||||||
"xor %%ecx , "ret" \n\t"
|
"sub "tmp" , "low" \n\t"
|
||||||
#else /* HAVE_FAST_CMOV */
|
#else /* HAVE_FAST_CMOV */
|
||||||
#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp)\
|
#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp)\
|
||||||
"mov "tmp" , %%ecx \n\t"\
|
"mov "tmp" , %%ecx \n\t"\
|
||||||
@@ -62,21 +62,20 @@
|
|||||||
"movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx \n\t"\
|
"movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx \n\t"\
|
||||||
"shl %%cl , "range" \n\t"\
|
"shl %%cl , "range" \n\t"\
|
||||||
"movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp" \n\t"\
|
"movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp" \n\t"\
|
||||||
"mov "tmpbyte" , "statep" \n\t"\
|
|
||||||
"shl %%cl , "low" \n\t"\
|
"shl %%cl , "low" \n\t"\
|
||||||
|
"mov "tmpbyte" , "statep" \n\t"\
|
||||||
"test "lowword" , "lowword" \n\t"\
|
"test "lowword" , "lowword" \n\t"\
|
||||||
" jnz 1f \n\t"\
|
" jnz 1f \n\t"\
|
||||||
"mov "byte"("cabac"), %%"REG_c" \n\t"\
|
"mov "byte"("cabac"), %%"REG_c" \n\t"\
|
||||||
|
"add $2 , "byte "("cabac") \n\t"\
|
||||||
"movzwl (%%"REG_c") , "tmp" \n\t"\
|
"movzwl (%%"REG_c") , "tmp" \n\t"\
|
||||||
"bswap "tmp" \n\t"\
|
|
||||||
"shr $15 , "tmp" \n\t"\
|
|
||||||
"sub $0xFFFF , "tmp" \n\t"\
|
|
||||||
"add $2 , %%"REG_c" \n\t"\
|
|
||||||
"mov %%"REG_c" , "byte "("cabac") \n\t"\
|
|
||||||
"lea -1("low") , %%ecx \n\t"\
|
"lea -1("low") , %%ecx \n\t"\
|
||||||
"xor "low" , %%ecx \n\t"\
|
"xor "low" , %%ecx \n\t"\
|
||||||
"shr $15 , %%ecx \n\t"\
|
"shr $15 , %%ecx \n\t"\
|
||||||
|
"bswap "tmp" \n\t"\
|
||||||
|
"shr $15 , "tmp" \n\t"\
|
||||||
"movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"\
|
"movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"\
|
||||||
|
"sub $0xFFFF , "tmp" \n\t"\
|
||||||
"neg %%ecx \n\t"\
|
"neg %%ecx \n\t"\
|
||||||
"add $7 , %%ecx \n\t"\
|
"add $7 , %%ecx \n\t"\
|
||||||
"shl %%cl , "tmp" \n\t"\
|
"shl %%cl , "tmp" \n\t"\
|
||||||
|
@@ -72,8 +72,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
|
|||||||
"test $1, %4 \n\t"
|
"test $1, %4 \n\t"
|
||||||
" jnz 4f \n\t"
|
" jnz 4f \n\t"
|
||||||
|
|
||||||
"add $4, %0 \n\t"
|
"add $4, %2 \n\t"
|
||||||
"mov %0, %2 \n\t"
|
|
||||||
|
|
||||||
"3: \n\t"
|
"3: \n\t"
|
||||||
"add $1, %1 \n\t"
|
"add $1, %1 \n\t"
|
||||||
@@ -101,7 +100,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
|
|||||||
|
|
||||||
static int decode_significance_8x8_x86(CABACContext *c,
|
static int decode_significance_8x8_x86(CABACContext *c,
|
||||||
uint8_t *significant_coeff_ctx_base,
|
uint8_t *significant_coeff_ctx_base,
|
||||||
int *index, x86_reg last_off, const uint8_t *sig_off){
|
int *index, uint8_t *last_coeff_ctx_base, const uint8_t *sig_off){
|
||||||
int minusindex= 4-(intptr_t)index;
|
int minusindex= 4-(intptr_t)index;
|
||||||
int bit;
|
int bit;
|
||||||
x86_reg coeff_count;
|
x86_reg coeff_count;
|
||||||
@@ -128,7 +127,6 @@ static int decode_significance_8x8_x86(CABACContext *c,
|
|||||||
" jz 3f \n\t"
|
" jz 3f \n\t"
|
||||||
|
|
||||||
"movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%k6), %k6\n\t"
|
"movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%k6), %k6\n\t"
|
||||||
"add %9, %6 \n\t"
|
|
||||||
"add %11, %6 \n\t"
|
"add %11, %6 \n\t"
|
||||||
|
|
||||||
BRANCHLESS_GET_CABAC("%4", "%7", "(%6)", "%3",
|
BRANCHLESS_GET_CABAC("%4", "%7", "(%6)", "%3",
|
||||||
@@ -141,8 +139,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
|
|||||||
"test $1, %4 \n\t"
|
"test $1, %4 \n\t"
|
||||||
" jnz 4f \n\t"
|
" jnz 4f \n\t"
|
||||||
|
|
||||||
"add $4, %0 \n\t"
|
"add $4, %2 \n\t"
|
||||||
"mov %0, %2 \n\t"
|
|
||||||
|
|
||||||
"3: \n\t"
|
"3: \n\t"
|
||||||
"addl $1, %k6 \n\t"
|
"addl $1, %k6 \n\t"
|
||||||
@@ -159,7 +156,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
|
|||||||
"movl %3, %a13(%7) \n\t"
|
"movl %3, %a13(%7) \n\t"
|
||||||
:"=&q"(coeff_count),"+m"(last), "+m"(index), "=&r"(low), "=&r"(bit),
|
:"=&q"(coeff_count),"+m"(last), "+m"(index), "=&r"(low), "=&r"(bit),
|
||||||
"=&r"(range), "=&r"(state)
|
"=&r"(range), "=&r"(state)
|
||||||
:"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off), "m"(last_off),
|
:"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off), "m"(last_coeff_ctx_base),
|
||||||
"i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)),
|
"i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)),
|
||||||
"i"(offsetof(CABACContext, bytestream))
|
"i"(offsetof(CABACContext, bytestream))
|
||||||
: "%"REG_c, "memory"
|
: "%"REG_c, "memory"
|
||||||
|
Loading…
Reference in New Issue
Block a user