You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-08-15 14:13:16 +02:00
decode_significance_8x8_x86()
8% faster decode_cabac_residual() (8x8 case only) on P3. Originally committed as revision 6750 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
@@ -721,6 +721,62 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, uint8_t *sign
|
|||||||
);
|
);
|
||||||
return coeff_count;
|
return coeff_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * x86 inline-assembly decoder for the significance map of an 8x8 residual block.
 *
 * For each scan position `last`, starting at 0 and while it is below 63, one
 * CABAC bin is decoded with the state byte at
 * significant_coeff_ctx_base + sig_off[last]. If bit 0 of the value tested in
 * %edx is set, a second bin is decoded with the state byte at
 * significant_coeff_ctx_base + last_coeff_flag_offset_8x8[last] + 15, the
 * position is stored through the index pointer, and the loop stops when bit 0
 * of that second result is set; otherwise the index pointer advances by one
 * int. If the loop reaches position 63 without stopping, 63 is stored too.
 *
 * NOTE(review): BRANCHLESS_GET_CABAC, RANGE, LOW and MANGLE are defined outside
 * this view; presumably the macro leaves the decoded bin in its first register
 * argument (%edx) and RANGE/LOW are field offsets inside CABACContext -- confirm.
 *
 * @param c                          CABAC context; the words at RANGE(c) and
 *                                   LOW(c) are loaded on entry and written
 *                                   back on exit
 * @param significant_coeff_ctx_base base address added to every table offset
 *                                   to locate the state byte of a bin
 * @param index                      output array receiving the stored scan
 *                                   positions, one int per entry
 * @param sig_off                    byte table indexed by scan position that
 *                                   gives the offset for the first bin
 * @return number of entries written to index[]
 */
static int decode_significance_8x8_x86(CABACContext *c, uint8_t *significant_coeff_ctx_base, int *index, uint8_t *sig_off){
|
||||||
|
/* 4 minus the address of index[0]: adding it to the address of the last slot
 * written and shifting right by 2 yields the entry count.
 * NOTE(review): the cast to int assumes pointers fit in 32 bits -- confirm
 * this is only ever built for 32-bit x86. */
int minusindex= 4-(int)index;
|
||||||
|
int coeff_count;
|
||||||
|
int last=0;
|
||||||
|
/* Operands: %0 coeff_count (eax), %1 last, %2 index, %3 c,
 *           %4 minusindex, %5 significant_coeff_ctx_base, %6 sig_off */
asm volatile(
|
||||||
|
"movl "RANGE "(%3), %%esi \n\t"
|
||||||
|
"movl "LOW "(%3), %%ebx \n\t"
|
||||||
|
|
||||||
|
"mov %1, %%edi \n\t"
|
||||||
|
/* 2: loop head; edi holds the current scan position */
"2: \n\t"
|
||||||
|
|
||||||
|
/* edi = significant_coeff_ctx_base + sig_off[last] */
"mov %6, %%eax \n\t"
|
||||||
|
"movzbl (%%eax, %%edi), %%edi \n\t"
|
||||||
|
"add %5, %%edi \n\t"
|
||||||
|
|
||||||
|
BRANCHLESS_GET_CABAC("%%edx", "%3", "(%%edi)", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")
|
||||||
|
|
||||||
|
/* reload the position, then skip to 3 when bit 0 of edx is clear */
"mov %1, %%edi \n\t"
|
||||||
|
"test $1, %%edx \n\t"
|
||||||
|
" jz 3f \n\t"
|
||||||
|
|
||||||
|
/* edi = significant_coeff_ctx_base + last_coeff_flag_offset_8x8[last];
 * the state byte used for the second bin is 15 bytes past that address */
"movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%%edi), %%edi\n\t"
|
||||||
|
"add %5, %%edi \n\t"
|
||||||
|
|
||||||
|
BRANCHLESS_GET_CABAC("%%edx", "%3", "15(%%edi)", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")
|
||||||
|
|
||||||
|
/* *index = last */
"movl %2, %%eax \n\t"
|
||||||
|
"mov %1, %%edi \n\t"
|
||||||
|
"movl %%edi, (%%eax) \n\t"
|
||||||
|
|
||||||
|
/* bit 0 of the second result set: leave the loop with eax at the slot just written */
"test $1, %%edx \n\t"
|
||||||
|
" jnz 4f \n\t"
|
||||||
|
|
||||||
|
/* otherwise advance the index pointer by one int */
"addl $4, %%eax \n\t"
|
||||||
|
"movl %%eax, %2 \n\t"
|
||||||
|
|
||||||
|
/* 3: next position; loop again while last < 63 */
"3: \n\t"
|
||||||
|
"addl $1, %%edi \n\t"
|
||||||
|
"mov %%edi, %1 \n\t"
|
||||||
|
"cmpl $63, %%edi \n\t"
|
||||||
|
" jb 2b \n\t"
|
||||||
|
/* loop ran to the end without an early exit: store the final position (63) */
"movl %2, %%eax \n\t"
|
||||||
|
"movl %%edi, (%%eax) \n\t"
|
||||||
|
/* 4: eax = (address of last written slot + 4 - address of index[0]) / 4 */
"4: \n\t"
|
||||||
|
"addl %4, %%eax \n\t"
|
||||||
|
"shr $2, %%eax \n\t"
|
||||||
|
|
||||||
|
/* write the register copies back to RANGE(c) and LOW(c) */
"movl %%esi, "RANGE "(%3) \n\t"
|
||||||
|
"movl %%ebx, "LOW "(%3) \n\t"
|
||||||
|
:"=&a"(coeff_count),"+m"(last), "+m"(index)\
|
||||||
|
:"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off)\
|
||||||
|
: "%ecx", "%ebx", "%edx", "%esi", "%edi", "memory"\
|
||||||
|
);
|
||||||
|
return coeff_count;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@@ -6034,6 +6034,13 @@ static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
|
|||||||
return ctx + 4 * cat;
|
return ctx + 4 * cat;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Lookup table with one byte per scan position 0..62 of an 8x8 block, with
 * values in the range 0..8. It is indexed by the current position `last`
 * when the offset for the second bin is computed, both by the
 * DECODE_SIGNIFICANCE call in decode_cabac_residual() and by the inline asm
 * in decode_significance_8x8_x86(), which names the symbol through MANGLE().
 *
 * NOTE(review): the `used` attribute is presumably here because the asm
 * reference is invisible to the compiler, which could otherwise drop the
 * table -- confirm.
 */
static const __attribute((used)) uint8_t last_coeff_flag_offset_8x8[63] = {
|
||||||
|
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||||
|
3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||||
|
5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
|
||||||
|
};
|
||||||
|
|
||||||
static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
|
static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
|
||||||
const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
|
const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
|
||||||
static const int significant_coeff_flag_offset[2][6] = {
|
static const int significant_coeff_flag_offset[2][6] = {
|
||||||
@@ -6057,12 +6064,6 @@ static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n
|
|||||||
9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
|
9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
|
||||||
9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
|
9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
|
||||||
};
|
};
|
||||||
static const uint8_t last_coeff_flag_offset_8x8[63] = {
|
|
||||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
||||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
|
||||||
3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
|
|
||||||
5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
|
|
||||||
};
|
|
||||||
|
|
||||||
int index[64];
|
int index[64];
|
||||||
|
|
||||||
@@ -6138,11 +6139,13 @@ static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n
|
|||||||
index[coeff_count++] = last;\
|
index[coeff_count++] = last;\
|
||||||
}
|
}
|
||||||
const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
|
const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
|
||||||
DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
|
|
||||||
} else {
|
|
||||||
#ifdef ARCH_X86
|
#ifdef ARCH_X86
|
||||||
|
coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
|
||||||
|
} else {
|
||||||
coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
|
coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
|
||||||
#else
|
#else
|
||||||
|
DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
|
||||||
|
} else {
|
||||||
DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
|
DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user