You've already forked FFmpeg
							
							
				Mirror of https://github.com/FFmpeg/FFmpeg.git (last synced 2025-10-30 23:18:11 +02:00)
			
		
		
		
	H.264: optimize CABAC x86 asm for Atom
This commit is contained in:
@@ -1649,7 +1649,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
         const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
 #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
         coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index,
-                                                 last_coeff_ctx_base-significant_coeff_ctx_base, sig_off);
+                                                 last_coeff_ctx_base, sig_off);
     } else {
         coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index,
                                              last_coeff_ctx_base-significant_coeff_ctx_base);
|   | ||||
@@ -34,8 +34,8 @@
         "cmova  %%ecx       , "range"   \n\t"\
         "sbb    %%ecx       , %%ecx     \n\t"\
         "and    %%ecx       , "tmp"     \n\t"\
-        "sub    "tmp"       , "low"     \n\t"\
-        "xor    %%ecx       , "ret"     \n\t"
+        "xor    %%ecx       , "ret"     \n\t"\
+        "sub    "tmp"       , "low"     \n\t"
 #else /* HAVE_FAST_CMOV */
 #define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp)\
         "mov    "tmp"       , %%ecx     \n\t"\
@@ -62,21 +62,20 @@
         "movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx          \n\t"\
         "shl    %%cl        , "range"                                   \n\t"\
         "movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp"          \n\t"\
-        "mov    "tmpbyte"   , "statep"                                  \n\t"\
         "shl    %%cl        , "low"                                     \n\t"\
+        "mov    "tmpbyte"   , "statep"                                  \n\t"\
         "test   "lowword"   , "lowword"                                 \n\t"\
         " jnz   1f                                                      \n\t"\
         "mov "byte"("cabac"), %%"REG_c"                                 \n\t"\
+        "add    $2          , "byte    "("cabac")                       \n\t"\
         "movzwl (%%"REG_c")     , "tmp"                                 \n\t"\
-        "bswap  "tmp"                                                   \n\t"\
-        "shr    $15         , "tmp"                                     \n\t"\
-        "sub    $0xFFFF     , "tmp"                                     \n\t"\
-        "add    $2          , %%"REG_c"                                 \n\t"\
-        "mov    %%"REG_c"   , "byte    "("cabac")                       \n\t"\
         "lea    -1("low")   , %%ecx                                     \n\t"\
         "xor    "low"       , %%ecx                                     \n\t"\
         "shr    $15         , %%ecx                                     \n\t"\
+        "bswap  "tmp"                                                   \n\t"\
+        "shr    $15         , "tmp"                                     \n\t"\
         "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx            \n\t"\
+        "sub    $0xFFFF     , "tmp"                                     \n\t"\
         "neg    %%ecx                                                   \n\t"\
         "add    $7          , %%ecx                                     \n\t"\
         "shl    %%cl        , "tmp"                                     \n\t"\
|   | ||||
@@ -72,8 +72,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
         "test $1, %4                            \n\t"
         " jnz 4f                                \n\t"
 
-        "add  $4, %0                            \n\t"
-        "mov  %0, %2                            \n\t"
+        "add  $4, %2                            \n\t"
 
         "3:                                     \n\t"
         "add  $1, %1                            \n\t"
@@ -101,7 +100,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
 
 static int decode_significance_8x8_x86(CABACContext *c,
                                        uint8_t *significant_coeff_ctx_base,
-                                       int *index, x86_reg last_off, const uint8_t *sig_off){
+                                       int *index, uint8_t *last_coeff_ctx_base, const uint8_t *sig_off){
     int minusindex= 4-(intptr_t)index;
     int bit;
     x86_reg coeff_count;
@@ -128,7 +127,6 @@ static int decode_significance_8x8_x86(CABACContext *c,
         " jz 3f                                 \n\t"
 
         "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%k6), %k6\n\t"
-        "add %9, %6                             \n\t"
         "add %11, %6                            \n\t"
 
         BRANCHLESS_GET_CABAC("%4", "%7", "(%6)", "%3",
@@ -141,8 +139,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
         "test $1, %4                            \n\t"
         " jnz 4f                                \n\t"
 
-        "add $4, %0                             \n\t"
-        "mov %0, %2                             \n\t"
+        "add  $4, %2                            \n\t"
 
         "3:                                     \n\t"
         "addl $1, %k6                           \n\t"
@@ -159,7 +156,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
         "movl %3, %a13(%7)                      \n\t"
         :"=&q"(coeff_count),"+m"(last), "+m"(index), "=&r"(low), "=&r"(bit),
          "=&r"(range), "=&r"(state)
-        :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off), "m"(last_off),
+        :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off), "m"(last_coeff_ctx_base),
          "i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)),
          "i"(offsetof(CABACContext, bytestream))
         : "%"REG_c, "memory"
|   | ||||
		Reference in New Issue
	
	Block a user