2003-05-28 21:44:52 +03:00
/*
* H .26 L / H .264 / AVC / JVT / 14496 - 10 / . . . encoder / decoder
* Copyright ( c ) 2003 Michael Niedermayer < michaelni @ gmx . at >
*
2006-10-07 18:30:46 +03:00
* This file is part of FFmpeg .
*
* FFmpeg is free software ; you can redistribute it and / or
2003-05-28 21:44:52 +03:00
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation ; either
2006-10-07 18:30:46 +03:00
* version 2.1 of the License , or ( at your option ) any later version .
2003-05-28 21:44:52 +03:00
*
2006-10-07 18:30:46 +03:00
* FFmpeg is distributed in the hope that it will be useful ,
2003-05-28 21:44:52 +03:00
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the GNU
* Lesser General Public License for more details .
*
* You should have received a copy of the GNU Lesser General Public
2006-10-07 18:30:46 +03:00
* License along with FFmpeg ; if not , write to the Free Software
2006-01-13 00:43:26 +02:00
* Foundation , Inc . , 51 Franklin Street , Fifth Floor , Boston , MA 02110 - 1301 USA
2003-05-28 21:44:52 +03:00
*
*/
2005-12-17 20:14:38 +02:00
2003-05-28 21:44:52 +03:00
/**
* @ file cabac . h
* Context Adaptive Binary Arithmetic Coder .
*/
2006-04-29 03:43:15 +03:00
//#undef NDEBUG
2003-05-28 21:44:52 +03:00
# include <assert.h>
2006-10-23 19:54:21 +03:00
# ifdef ARCH_X86
# include "x86_cpu.h"
# endif
2003-05-28 21:44:52 +03:00
2006-10-07 18:44:14 +03:00
/* Number of bitstream bits pulled in per refill; refill() and refill2()
 * advance the read pointer by CABAC_BITS/8 bytes. */
# define CABAC_BITS 16
2004-10-26 06:12:21 +03:00
/* Mask of the low CABAC_BITS bits of c->low; the decoder refills when
 * (c->low & CABAC_MASK) is zero. */
# define CABAC_MASK ((1<<CABAC_BITS)-1)
2006-10-10 11:16:41 +03:00
/* When defined, get_cabac_inline() uses its branchless code paths
 * (both the x86 asm and the C fallback test this macro). */
# define BRANCHLESS_CABAC_DECODER 1
2006-10-18 12:37:25 +03:00
/* Uncommenting this switches renorm_cabac_decoder_once() to its inline-asm variants. */
//#define ARCH_X86_DISABLED 1
2004-10-26 06:12:21 +03:00
2003-05-28 21:44:52 +03:00
/**
 * State shared by the CABAC encoder and decoder.
 *
 * The x86 inline asm in this file addresses low, range and bytestream through
 * hard-coded byte offsets (see the LOW / RANGE / BYTE macros in
 * get_cabac_inline()), so the order and size of the leading fields matter.
 * NOTE(review): defining STRICT_LIMITS inserts symCount before the pointers and
 * would presumably invalidate those offsets -- confirm before enabling it
 * together with the asm paths.
 */
typedef struct CABACContext {
/* arithmetic coder "low" register */
int low ;
/* arithmetic coder "range" register */
int range ;
/* encoder only: number of pending bits that put_cabac_bit() emits as the
 * complement of the next written bit */
int outstanding_count ;
# ifdef STRICT_LIMITS
/* incremented once per encoded symbol by the put_cabac*() functions */
int symCount ;
# endif
2004-05-18 20:09:46 +03:00
/* start of the input buffer; get_cabac_terminate() reports bytestream - bytestream_start */
const uint8_t * bytestream_start ;
/* current read position, advanced by refill() / refill2() and the asm refill code */
const uint8_t * bytestream ;
2004-07-08 03:53:21 +03:00
/* NOTE(review): not read anywhere in the visible part of this file; presumably end of input */
const uint8_t * bytestream_end ;
2003-05-28 21:44:52 +03:00
/* bit writer used by the encoder (put_bits / flush_put_bits) */
PutBitContext pb ;
} CABACContext ;
2006-10-13 17:21:25 +03:00
/* Combined state transition table. The decoder indexes it as
 * (ff_h264_mlps_state + 128)[s], where s is the state value XORed with an
 * all-zero or all-one mask, so indices on both sides of the +128 base occur. */
extern uint8_t ff_h264_mlps_state [ 4 * 64 ] ;
2006-10-18 12:37:25 +03:00
/* Indexed in this file as [2*(range & 0xC0) + state]. */
extern uint8_t ff_h264_lps_range [ 4 * 2 * 64 ] ; ///< rangeTabLPS
2006-10-11 17:44:17 +03:00
/* Next-state tables, indexed by the current state byte. */
extern uint8_t ff_h264_mps_state [ 2 * 64 ] ; ///< transIdxMPS
extern uint8_t ff_h264_lps_state [ 2 * 64 ] ; ///< transIdxLPS
2006-10-15 23:40:50 +03:00
/* Shift-count lookup used for renormalization; indexed by a range value in
 * get_cabac_inline() and by (low ^ (low-1)) >> (CABAC_BITS-1) in refill2(). */
extern const uint8_t ff_h264_norm_shift [ 512 ] ;
2004-10-26 06:12:21 +03:00
2003-05-28 21:44:52 +03:00
/* The three initializers below are only declared here; their definitions are
 * not part of this file. NOTE(review): presumably they set up the context for
 * writing to / reading from buf and fill the state tables -- confirm in cabac.c. */
void ff_init_cabac_encoder ( CABACContext * c , uint8_t * buf , int buf_size ) ;
2004-05-18 20:09:46 +03:00
void ff_init_cabac_decoder ( CABACContext * c , const uint8_t * buf , int buf_size ) ;
2006-10-11 17:44:17 +03:00
void ff_init_cabac_states ( CABACContext * c ) ;
2003-05-28 21:44:52 +03:00
/**
 * Write one resolved bit, then flush all outstanding bits.
 *
 * Every outstanding bit is written as the complement of b, and
 * c->outstanding_count is zero on return.
 *
 * @param c encoder context
 * @param b bit value to write (0 or 1)
 */
static inline void put_cabac_bit(CABACContext *c, int b){
    put_bits(&c->pb, 1, b);

    while(c->outstanding_count){
        put_bits(&c->pb, 1, 1-b);
        c->outstanding_count--;
    }
}
/**
 * Renormalize the encoder: double range and low until range reaches 0x100,
 * emitting a bit or recording an outstanding bit on each step.
 *
 * @param c encoder context
 */
static inline void renorm_cabac_encoder(CABACContext *c){
    while(c->range < 0x100){
        //FIXME optimize
        if(c->low >= 0x200){
            /* top bit is known to be 1 */
            put_cabac_bit(c, 1);
            c->low -= 0x200;
        }else if(c->low >= 0x100){
            /* undecided: remember it and resolve on the next written bit */
            c->outstanding_count++;
            c->low -= 0x100;
        }else{
            /* top bit is known to be 0 */
            put_cabac_bit(c, 0);
        }

        c->range += c->range;
        c->low   += c->low;
    }
}
2006-10-04 10:16:10 +03:00
/**
 * Encode one context-coded bin.
 *
 * The LPS sub-range is looked up from ff_h264_lps_range using the two bits
 * (range & 0xC0) and the current state; the state byte is then advanced through
 * ff_h264_mps_state or ff_h264_lps_state depending on whether bit matches the
 * low bit of the state.
 *
 * @param c     encoder context
 * @param state context state byte, updated in place
 * @param bit   bin value to encode
 */
static void put_cabac(CABACContext *c, uint8_t * const state, int bit){
    const int range_lps = ff_h264_lps_range[2*(c->range&0xC0) + *state];

    if(bit != ((*state)&1)){
        /* least probable symbol: move low past the MPS interval */
        c->low  += c->range - range_lps;
        c->range = range_lps;
        *state   = ff_h264_lps_state[*state];
    }else{
        /* most probable symbol: just shrink the range */
        c->range -= range_lps;
        *state    = ff_h264_mps_state[*state];
    }

    renorm_cabac_encoder(c);

#ifdef STRICT_LIMITS
    c->symCount++;
#endif
}
2006-10-04 10:16:10 +03:00
/**
 * Encode one bin with a caller-supplied, fixed LPS range (no context state).
 *
 * @param c        encoder context; c->range must be greater than RangeLPS
 * @param RangeLPS width of the sub-interval used when bit is non-zero
 * @param bit      0 selects the larger sub-interval, non-zero the RangeLPS one
 */
static void put_cabac_static(CABACContext *c, int RangeLPS, int bit){
    assert(c->range > RangeLPS);

    if(bit){
        c->low  += c->range - RangeLPS;
        c->range = RangeLPS;
    }else{
        c->range -= RangeLPS;
    }

    renorm_cabac_encoder(c);

#ifdef STRICT_LIMITS
    c->symCount++;
#endif
}
2003-05-30 04:05:48 +03:00
/**
* @ param bit 0 - > write zero bit , ! = 0 write one bit
*/
2006-10-04 10:16:10 +03:00
static void put_cabac_bypass ( CABACContext * c , int bit ) {
2003-05-28 21:44:52 +03:00
c - > low + = c - > low ;
if ( bit ) {
c - > low + = c - > range ;
}
//FIXME optimize
if ( c - > low < 0x200 ) {
put_cabac_bit ( c , 0 ) ;
} else if ( c - > low < 0x400 ) {
c - > outstanding_count + + ;
c - > low - = 0x200 ;
} else {
put_cabac_bit ( c , 1 ) ;
c - > low - = 0x400 ;
}
2005-12-17 20:14:38 +02:00
2003-05-28 21:44:52 +03:00
# ifdef STRICT_LIMITS
c - > symCount + + ;
# endif
}
2003-06-09 05:24:51 +03:00
/**
*
* @ return the number of bytes written
*/
2006-10-04 10:16:10 +03:00
static int put_cabac_terminate ( CABACContext * c , int bit ) {
2003-05-28 21:44:52 +03:00
c - > range - = 2 ;
if ( ! bit ) {
renorm_cabac_encoder ( c ) ;
} else {
c - > low + = c - > range ;
c - > range = 2 ;
2005-12-17 20:14:38 +02:00
2003-05-28 21:44:52 +03:00
renorm_cabac_encoder ( c ) ;
assert ( c - > low < = 0x1FF ) ;
put_cabac_bit ( c , c - > low > > 9 ) ;
put_bits ( & c - > pb , 2 , ( ( c - > low > > 7 ) & 3 ) | 1 ) ;
2005-12-17 20:14:38 +02:00
2003-05-28 21:44:52 +03:00
flush_put_bits ( & c - > pb ) ; //FIXME FIXME FIXME XXX wrong
}
2005-12-17 20:14:38 +02:00
2003-05-28 21:44:52 +03:00
# ifdef STRICT_LIMITS
c - > symCount + + ;
# endif
2003-06-09 05:24:51 +03:00
2004-02-06 19:51:58 +02:00
return ( put_bits_count ( & c - > pb ) + 7 ) > > 3 ;
2003-05-28 21:44:52 +03:00
}
2003-05-30 04:05:48 +03:00
/**
* put ( truncated ) unary binarization .
*/
2006-10-04 10:16:10 +03:00
static void put_cabac_u ( CABACContext * c , uint8_t * state , int v , int max , int max_index , int truncated ) {
2003-05-30 04:05:48 +03:00
int i ;
2005-12-17 20:14:38 +02:00
2003-05-30 04:05:48 +03:00
assert ( v < = max ) ;
2005-12-17 20:14:38 +02:00
2003-05-30 04:05:48 +03:00
# if 1
for ( i = 0 ; i < v ; i + + ) {
put_cabac ( c , state , 1 ) ;
if ( i < max_index ) state + + ;
}
if ( truncated = = 0 | | v < max )
put_cabac ( c , state , 0 ) ;
# else
if ( v < = max_index ) {
for ( i = 0 ; i < v ; i + + ) {
put_cabac ( c , state + i , 1 ) ;
}
if ( truncated = = 0 | | v < max )
put_cabac ( c , state + i , 0 ) ;
} else {
for ( i = 0 ; i < = max_index ; i + + ) {
put_cabac ( c , state + i , 1 ) ;
}
for ( ; i < v ; i + + ) {
put_cabac ( c , state + max_index , 1 ) ;
}
if ( truncated = = 0 | | v < max )
put_cabac ( c , state + max_index , 0 ) ;
}
# endif
}
/**
* put unary exp golomb k - th order binarization .
*/
2006-10-04 10:16:10 +03:00
static void put_cabac_ueg ( CABACContext * c , uint8_t * state , int v , int max , int is_signed , int k , int max_index ) {
2003-05-30 04:05:48 +03:00
int i ;
2005-12-17 20:14:38 +02:00
2003-05-30 04:05:48 +03:00
if ( v = = 0 )
put_cabac ( c , state , 0 ) ;
else {
2003-06-06 13:04:15 +03:00
const int sign = v < 0 ;
2005-12-17 20:14:38 +02:00
2006-10-12 02:17:58 +03:00
if ( is_signed ) v = FFABS ( v ) ;
2005-12-17 20:14:38 +02:00
2003-05-30 04:05:48 +03:00
if ( v < max ) {
for ( i = 0 ; i < v ; i + + ) {
put_cabac ( c , state , 1 ) ;
if ( i < max_index ) state + + ;
}
put_cabac ( c , state , 0 ) ;
} else {
int m = 1 < < k ;
for ( i = 0 ; i < max ; i + + ) {
put_cabac ( c , state , 1 ) ;
if ( i < max_index ) state + + ;
}
v - = max ;
while ( v > = m ) { //FIXME optimize
put_cabac_bypass ( c , 1 ) ;
v - = m ;
m + = m ;
}
put_cabac_bypass ( c , 0 ) ;
while ( m > > = 1 ) {
put_cabac_bypass ( c , v & m ) ;
}
}
if ( is_signed )
put_cabac_bypass ( c , sign ) ;
}
}
2004-10-26 06:12:21 +03:00
/**
 * Feed the next CABAC_BITS bits of the bytestream into c->low and advance the
 * read pointer by CABAC_BITS/8 bytes.
 *
 * @param c decoder context
 */
static void refill(CABACContext *c){
    const uint8_t *src = c->bytestream;
#if CABAC_BITS == 16
    const int fresh = (src[0]<<9) + (src[1]<<1);
#else
    const int fresh = src[0]<<1;
#endif

    c->low += fresh;
    c->low -= CABAC_MASK;
    c->bytestream = src + CABAC_BITS/8;
}
/**
 * Refill variant for use after a multi-bit renormalization shift.
 *
 * The new bits are shifted left by 7 - ff_h264_norm_shift[...] before being
 * added, where the table index is derived from the lowest set bit of c->low
 * (low ^ (low-1)).
 *
 * @param c decoder context
 */
static void refill2(CABACContext *c){
    const int lowest_bits = c->low ^ (c->low-1);
    const int shift       = 7 - ff_h264_norm_shift[lowest_bits>>(CABAC_BITS-1)];
    int fresh             = -CABAC_MASK;

#if CABAC_BITS == 16
    fresh += (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
#else
    fresh += c->bytestream[0]<<1;
#endif

    c->low        += fresh<<shift;
    c->bytestream += CABAC_BITS/8;
}
2003-05-28 21:44:52 +03:00
/**
 * Renormalize the decoder: double range and low until range reaches 0x100,
 * refilling from the bytestream whenever the low CABAC_BITS bits of low are
 * all zero.
 *
 * @param c decoder context
 */
static inline void renorm_cabac_decoder(CABACContext *c){
    for(;;){
        if(c->range >= 0x100)
            break;

        c->range += c->range;
        c->low   += c->low;

        if((c->low & CABAC_MASK) == 0)
            refill(c);
    }
}
2004-10-26 06:12:21 +03:00
/**
 * Renormalize the decoder by at most one bit: if c->range is below 0x100,
 * range and low are doubled, otherwise they are left unchanged. Afterwards the
 * bytestream is refilled if the low CABAC_BITS bits of c->low are all zero.
 *
 * The implementation is selected at compile time: several alternative x86
 * inline-asm variants when ARCH_X86_DISABLED is defined (only the "# elif 1"
 * one is compiled), otherwise the portable C version. The "P3:" / "athlon:"
 * comments are the original author's measurements for each variant.
 *
 * @param c decoder context
 */
static inline void renorm_cabac_decoder_once ( CABACContext * c ) {
2006-10-08 01:37:34 +03:00
# ifdef ARCH_X86_DISABLED
2006-10-07 14:15:10 +03:00
int temp ;
#if 0
2006-10-08 01:34:32 +03:00
//P3:683 athlon:475
2006-10-07 14:15:10 +03:00
asm (
2006-10-15 23:40:50 +03:00
" lea -0x100(%0), %2 \n \t "
2006-10-07 14:15:10 +03:00
" shr $31, %2 \n \t " //FIXME 31->63 for x86-64
" shl %%cl, %0 \n \t "
" shl %%cl, %1 \n \t "
: " +r " ( c - > range ) , " +r " ( c - > low ) , " +c " ( temp )
) ;
# elif 0
2006-10-08 01:34:32 +03:00
//P3:680 athlon:474
2006-10-07 14:15:10 +03:00
asm (
2006-10-15 23:40:50 +03:00
" cmp $0x100, %0 \n \t "
2006-10-07 14:15:10 +03:00
" setb %%cl \n \t " //FIXME 31->63 for x86-64
" shl %%cl, %0 \n \t "
" shl %%cl, %1 \n \t "
: " +r " ( c - > range ) , " +r " ( c - > low ) , " +c " ( temp )
) ;
# elif 1
int temp2 ;
2006-10-08 01:34:32 +03:00
//P3:665 athlon:517
2006-10-07 14:15:10 +03:00
asm (
2006-10-15 23:40:50 +03:00
" lea -0x100(%0), %%eax \n \t "
2006-10-07 14:15:10 +03:00
" cdq \n \t "
" mov %0, %%eax \n \t "
" and %%edx, %0 \n \t "
" and %1, %%edx \n \t "
" add %%eax, %0 \n \t "
" add %%edx, %1 \n \t "
: " +r " ( c - > range ) , " +r " ( c - > low ) , " +a " ( temp ) , " +d " ( temp2 )
) ;
# elif 0
int temp2 ;
2006-10-08 01:34:32 +03:00
//P3:673 athlon:509
2006-10-07 14:15:10 +03:00
asm (
2006-10-15 23:40:50 +03:00
" cmp $0x100, %0 \n \t "
2006-10-07 14:15:10 +03:00
" sbb %%edx, %%edx \n \t "
" mov %0, %%eax \n \t "
" and %%edx, %0 \n \t "
" and %1, %%edx \n \t "
" add %%eax, %0 \n \t "
" add %%edx, %1 \n \t "
: " +r " ( c - > range ) , " +r " ( c - > low ) , " +a " ( temp ) , " +d " ( temp2 )
) ;
# else
int temp2 ;
2006-10-08 01:34:32 +03:00
//P3:677 athlon:511
2006-10-07 14:15:10 +03:00
asm (
2006-10-15 23:40:50 +03:00
" cmp $0x100, %0 \n \t "
2006-10-07 14:15:10 +03:00
" lea (%0, %0), %%eax \n \t "
" lea (%1, %1), %%edx \n \t "
" cmovb %%eax, %0 \n \t "
" cmovb %%edx, %1 \n \t "
: " +r " ( c - > range ) , " +r " ( c - > low ) , " +a " ( temp ) , " +d " ( temp2 )
) ;
# endif
# else
2006-10-08 01:34:32 +03:00
//P3:675 athlon:476
2006-10-15 23:40:50 +03:00
/* shift is 1 when range < 0x100 (the subtraction goes negative, so bit 31 of
 * the unsigned result is set) and 0 otherwise */
int shift = ( uint32_t ) ( c - > range - 0x100 ) > > 31 ;
2006-08-27 12:19:02 +03:00
c - > range < < = shift ;
c - > low < < = shift ;
2006-10-07 14:15:10 +03:00
# endif
2004-10-26 06:12:21 +03:00
/* all buffered bits consumed: pull in the next CABAC_BITS bits */
if ( ! ( c - > low & CABAC_MASK ) )
refill ( c ) ;
}
2006-10-12 17:49:19 +03:00
/**
 * Decode one context-coded bin.
 *
 * Updates c->low and c->range, advances c->bytestream when a refill is needed,
 * and moves *state to its successor state.
 *
 * Three compile-time axes select the implementation:
 *  - x86 inline asm, used only when ARCH_X86 is defined and the build is not
 *    (PIC && __GNUC__); otherwise portable C;
 *  - BRANCHLESS_CABAC_DECODER picks the branchless variant of either;
 *  - CMOV_IS_FAST picks between two asm update sequences.
 *
 * Side effect on the rest of the file: the LOW / RANGE / BYTESTART / BYTE /
 * BYTEEND offset macros and the BRANCHLESS_GET_CABAC* asm macros defined in
 * this body are not undefined afterwards and are reused by functions further
 * down in this file.
 *
 * @param c     decoder context
 * @param state context state byte, updated in place
 * @return the decoded bin, 0 or 1
 */
static int always_inline get_cabac_inline ( CABACContext * c , uint8_t * const state ) {
2006-08-27 12:19:02 +03:00
//FIXME gcc generates duplicate load/stores for c->low and c->range
2006-10-09 17:15:14 +03:00
/* Byte offsets of CABACContext fields as used by the inline asm, given as
 * strings so they can be pasted into asm templates.
 * NOTE(review): these presumably assume STRICT_LIMITS is not defined -- confirm. */
# define LOW "0"
# define RANGE "4"
2006-10-23 19:54:21 +03:00
# ifdef ARCH_X86_64
# define BYTESTART "16"
# define BYTE "24"
# define BYTEEND "32"
# else
2006-10-11 17:44:17 +03:00
# define BYTESTART "12"
# define BYTE "16"
# define BYTEEND "20"
2006-10-23 19:54:21 +03:00
# endif
2006-11-01 19:18:49 +02:00
# if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__))
2006-10-19 23:24:33 +03:00
int bit ;
2006-10-10 11:16:41 +03:00
# ifndef BRANCHLESS_CABAC_DECODER
2006-10-09 17:15:14 +03:00
asm volatile (
2006-10-17 12:38:37 +03:00
" movzbl (%1), %0 \n \t "
2006-10-09 17:15:14 +03:00
" movl " RANGE " (%2), %%ebx \n \t "
" movl " RANGE " (%2), %%edx \n \t "
2006-10-15 23:40:50 +03:00
" andl $0xC0, %%ebx \n \t "
2006-10-17 12:38:37 +03:00
" movzbl " MANGLE ( ff_h264_lps_range ) " (%0, %%ebx, 2), %%esi \n \t "
2006-10-09 17:15:14 +03:00
" movl " LOW " (%2), %%ebx \n \t "
//eax:state ebx:low, edx:range, esi:RangeLPS
" subl %%esi, %%edx \n \t "
2006-10-15 23:40:50 +03:00
" movl %%edx, %%ecx \n \t "
" shll $17, %%ecx \n \t "
" cmpl %%ecx, %%ebx \n \t "
2006-10-09 17:15:14 +03:00
" ja 1f \n \t "
2006-10-11 20:59:40 +03:00
# if 1
//athlon:4067 P3:4110
2006-10-15 23:40:50 +03:00
" lea -0x100(%%edx), %%ecx \n \t "
2006-10-11 20:59:40 +03:00
" shr $31, %%ecx \n \t "
" shl %%cl, %%edx \n \t "
" shl %%cl, %%ebx \n \t "
# else
//athlon:4057 P3:4130
2006-10-15 23:40:50 +03:00
" cmp $0x100, %%edx \n \t " //FIXME avoidable
2006-10-09 17:15:14 +03:00
" setb %%cl \n \t "
" shl %%cl, %%edx \n \t "
" shl %%cl, %%ebx \n \t "
2006-10-11 20:59:40 +03:00
# endif
2006-10-17 12:38:37 +03:00
" movzbl " MANGLE ( ff_h264_mps_state ) " (%0), %%ecx \n \t "
2006-10-09 17:15:14 +03:00
" movb %%cl, (%1) \n \t "
//eax:state ebx:low, edx:range, esi:RangeLPS
" test %%bx, %%bx \n \t "
" jnz 2f \n \t "
2006-10-23 19:54:21 +03:00
" mov " BYTE " (%2), %% " REG_S " \n \t "
2006-10-09 17:15:14 +03:00
" subl $0xFFFF, %%ebx \n \t "
2006-10-23 19:54:21 +03:00
" movzwl (%% " REG_S " ), %%ecx \n \t "
2006-10-09 17:15:14 +03:00
" bswap %%ecx \n \t "
" shrl $15, %%ecx \n \t "
2006-10-23 19:54:21 +03:00
" add $2, %% " REG_S " \n \t "
2006-10-09 17:15:14 +03:00
" addl %%ecx, %%ebx \n \t "
2006-10-23 19:54:21 +03:00
" mov %% " REG_S " , " BYTE " (%2) \n \t "
2006-10-09 17:15:14 +03:00
" jmp 2f \n \t "
" 1: \n \t "
//eax:state ebx:low, edx:range, esi:RangeLPS
2006-10-15 23:40:50 +03:00
" subl %%ecx, %%ebx \n \t "
2006-10-09 17:15:14 +03:00
" movl %%esi, %%edx \n \t "
2006-10-10 00:57:10 +03:00
" movzbl " MANGLE ( ff_h264_norm_shift ) " (%%esi), %%ecx \n \t "
2006-10-09 17:15:14 +03:00
" shll %%cl, %%ebx \n \t "
" shll %%cl, %%edx \n \t "
2006-10-17 12:38:37 +03:00
" movzbl " MANGLE ( ff_h264_lps_state ) " (%0), %%ecx \n \t "
2006-10-15 23:40:50 +03:00
" movb %%cl, (%1) \n \t "
2006-10-23 19:54:21 +03:00
" add $1, %0 \n \t "
2006-10-09 17:15:14 +03:00
" test %%bx, %%bx \n \t "
" jnz 2f \n \t "
2006-10-23 19:54:21 +03:00
" mov " BYTE " (%2), %% " REG_c " \n \t "
" movzwl (%% " REG_c " ), %%esi \n \t "
2006-10-09 17:15:14 +03:00
" bswap %%esi \n \t "
" shrl $15, %%esi \n \t "
" subl $0xFFFF, %%esi \n \t "
2006-10-23 19:54:21 +03:00
" add $2, %% " REG_c " \n \t "
" mov %% " REG_c " , " BYTE " (%2) \n \t "
2006-10-09 17:15:14 +03:00
" leal -1(%%ebx), %%ecx \n \t "
" xorl %%ebx, %%ecx \n \t "
2006-10-15 23:40:50 +03:00
" shrl $15, %%ecx \n \t "
2006-10-10 00:57:10 +03:00
" movzbl " MANGLE ( ff_h264_norm_shift ) " (%%ecx), %%ecx \n \t "
2006-10-11 18:20:08 +03:00
" neg %%ecx \n \t "
" add $7, %%ecx \n \t "
2006-10-09 17:15:14 +03:00
" shll %%cl , %%esi \n \t "
" addl %%esi, %%ebx \n \t "
" 2: \n \t "
" movl %%edx, " RANGE " (%2) \n \t "
" movl %%ebx, " LOW " (%2) \n \t "
: " =&a " ( bit ) //FIXME this is fragile gcc either runs out of registers or misscompiles it (for example if "+a"(bit) or "+m"(*state) is used
: " r " ( state ) , " r " ( c )
2006-10-23 19:54:21 +03:00
: " % " REG_c , " %ebx " , " %edx " , " % " REG_S , " memory "
2006-10-09 17:15:14 +03:00
) ;
/* the asm leaves the decoded bin in the low bit of the output register */
2006-10-10 09:56:51 +03:00
bit & = 1 ;
2006-10-12 10:51:18 +03:00
# else /* BRANCHLESS_CABAC_DECODER */
2006-10-20 03:35:54 +03:00
2006-10-20 20:53:19 +03:00
# if defined CMOV_IS_FAST
2006-10-20 03:35:54 +03:00
# define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
" mov " tmp " , %%ecx \n \t " \
" shl $17 , " tmp " \n \t " \
" cmp " low " , " tmp " \n \t " \
" cmova %%ecx , " range " \n \t " \
" sbb %%ecx , %%ecx \n \t " \
" and %%ecx , " tmp " \n \t " \
" sub " tmp " , " low " \n \t " \
" xor %%ecx , " ret " \n \t "
2006-10-12 10:51:18 +03:00
# else /* CMOV_IS_FAST */
2006-10-20 03:35:54 +03:00
# define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
" mov " tmp " , %%ecx \n \t " \
" shl $17 , " tmp " \n \t " \
" sub " low " , " tmp " \n \t " \
" sar $31 , " tmp " \n \t " /*lps_mask*/ \
" sub %%ecx , " range " \n \t " /*RangeLPS - range*/ \
" and " tmp " , " range " \n \t " /*(RangeLPS - range)&lps_mask*/ \
" add %%ecx , " range " \n \t " /*new range*/ \
" shl $17 , %%ecx \n \t " \
" and " tmp " , %%ecx \n \t " \
" sub %%ecx , " low " \n \t " \
" xor " tmp " , " ret " \n \t "
2006-10-12 10:51:18 +03:00
# endif /* CMOV_IS_FAST */
2006-10-09 23:51:33 +03:00
2006-10-20 03:35:54 +03:00
# define BRANCHLESS_GET_CABAC(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
" movzbl " statep " , " ret " \n \t " \
" mov " range " , " tmp " \n \t " \
" and $0xC0 , " range " \n \t " \
" movzbl " MANGLE ( ff_h264_lps_range ) " ( " ret " , " range " , 2), " range " \n \t " \
" sub " range " , " tmp " \n \t " \
BRANCHLESS_GET_CABAC_UPDATE ( ret , cabac , statep , low , lowword , range , tmp , tmpbyte ) \
" movzbl " MANGLE ( ff_h264_norm_shift ) " ( " range " ), %%ecx \n \t " \
" shl %%cl , " range " \n \t " \
" movzbl " MANGLE ( ff_h264_mlps_state ) " +128( " ret " ), " tmp " \n \t " \
" mov " tmpbyte " , " statep " \n \t " \
" shl %%cl , " low " \n \t " \
" test " lowword " , " lowword " \n \t " \
" jnz 1f \n \t " \
2006-10-23 19:54:21 +03:00
" mov " BYTE " ( " cabac " ), %% " REG_c " \n \t " \
" movzwl (%% " REG_c " ) , " tmp " \n \t " \
2006-10-20 03:35:54 +03:00
" bswap " tmp " \n \t " \
" shr $15 , " tmp " \n \t " \
" sub $0xFFFF , " tmp " \n \t " \
2006-10-23 19:54:21 +03:00
" add $2 , %% " REG_c " \n \t " \
" mov %% " REG_c " , " BYTE " ( " cabac " ) \n \t " \
2006-10-20 03:35:54 +03:00
" lea -1( " low " ) , %%ecx \n \t " \
" xor " low " , %%ecx \n \t " \
" shr $15 , %%ecx \n \t " \
" movzbl " MANGLE ( ff_h264_norm_shift ) " (%%ecx), %%ecx \n \t " \
" neg %%ecx \n \t " \
" add $7 , %%ecx \n \t " \
" shl %%cl , " tmp " \n \t " \
" add " tmp " , " low " \n \t " \
" 1: \n \t "
2006-10-09 23:51:33 +03:00
2006-10-20 03:35:54 +03:00
asm volatile (
" movl " RANGE " (%2), %%esi \n \t "
" movl " LOW " (%2), %%ebx \n \t "
BRANCHLESS_GET_CABAC ( " %0 " , " %2 " , " (%1) " , " %%ebx " , " %%bx " , " %%esi " , " %%edx " , " %%dl " )
2006-10-15 23:40:50 +03:00
" movl %%esi, " RANGE " (%2) \n \t "
2006-10-11 19:11:41 +03:00
" movl %%ebx, " LOW " (%2) \n \t "
2006-10-09 23:51:33 +03:00
: " =&a " ( bit )
: " r " ( state ) , " r " ( c )
2006-10-23 19:54:21 +03:00
: " % " REG_c , " %ebx " , " %edx " , " %esi " , " memory "
2006-10-09 23:51:33 +03:00
) ;
/* the asm leaves the decoded bin in the low bit of the output register */
2006-10-10 04:17:39 +03:00
bit & = 1 ;
2006-10-12 10:51:18 +03:00
# endif /* BRANCHLESS_CABAC_DECODER */
2006-11-01 19:18:49 +02:00
# else /* defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__)) */
2006-08-27 12:19:02 +03:00
/* Portable C implementation. */
int s = * state ;
2006-10-18 12:37:25 +03:00
int RangeLPS = ff_h264_lps_range [ 2 * ( c - > range & 0xC0 ) + s ] ;
2005-02-24 21:08:50 +02:00
int bit , lps_mask attribute_unused ;
2005-12-17 20:14:38 +02:00
2003-05-28 21:44:52 +03:00
/* range now holds the width of the MPS sub-interval */
c - > range - = RangeLPS ;
2006-10-10 11:16:41 +03:00
# ifndef BRANCHLESS_CABAC_DECODER
2006-10-15 23:40:50 +03:00
/* low is compared against range scaled by 17 bits */
if ( c - > low < ( c - > range < < 17 ) ) {
2006-08-27 12:19:02 +03:00
/* MPS: the bin equals the low bit of the state */
bit = s & 1 ;
2006-10-11 17:44:17 +03:00
* state = ff_h264_mps_state [ s ] ;
2004-10-26 06:12:21 +03:00
renorm_cabac_decoder_once ( c ) ;
2003-05-28 21:44:52 +03:00
} else {
2006-10-15 23:40:50 +03:00
/* LPS: bit temporarily holds the renormalization shift for RangeLPS */
bit = ff_h264_norm_shift [ RangeLPS ] ;
c - > low - = ( c - > range < < 17 ) ;
2006-10-11 17:44:17 +03:00
* state = ff_h264_lps_state [ s ] ;
2006-10-08 16:20:22 +03:00
c - > range = RangeLPS < < bit ;
c - > low < < = bit ;
bit = ( s & 1 ) ^ 1 ;
2004-10-26 06:12:21 +03:00
if ( ! ( c - > low & 0xFFFF ) ) {
refill2 ( c ) ;
2006-10-08 16:20:22 +03:00
}
2003-05-28 21:44:52 +03:00
}
2006-10-12 10:51:18 +03:00
# else /* BRANCHLESS_CABAC_DECODER */
2006-10-15 23:40:50 +03:00
/* lps_mask is all ones when low exceeds the scaled MPS range (LPS case) and
 * zero otherwise; NOTE(review): this relies on >> of a negative int being an
 * arithmetic shift */
lps_mask = ( ( c - > range < < 17 ) - c - > low ) > > 31 ;
2005-12-17 20:14:38 +02:00
2006-10-15 23:40:50 +03:00
c - > low - = ( c - > range < < 17 ) & lps_mask ;
2004-10-26 06:12:21 +03:00
c - > range + = ( RangeLPS - c - > range ) & lps_mask ;
2005-12-17 20:14:38 +02:00
2006-10-09 23:44:11 +03:00
/* in the LPS case s becomes ~s, which selects the other half of the combined
 * transition table and flips the returned bit */
s ^ = lps_mask ;
2006-10-13 17:21:25 +03:00
* state = ( ff_h264_mlps_state + 128 ) [ s ] ;
2006-10-09 23:44:11 +03:00
bit = s & 1 ;
2005-12-17 20:14:38 +02:00
2006-10-15 23:40:50 +03:00
/* lps_mask is reused from here on as the renormalization shift count */
lps_mask = ff_h264_norm_shift [ c - > range ] ;
2004-10-26 06:12:21 +03:00
c - > range < < = lps_mask ;
c - > low < < = lps_mask ;
if ( ! ( c - > low & CABAC_MASK ) )
refill2 ( c ) ;
2006-10-12 10:51:18 +03:00
# endif /* BRANCHLESS_CABAC_DECODER */
2006-11-01 19:18:49 +02:00
# endif /* defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__)) */
2005-12-17 20:14:38 +02:00
return bit ;
2003-05-28 21:44:52 +03:00
}
2006-10-12 17:49:19 +03:00
/**
 * Out-of-line wrapper around get_cabac_inline(); the noinline attribute keeps
 * a single shared copy of the decoder body for callers that use this entry
 * point.
 *
 * @param c     decoder context
 * @param state context state byte, updated in place
 * @return the decoded bin
 */
static int __attribute((noinline)) get_cabac_noinline(CABACContext *c, uint8_t * const state){
    const int bin = get_cabac_inline(c, state);

    return bin;
}
/**
 * Decode one context-coded bin; plain wrapper around get_cabac_inline().
 *
 * @param c     decoder context
 * @param state context state byte, updated in place
 * @return the decoded bin
 */
static int get_cabac(CABACContext *c, uint8_t * const state){
    const int bin = get_cabac_inline(c, state);

    return bin;
}
2006-10-04 10:16:10 +03:00
/**
 * Decode one bypass bin; c->range is left unchanged.
 *
 * An x86 inline-asm variant of this function was tried and found to be not
 * faster, so only the C version is kept.
 *
 * @param c decoder context
 * @return the decoded bin, 0 or 1
 */
static int get_cabac_bypass(CABACContext *c){
    int scaled_range;

    c->low += c->low;
    if((c->low & CABAC_MASK) == 0)
        refill(c);

    scaled_range = c->range<<17;
    if(c->low >= scaled_range){
        c->low -= scaled_range;
        return 1;
    }
    return 0;
}
/**
 * Decode one bypass bin and apply it to val as a sign.
 *
 * In the C path, val is returned unchanged when the decoded bin is 1 (low is
 * at least the scaled range) and negated when the bin is 0.
 *
 * The inline asm uses %ebx as a scratch register and lists it as clobbered.
 * %ebx is reserved as the GOT pointer in 32-bit PIC code built with GCC, so the
 * asm is enabled under the same condition as the other x86 asm in this file
 * (not PIC && __GNUC__); PIC builds use the equivalent C code instead.
 *
 * @param c   decoder context
 * @param val value whose sign is selected by the decoded bin
 * @return val or -val
 */
static always_inline int get_cabac_bypass_sign(CABACContext *c, int val){
#if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__))
    asm volatile(
        "movl " RANGE "(%1), %%ebx            \n\t"
        "movl " LOW "(%1), %%eax              \n\t"
        "shl $17, %%ebx                       \n\t"
        "add %%eax, %%eax                     \n\t"
        "sub %%ebx, %%eax                     \n\t"
        "cdq                                  \n\t"
        "and %%edx, %%ebx                     \n\t"
        "add %%ebx, %%eax                     \n\t"
        "xor %%edx, %%ecx                     \n\t"
        "sub %%edx, %%ecx                     \n\t"
        "test %%ax, %%ax                      \n\t"
        "jnz 1f                               \n\t"
        "mov " BYTE "(%1), %%" REG_b "        \n\t"
        "subl $0xFFFF, %%eax                  \n\t"
        "movzwl (%%" REG_b "), %%edx          \n\t"
        "bswap %%edx                          \n\t"
        "shrl $15, %%edx                      \n\t"
        "add $2, %%" REG_b "                  \n\t"
        "addl %%edx, %%eax                    \n\t"
        "mov %%" REG_b ", " BYTE "(%1)        \n\t"
        "1:                                   \n\t"
        "movl %%eax, " LOW "(%1)              \n\t"

        : "+c"(val)
        : "r"(c)
        : "%eax", "%" REG_b, "%edx", "memory"
    );
    return val;
#else
    int range, mask;

    c->low += c->low;
    if(!(c->low & CABAC_MASK))
        refill(c);

    range = c->range<<17;
    c->low -= range;
    /* mask is all ones if low went negative (bin 0), zero otherwise (bin 1) */
    mask = c->low >> 31;
    /* undo the subtraction when the bin was 0 */
    range &= mask;
    c->low += range;
    /* conditional negate: (val ^ -1) - (-1) == -val, (val ^ 0) - 0 == val */
    return (val^mask)-mask;
#endif
}
2006-10-19 04:19:03 +03:00
2006-10-18 01:18:29 +03:00
//FIXME the x86 code from this file should be moved into i386/h264 or cabac something.c/h (note: I'll kill you if you move my code away from under my fingers before I am finished with it!)
//FIXME use some macros to avoid duplicating get_cabac (can't be done yet as that would make optimization work hard)
2006-11-01 19:18:49 +02:00
# if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__))
2006-10-18 01:18:29 +03:00
/**
 * x86 inline-asm loop that decodes a run of two-bin pairs with the
 * BRANCHLESS_GET_CABAC macro and records positions into index[].
 *
 * For each context byte from significant_coeff_ctx_base up to (but excluding)
 * base + max_coeff - 1, one bin is decoded with the context at that address.
 * If it is set, a second bin is decoded with the context 61 bytes further on,
 * and the current offset from the original base is stored through the index
 * pointer. If the second bin is set the loop ends; otherwise the index pointer
 * advances by one int. If the loop runs to the end, the final offset is stored
 * as one more entry.
 *
 * c->range and c->low are kept in registers for the whole loop and written
 * back once at the end.
 *
 * NOTE(review): the names suggest the two bins are the H.264
 * significant_coeff_flag / last_significant_coeff_flag -- confirm against the
 * caller. The pointer-to-int casts keep only the low 32 bits; the asm appears
 * to use them only in 32-bit differences -- confirm for x86-64.
 *
 * @param c                          decoder context
 * @param max_coeff                  sets the loop end at base + max_coeff - 1
 * @param significant_coeff_ctx_base first context state byte
 * @param index                      output array receiving the stored offsets
 * @return the number of entries written to index
 */
static int decode_significance_x86 ( CABACContext * c , int max_coeff , uint8_t * significant_coeff_ctx_base , int * index ) {
void * end = significant_coeff_ctx_base + max_coeff - 1 ;
/* adding the current context pointer to minusstart yields the offset from the base */
int minusstart = - ( int ) significant_coeff_ctx_base ;
2006-10-20 13:00:28 +03:00
/* adding the final index pointer to minusindex yields 4 * (entries written) */
int minusindex = 4 - ( int ) index ;
2006-10-18 01:18:29 +03:00
int coeff_count ;
asm volatile (
" movl " RANGE " (%3), %%esi \n \t "
" movl " LOW " (%3), %%ebx \n \t "
" 2: \n \t "
2006-10-20 03:51:04 +03:00
BRANCHLESS_GET_CABAC ( " %%edx " , " %3 " , " (%1) " , " %%ebx " , " %%bx " , " %%esi " , " %%eax " , " %%al " )
2006-10-18 01:18:29 +03:00
2006-10-20 03:51:04 +03:00
" test $1, %%edx \n \t "
2006-10-18 01:18:29 +03:00
" jz 3f \n \t "
2006-10-20 03:51:04 +03:00
BRANCHLESS_GET_CABAC ( " %%edx " , " %3 " , " 61(%1) " , " %%ebx " , " %%bx " , " %%esi " , " %%eax " , " %%al " )
2006-10-23 19:54:21 +03:00
" mov %2, %% " REG_a " \n \t "
2006-10-18 01:18:29 +03:00
" movl %4, %%ecx \n \t "
2006-10-23 19:54:21 +03:00
" add %1, %% " REG_c " \n \t "
" movl %%ecx, (%% " REG_a " ) \n \t "
2006-10-18 01:18:29 +03:00
2006-10-20 03:51:04 +03:00
" test $1, %%edx \n \t "
2006-10-18 01:18:29 +03:00
" jnz 4f \n \t "
2006-10-23 19:54:21 +03:00
" add $4, %% " REG_a " \n \t "
" mov %% " REG_a " , %2 \n \t "
2006-10-20 11:46:33 +03:00
2006-10-18 01:18:29 +03:00
" 3: \n \t "
2006-10-23 19:54:21 +03:00
" add $1, %1 \n \t "
" cmp %5, %1 \n \t "
2006-10-18 01:18:29 +03:00
" jb 2b \n \t "
2006-10-23 19:54:21 +03:00
" mov %2, %% " REG_a " \n \t "
2006-10-18 01:18:29 +03:00
" movl %4, %%ecx \n \t "
2006-10-23 19:54:21 +03:00
" add %1, %% " REG_c " \n \t "
" movl %%ecx, (%% " REG_a " ) \n \t "
2006-10-18 01:18:29 +03:00
" 4: \n \t "
2006-10-23 19:54:21 +03:00
" add %6, %%eax \n \t "
2006-10-18 01:18:29 +03:00
" shr $2, %%eax \n \t "
" movl %%esi, " RANGE " (%3) \n \t "
" movl %%ebx, " LOW " (%3) \n \t "
: " =&a " ( coeff_count ) , " +r " ( significant_coeff_ctx_base ) , " +m " ( index ) \
: " r " ( c ) , " m " ( minusstart ) , " m " ( end ) , " m " ( minusindex ) \
2006-10-23 19:54:21 +03:00
: " % " REG_c , " %ebx " , " %edx " , " %esi " , " memory " \
2006-10-18 01:18:29 +03:00
) ;
return coeff_count ;
}
2006-10-21 00:34:02 +03:00
/**
 * Table-driven variant of decode_significance_x86().
 *
 * A position counter ("last", starting at 0) is stepped up to 63. For each
 * position the first bin's context is significant_coeff_ctx_base +
 * sig_off[position]. When that bin is set, a second bin is decoded with the
 * context at significant_coeff_ctx_base +
 * last_coeff_flag_offset_8x8[position] + 15, and the position is stored through
 * the index pointer. If the second bin is set the loop ends; otherwise the
 * index pointer advances by one int. If the counter reaches 63, that final
 * position is stored as one more entry.
 *
 * c->range and c->low are kept in registers for the whole loop and written
 * back once at the end. last_coeff_flag_offset_8x8 is referenced by the asm but
 * defined outside this file section.
 *
 * NOTE(review): the names suggest the H.264 8x8 significance map -- confirm
 * against the caller.
 *
 * @param c                          decoder context
 * @param significant_coeff_ctx_base base address of the context state bytes
 * @param index                      output array receiving the stored positions
 * @param sig_off                    per-position context offset table for the first bin
 * @return the number of entries written to index
 */
static int decode_significance_8x8_x86 ( CABACContext * c , uint8_t * significant_coeff_ctx_base , int * index , uint8_t * sig_off ) {
/* adding the final index pointer to minusindex yields 4 * (entries written) */
int minusindex = 4 - ( int ) index ;
int coeff_count ;
2006-10-23 19:54:21 +03:00
/* current position; kept in memory and reloaded by the asm */
long last = 0 ;
2006-10-21 00:34:02 +03:00
asm volatile (
" movl " RANGE " (%3), %%esi \n \t "
" movl " LOW " (%3), %%ebx \n \t "
2006-10-23 19:54:21 +03:00
" mov %1, %% " REG_D " \n \t "
2006-10-21 00:34:02 +03:00
" 2: \n \t "
2006-10-23 19:54:21 +03:00
" mov %6, %% " REG_a " \n \t "
" movzbl (%% " REG_a " , %% " REG_D " ), %%edi \n \t "
" add %5, %% " REG_D " \n \t "
2006-10-21 00:34:02 +03:00
2006-10-23 19:54:21 +03:00
BRANCHLESS_GET_CABAC ( " %%edx " , " %3 " , " (%% " REG_D " ) " , " %%ebx " , " %%bx " , " %%esi " , " %%eax " , " %%al " )
2006-10-21 00:34:02 +03:00
" mov %1, %%edi \n \t "
" test $1, %%edx \n \t "
" jz 3f \n \t "
" movzbl " MANGLE ( last_coeff_flag_offset_8x8 ) " (%%edi), %%edi \n \t "
2006-10-23 19:54:21 +03:00
" add %5, %% " REG_D " \n \t "
2006-10-21 00:34:02 +03:00
2006-10-23 19:54:21 +03:00
BRANCHLESS_GET_CABAC ( " %%edx " , " %3 " , " 15(%% " REG_D " ) " , " %%ebx " , " %%bx " , " %%esi " , " %%eax " , " %%al " )
2006-10-21 00:34:02 +03:00
2006-10-23 19:54:21 +03:00
" mov %2, %% " REG_a " \n \t "
2006-10-21 00:34:02 +03:00
" mov %1, %%edi \n \t "
2006-10-23 19:54:21 +03:00
" movl %%edi, (%% " REG_a " ) \n \t "
2006-10-21 00:34:02 +03:00
" test $1, %%edx \n \t "
" jnz 4f \n \t "
2006-10-23 19:54:21 +03:00
" add $4, %% " REG_a " \n \t "
" mov %% " REG_a " , %2 \n \t "
2006-10-21 00:34:02 +03:00
" 3: \n \t "
" addl $1, %%edi \n \t "
" mov %%edi, %1 \n \t "
" cmpl $63, %%edi \n \t "
" jb 2b \n \t "
2006-10-23 19:54:21 +03:00
" mov %2, %% " REG_a " \n \t "
" movl %%edi, (%% " REG_a " ) \n \t "
2006-10-21 00:34:02 +03:00
" 4: \n \t "
" addl %4, %%eax \n \t "
" shr $2, %%eax \n \t "
" movl %%esi, " RANGE " (%3) \n \t "
" movl %%ebx, " LOW " (%3) \n \t "
: " =&a " ( coeff_count ) , " +m " ( last ) , " +m " ( index ) \
: " r " ( c ) , " m " ( minusindex ) , " m " ( significant_coeff_ctx_base ) , " m " ( sig_off ) \
2006-10-23 19:54:21 +03:00
: " % " REG_c , " %ebx " , " %edx " , " %esi " , " % " REG_D , " memory " \
2006-10-21 00:34:02 +03:00
) ;
return coeff_count ;
}
2006-11-01 19:18:49 +02:00
# endif /* defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__)) */
2003-05-28 21:44:52 +03:00
2003-06-09 05:24:51 +03:00
/**
*
* @ return the number of bytes read or 0 if no end
*/
2006-10-04 10:16:10 +03:00
static int get_cabac_terminate ( CABACContext * c ) {
2006-10-15 23:40:50 +03:00
c - > range - = 2 ;
if ( c - > low < c - > range < < 17 ) {
2004-10-26 06:12:21 +03:00
renorm_cabac_decoder_once ( c ) ;
2003-05-28 21:44:52 +03:00
return 0 ;
} else {
2003-06-09 05:24:51 +03:00
return c - > bytestream - c - > bytestream_start ;
2005-12-17 20:14:38 +02:00
}
2003-05-28 21:44:52 +03:00
}
2003-05-30 04:05:48 +03:00
/**
* get ( truncated ) unnary binarization .
*/
2006-10-04 10:16:10 +03:00
static int get_cabac_u ( CABACContext * c , uint8_t * state , int max , int max_index , int truncated ) {
2003-05-30 04:05:48 +03:00
int i ;
2005-12-17 20:14:38 +02:00
for ( i = 0 ; i < max ; i + + ) {
2003-05-30 04:05:48 +03:00
if ( get_cabac ( c , state ) = = 0 )
return i ;
2005-12-17 20:14:38 +02:00
2003-05-30 04:05:48 +03:00
if ( i < max_index ) state + + ;
}
return truncated ? max : - 1 ;
}
/**
* get unary exp golomb k - th order binarization .
*/
2006-10-04 10:16:10 +03:00
static int get_cabac_ueg ( CABACContext * c , uint8_t * state , int max , int is_signed , int k , int max_index ) {
2003-05-30 04:05:48 +03:00
int i , v ;
int m = 1 < < k ;
2005-12-17 20:14:38 +02:00
if ( get_cabac ( c , state ) = = 0 )
2003-05-30 04:05:48 +03:00
return 0 ;
2005-12-17 20:14:38 +02:00
2003-05-30 04:05:48 +03:00
if ( 0 < max_index ) state + + ;
2005-12-17 20:14:38 +02:00
for ( i = 1 ; i < max ; i + + ) {
2003-05-30 04:05:48 +03:00
if ( get_cabac ( c , state ) = = 0 ) {
if ( is_signed & & get_cabac_bypass ( c ) ) {
return - i ;
} else
return i ;
}
if ( i < max_index ) state + + ;
}
2005-12-17 20:14:38 +02:00
2003-05-30 04:05:48 +03:00
while ( get_cabac_bypass ( c ) ) {
i + = m ;
m + = m ;
}
2005-12-17 20:14:38 +02:00
2003-05-30 04:05:48 +03:00
v = 0 ;
while ( m > > = 1 ) {
v + = v + get_cabac_bypass ( c ) ;
}
i + = v ;
if ( is_signed & & get_cabac_bypass ( c ) ) {
return - i ;
} else
return i ;
}