1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-03-17 20:17:55 +02:00

H.264: template left MB handling

Faster H.264 decoding with ALLOW_INTERLACE off.
This commit is contained in:
Jason Garrett-Glaser 2011-06-29 15:38:39 -07:00
parent ca80f11ec3
commit 556f8a066c
4 changed files with 121 additions and 113 deletions

View File

@ -3046,7 +3046,7 @@ int ff_h264_get_slice_type(const H264Context *h)
} }
static av_always_inline void fill_filter_caches_inter(H264Context *h, MpegEncContext * const s, int mb_type, int top_xy, static av_always_inline void fill_filter_caches_inter(H264Context *h, MpegEncContext * const s, int mb_type, int top_xy,
int left_xy[2], int top_type, int left_type[2], int mb_xy, int list) int left_xy[LEFT_MBS], int top_type, int left_type[LEFT_MBS], int mb_xy, int list)
{ {
int b_stride = h->b_stride; int b_stride = h->b_stride;
int16_t (*mv_dst)[2] = &h->mv_cache[list][scan8[0]]; int16_t (*mv_dst)[2] = &h->mv_cache[list][scan8[0]];
@ -3066,11 +3066,11 @@ static av_always_inline void fill_filter_caches_inter(H264Context *h, MpegEncCon
AV_WN32A(&ref_cache[0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u); AV_WN32A(&ref_cache[0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
} }
if(!IS_INTERLACED(mb_type^left_type[0])){ if(!IS_INTERLACED(mb_type^left_type[LTOP])){
if(USES_LIST(left_type[0], list)){ if(USES_LIST(left_type[LTOP], list)){
const int b_xy= h->mb2b_xy[left_xy[0]] + 3; const int b_xy= h->mb2b_xy[left_xy[LTOP]] + 3;
const int b8_xy= 4*left_xy[0] + 1; const int b8_xy= 4*left_xy[LTOP] + 1;
int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[LTOP]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
AV_COPY32(mv_dst - 1 + 0, s->current_picture.motion_val[list][b_xy + b_stride*0]); AV_COPY32(mv_dst - 1 + 0, s->current_picture.motion_val[list][b_xy + b_stride*0]);
AV_COPY32(mv_dst - 1 + 8, s->current_picture.motion_val[list][b_xy + b_stride*1]); AV_COPY32(mv_dst - 1 + 8, s->current_picture.motion_val[list][b_xy + b_stride*1]);
AV_COPY32(mv_dst - 1 +16, s->current_picture.motion_val[list][b_xy + b_stride*2]); AV_COPY32(mv_dst - 1 +16, s->current_picture.motion_val[list][b_xy + b_stride*2]);
@ -3128,8 +3128,8 @@ static av_always_inline void fill_filter_caches_inter(H264Context *h, MpegEncCon
static int fill_filter_caches(H264Context *h, int mb_type){ static int fill_filter_caches(H264Context *h, int mb_type){
MpegEncContext * const s = &h->s; MpegEncContext * const s = &h->s;
const int mb_xy= h->mb_xy; const int mb_xy= h->mb_xy;
int top_xy, left_xy[2]; int top_xy, left_xy[LEFT_MBS];
int top_type, left_type[2]; int top_type, left_type[LEFT_MBS];
uint8_t *nnz; uint8_t *nnz;
uint8_t *nnz_cache; uint8_t *nnz_cache;
@ -3138,56 +3138,56 @@ static int fill_filter_caches(H264Context *h, int mb_type){
/* Wow, what a mess, why didn't they simplify the interlacing & intra /* Wow, what a mess, why didn't they simplify the interlacing & intra
* stuff, I can't imagine that these complex rules are worth it. */ * stuff, I can't imagine that these complex rules are worth it. */
left_xy[1] = left_xy[0] = mb_xy-1; left_xy[LBOT] = left_xy[LTOP] = mb_xy-1;
if(FRAME_MBAFF){ if(FRAME_MBAFF){
const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]); const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]);
const int curr_mb_field_flag = IS_INTERLACED(mb_type); const int curr_mb_field_flag = IS_INTERLACED(mb_type);
if(s->mb_y&1){ if(s->mb_y&1){
if (left_mb_field_flag != curr_mb_field_flag) { if (left_mb_field_flag != curr_mb_field_flag) {
left_xy[0] -= s->mb_stride; left_xy[LTOP] -= s->mb_stride;
} }
}else{ }else{
if(curr_mb_field_flag){ if(curr_mb_field_flag){
top_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy ]>>7)&1)-1); top_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy ]>>7)&1)-1);
} }
if (left_mb_field_flag != curr_mb_field_flag) { if (left_mb_field_flag != curr_mb_field_flag) {
left_xy[1] += s->mb_stride; left_xy[LBOT] += s->mb_stride;
} }
} }
} }
h->top_mb_xy = top_xy; h->top_mb_xy = top_xy;
h->left_mb_xy[0] = left_xy[0]; h->left_mb_xy[LTOP] = left_xy[LTOP];
h->left_mb_xy[1] = left_xy[1]; h->left_mb_xy[LBOT] = left_xy[LBOT];
{ {
//for sufficiently low qp, filtering wouldn't do anything //for sufficiently low qp, filtering wouldn't do anything
//this is a conservative estimate: could also check beta_offset and more accurate chroma_qp //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice
int qp = s->current_picture.qscale_table[mb_xy]; int qp = s->current_picture.qscale_table[mb_xy];
if(qp <= qp_thresh if(qp <= qp_thresh
&& (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]] + 1)>>1) <= qp_thresh) && (left_xy[LTOP]<0 || ((qp + s->current_picture.qscale_table[left_xy[LTOP]] + 1)>>1) <= qp_thresh)
&& (top_xy < 0 || ((qp + s->current_picture.qscale_table[top_xy ] + 1)>>1) <= qp_thresh)){ && (top_xy <0 || ((qp + s->current_picture.qscale_table[top_xy ] + 1)>>1) <= qp_thresh)){
if(!FRAME_MBAFF) if(!FRAME_MBAFF)
return 1; return 1;
if( (left_xy[0]< 0 || ((qp + s->current_picture.qscale_table[left_xy[1] ] + 1)>>1) <= qp_thresh) if( (left_xy[LTOP]< 0 || ((qp + s->current_picture.qscale_table[left_xy[LBOT] ] + 1)>>1) <= qp_thresh)
&& (top_xy < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy -s->mb_stride] + 1)>>1) <= qp_thresh)) && (top_xy < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy -s->mb_stride] + 1)>>1) <= qp_thresh))
return 1; return 1;
} }
} }
top_type = s->current_picture.mb_type[top_xy] ; top_type = s->current_picture.mb_type[top_xy];
left_type[0] = s->current_picture.mb_type[left_xy[0]]; left_type[LTOP] = s->current_picture.mb_type[left_xy[LTOP]];
left_type[1] = s->current_picture.mb_type[left_xy[1]]; left_type[LBOT] = s->current_picture.mb_type[left_xy[LBOT]];
if(h->deblocking_filter == 2){ if(h->deblocking_filter == 2){
if(h->slice_table[top_xy ] != h->slice_num) top_type= 0; if(h->slice_table[top_xy ] != h->slice_num) top_type= 0;
if(h->slice_table[left_xy[0] ] != h->slice_num) left_type[0]= left_type[1]= 0; if(h->slice_table[left_xy[LBOT]] != h->slice_num) left_type[LTOP]= left_type[LBOT]= 0;
}else{ }else{
if(h->slice_table[top_xy ] == 0xFFFF) top_type= 0; if(h->slice_table[top_xy ] == 0xFFFF) top_type= 0;
if(h->slice_table[left_xy[0] ] == 0xFFFF) left_type[0]= left_type[1] =0; if(h->slice_table[left_xy[LBOT]] == 0xFFFF) left_type[LTOP]= left_type[LBOT] =0;
} }
h->top_type = top_type ; h->top_type = top_type;
h->left_type[0]= left_type[0]; h->left_type[LTOP]= left_type[LTOP];
h->left_type[1]= left_type[1]; h->left_type[LBOT]= left_type[LBOT];
if(IS_INTRA(mb_type)) if(IS_INTRA(mb_type))
return 0; return 0;
@ -3209,8 +3209,8 @@ static int fill_filter_caches(H264Context *h, int mb_type){
AV_COPY32(&nnz_cache[4+8*0], &nnz[3*4]); AV_COPY32(&nnz_cache[4+8*0], &nnz[3*4]);
} }
if(left_type[0]){ if(left_type[LTOP]){
nnz = h->non_zero_count[left_xy[0]]; nnz = h->non_zero_count[left_xy[LTOP]];
nnz_cache[3+8*1]= nnz[3+0*4]; nnz_cache[3+8*1]= nnz[3+0*4];
nnz_cache[3+8*2]= nnz[3+1*4]; nnz_cache[3+8*2]= nnz[3+1*4];
nnz_cache[3+8*3]= nnz[3+2*4]; nnz_cache[3+8*3]= nnz[3+2*4];
@ -3225,13 +3225,13 @@ static int fill_filter_caches(H264Context *h, int mb_type){
nnz_cache[6+8*0]= nnz_cache[6+8*0]=
nnz_cache[7+8*0]= (h->cbp_table[top_xy] & 0x8000) >> 12; nnz_cache[7+8*0]= (h->cbp_table[top_xy] & 0x8000) >> 12;
} }
if(IS_8x8DCT(left_type[0])){ if(IS_8x8DCT(left_type[LTOP])){
nnz_cache[3+8*1]= nnz_cache[3+8*1]=
nnz_cache[3+8*2]= (h->cbp_table[left_xy[0]]&0x2000) >> 12; //FIXME check MBAFF nnz_cache[3+8*2]= (h->cbp_table[left_xy[LTOP]]&0x2000) >> 12; //FIXME check MBAFF
} }
if(IS_8x8DCT(left_type[1])){ if(IS_8x8DCT(left_type[LBOT])){
nnz_cache[3+8*3]= nnz_cache[3+8*3]=
nnz_cache[3+8*4]= (h->cbp_table[left_xy[1]]&0x8000) >> 12; //FIXME check MBAFF nnz_cache[3+8*4]= (h->cbp_table[left_xy[LBOT]]&0x8000) >> 12; //FIXME check MBAFF
} }
if(IS_8x8DCT(mb_type)){ if(IS_8x8DCT(mb_type)){

View File

@ -70,6 +70,10 @@
#define MB_FIELD h->mb_field_decoding_flag #define MB_FIELD h->mb_field_decoding_flag
#define FRAME_MBAFF h->mb_aff_frame #define FRAME_MBAFF h->mb_aff_frame
#define FIELD_PICTURE (s->picture_structure != PICT_FRAME) #define FIELD_PICTURE (s->picture_structure != PICT_FRAME)
#define LEFT_MBS 2
#define LTOP 0
#define LBOT 1
#define LEFT(i) (i)
#else #else
#define MB_MBAFF 0 #define MB_MBAFF 0
#define MB_FIELD 0 #define MB_FIELD 0
@ -77,6 +81,10 @@
#define FIELD_PICTURE 0 #define FIELD_PICTURE 0
#undef IS_INTERLACED #undef IS_INTERLACED
#define IS_INTERLACED(mb_type) 0 #define IS_INTERLACED(mb_type) 0
#define LEFT_MBS 1
#define LTOP 0
#define LBOT 0
#define LEFT(i) 0
#endif #endif
#define FIELD_OR_MBAFF_PICTURE (FRAME_MBAFF || FIELD_PICTURE) #define FIELD_OR_MBAFF_PICTURE (FRAME_MBAFF || FIELD_PICTURE)
@ -272,12 +280,12 @@ typedef struct H264Context{
int topleft_mb_xy; int topleft_mb_xy;
int top_mb_xy; int top_mb_xy;
int topright_mb_xy; int topright_mb_xy;
int left_mb_xy[2]; int left_mb_xy[LEFT_MBS];
int topleft_type; int topleft_type;
int top_type; int top_type;
int topright_type; int topright_type;
int left_type[2]; int left_type[LEFT_MBS];
const uint8_t * left_block; const uint8_t * left_block;
int topleft_partition; int topleft_partition;
@ -767,7 +775,7 @@ static inline void pred_pskip_motion(H264Context * const h, int * const mx, int
static void fill_decode_neighbors(H264Context *h, int mb_type){ static void fill_decode_neighbors(H264Context *h, int mb_type){
MpegEncContext * const s = &h->s; MpegEncContext * const s = &h->s;
const int mb_xy= h->mb_xy; const int mb_xy= h->mb_xy;
int topleft_xy, top_xy, topright_xy, left_xy[2]; int topleft_xy, top_xy, topright_xy, left_xy[LEFT_MBS];
static const uint8_t left_block_options[4][32]={ static const uint8_t left_block_options[4][32]={
{0,1,2,3,7,10,8,11,3+0*4, 3+1*4, 3+2*4, 3+3*4, 1+4*4, 1+8*4, 1+5*4, 1+9*4}, {0,1,2,3,7,10,8,11,3+0*4, 3+1*4, 3+2*4, 3+3*4, 1+4*4, 1+8*4, 1+5*4, 1+9*4},
{2,2,3,3,8,11,8,11,3+2*4, 3+2*4, 3+3*4, 3+3*4, 1+5*4, 1+9*4, 1+5*4, 1+9*4}, {2,2,3,3,8,11,8,11,3+2*4, 3+2*4, 3+3*4, 3+3*4, 1+5*4, 1+9*4, 1+5*4, 1+9*4},
@ -784,16 +792,16 @@ static void fill_decode_neighbors(H264Context *h, int mb_type){
topleft_xy = top_xy - 1; topleft_xy = top_xy - 1;
topright_xy= top_xy + 1; topright_xy= top_xy + 1;
left_xy[1] = left_xy[0] = mb_xy-1; left_xy[LBOT] = left_xy[LTOP] = mb_xy-1;
h->left_block = left_block_options[0]; h->left_block = left_block_options[0];
if(FRAME_MBAFF){ if(FRAME_MBAFF){
const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]); const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]);
const int curr_mb_field_flag = IS_INTERLACED(mb_type); const int curr_mb_field_flag = IS_INTERLACED(mb_type);
if(s->mb_y&1){ if(s->mb_y&1){
if (left_mb_field_flag != curr_mb_field_flag) { if (left_mb_field_flag != curr_mb_field_flag) {
left_xy[1] = left_xy[0] = mb_xy - s->mb_stride - 1; left_xy[LBOT] = left_xy[LTOP] = mb_xy - s->mb_stride - 1;
if (curr_mb_field_flag) { if (curr_mb_field_flag) {
left_xy[1] += s->mb_stride; left_xy[LBOT] += s->mb_stride;
h->left_block = left_block_options[3]; h->left_block = left_block_options[3];
} else { } else {
topleft_xy += s->mb_stride; topleft_xy += s->mb_stride;
@ -810,7 +818,7 @@ static void fill_decode_neighbors(H264Context *h, int mb_type){
} }
if (left_mb_field_flag != curr_mb_field_flag) { if (left_mb_field_flag != curr_mb_field_flag) {
if (curr_mb_field_flag) { if (curr_mb_field_flag) {
left_xy[1] += s->mb_stride; left_xy[LBOT] += s->mb_stride;
h->left_block = left_block_options[3]; h->left_block = left_block_options[3];
} else { } else {
h->left_block = left_block_options[2]; h->left_block = left_block_options[2];
@ -822,25 +830,25 @@ static void fill_decode_neighbors(H264Context *h, int mb_type){
h->topleft_mb_xy = topleft_xy; h->topleft_mb_xy = topleft_xy;
h->top_mb_xy = top_xy; h->top_mb_xy = top_xy;
h->topright_mb_xy= topright_xy; h->topright_mb_xy= topright_xy;
h->left_mb_xy[0] = left_xy[0]; h->left_mb_xy[LTOP] = left_xy[LTOP];
h->left_mb_xy[1] = left_xy[1]; h->left_mb_xy[LBOT] = left_xy[LBOT];
//FIXME do we need all in the context? //FIXME do we need all in the context?
h->topleft_type = s->current_picture.mb_type[topleft_xy] ; h->topleft_type = s->current_picture.mb_type[topleft_xy] ;
h->top_type = s->current_picture.mb_type[top_xy] ; h->top_type = s->current_picture.mb_type[top_xy] ;
h->topright_type= s->current_picture.mb_type[topright_xy]; h->topright_type= s->current_picture.mb_type[topright_xy];
h->left_type[0] = s->current_picture.mb_type[left_xy[0]] ; h->left_type[LTOP] = s->current_picture.mb_type[left_xy[LTOP]] ;
h->left_type[1] = s->current_picture.mb_type[left_xy[1]] ; h->left_type[LBOT] = s->current_picture.mb_type[left_xy[LBOT]] ;
if(FMO){ if(FMO){
if(h->slice_table[topleft_xy ] != h->slice_num) h->topleft_type = 0; if(h->slice_table[topleft_xy ] != h->slice_num) h->topleft_type = 0;
if(h->slice_table[top_xy ] != h->slice_num) h->top_type = 0; if(h->slice_table[top_xy ] != h->slice_num) h->top_type = 0;
if(h->slice_table[left_xy[0] ] != h->slice_num) h->left_type[0] = h->left_type[1] = 0; if(h->slice_table[left_xy[LTOP] ] != h->slice_num) h->left_type[LTOP] = h->left_type[LBOT] = 0;
}else{ }else{
if(h->slice_table[topleft_xy ] != h->slice_num){ if(h->slice_table[topleft_xy ] != h->slice_num){
h->topleft_type = 0; h->topleft_type = 0;
if(h->slice_table[top_xy ] != h->slice_num) h->top_type = 0; if(h->slice_table[top_xy ] != h->slice_num) h->top_type = 0;
if(h->slice_table[left_xy[0] ] != h->slice_num) h->left_type[0] = h->left_type[1] = 0; if(h->slice_table[left_xy[LTOP] ] != h->slice_num) h->left_type[LTOP] = h->left_type[LBOT] = 0;
} }
} }
if(h->slice_table[topright_xy] != h->slice_num) h->topright_type= 0; if(h->slice_table[topright_xy] != h->slice_num) h->topright_type= 0;
@ -848,23 +856,23 @@ static void fill_decode_neighbors(H264Context *h, int mb_type){
static void fill_decode_caches(H264Context *h, int mb_type){ static void fill_decode_caches(H264Context *h, int mb_type){
MpegEncContext * const s = &h->s; MpegEncContext * const s = &h->s;
int topleft_xy, top_xy, topright_xy, left_xy[2]; int topleft_xy, top_xy, topright_xy, left_xy[LEFT_MBS];
int topleft_type, top_type, topright_type, left_type[2]; int topleft_type, top_type, topright_type, left_type[LEFT_MBS];
const uint8_t * left_block= h->left_block; const uint8_t * left_block= h->left_block;
int i; int i;
uint8_t *nnz; uint8_t *nnz;
uint8_t *nnz_cache; uint8_t *nnz_cache;
topleft_xy = h->topleft_mb_xy ; topleft_xy = h->topleft_mb_xy;
top_xy = h->top_mb_xy ; top_xy = h->top_mb_xy;
topright_xy = h->topright_mb_xy; topright_xy = h->topright_mb_xy;
left_xy[0] = h->left_mb_xy[0] ; left_xy[LTOP] = h->left_mb_xy[LTOP];
left_xy[1] = h->left_mb_xy[1] ; left_xy[LBOT] = h->left_mb_xy[LBOT];
topleft_type = h->topleft_type ; topleft_type = h->topleft_type;
top_type = h->top_type ; top_type = h->top_type;
topright_type= h->topright_type ; topright_type = h->topright_type;
left_type[0] = h->left_type[0] ; left_type[LTOP]= h->left_type[LTOP];
left_type[1] = h->left_type[1] ; left_type[LBOT]= h->left_type[LBOT];
if(!IS_SKIP(mb_type)){ if(!IS_SKIP(mb_type)){
if(IS_INTRA(mb_type)){ if(IS_INTRA(mb_type)){
@ -879,27 +887,27 @@ static void fill_decode_caches(H264Context *h, int mb_type){
h->top_samples_available= 0x33FF; h->top_samples_available= 0x33FF;
h->topright_samples_available= 0x26EA; h->topright_samples_available= 0x26EA;
} }
if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){ if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[LTOP])){
if(IS_INTERLACED(mb_type)){ if(IS_INTERLACED(mb_type)){
if(!(left_type[0] & type_mask)){ if(!(left_type[LTOP] & type_mask)){
h->topleft_samples_available&= 0xDFFF; h->topleft_samples_available&= 0xDFFF;
h->left_samples_available&= 0x5FFF; h->left_samples_available&= 0x5FFF;
} }
if(!(left_type[1] & type_mask)){ if(!(left_type[LBOT] & type_mask)){
h->topleft_samples_available&= 0xFF5F; h->topleft_samples_available&= 0xFF5F;
h->left_samples_available&= 0xFF5F; h->left_samples_available&= 0xFF5F;
} }
}else{ }else{
int left_typei = s->current_picture.mb_type[left_xy[0] + s->mb_stride]; int left_typei = s->current_picture.mb_type[left_xy[LTOP] + s->mb_stride];
assert(left_xy[0] == left_xy[1]); assert(left_xy[LTOP] == left_xy[LBOT]);
if(!((left_typei & type_mask) && (left_type[0] & type_mask))){ if(!((left_typei & type_mask) && (left_type[LTOP] & type_mask))){
h->topleft_samples_available&= 0xDF5F; h->topleft_samples_available&= 0xDF5F;
h->left_samples_available&= 0x5F5F; h->left_samples_available&= 0x5F5F;
} }
} }
}else{ }else{
if(!(left_type[0] & type_mask)){ if(!(left_type[LTOP] & type_mask)){
h->topleft_samples_available&= 0xDF5F; h->topleft_samples_available&= 0xDF5F;
h->left_samples_available&= 0x5F5F; h->left_samples_available&= 0x5F5F;
} }
@ -921,13 +929,13 @@ static void fill_decode_caches(H264Context *h, int mb_type){
h->intra4x4_pred_mode_cache[7+8*0]= 2 - 3*!(top_type & type_mask); h->intra4x4_pred_mode_cache[7+8*0]= 2 - 3*!(top_type & type_mask);
} }
for(i=0; i<2; i++){ for(i=0; i<2; i++){
if(IS_INTRA4x4(left_type[i])){ if(IS_INTRA4x4(left_type[LEFT(i)])){
int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[left_xy[i]]; int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[left_xy[LEFT(i)]];
h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= mode[6-left_block[0+2*i]]; h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= mode[6-left_block[0+2*i]];
h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= mode[6-left_block[1+2*i]]; h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= mode[6-left_block[1+2*i]];
}else{ }else{
h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= 2 - 3*!(left_type[i] & type_mask); h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= 2 - 3*!(left_type[LEFT(i)] & type_mask);
} }
} }
} }
@ -962,8 +970,8 @@ static void fill_decode_caches(H264Context *h, int mb_type){
} }
for (i=0; i<2; i++) { for (i=0; i<2; i++) {
if(left_type[i]){ if(left_type[LEFT(i)]){
nnz = h->non_zero_count[left_xy[i]]; nnz = h->non_zero_count[left_xy[LEFT(i)]];
nnz_cache[3+8* 1 + 2*8*i]= nnz[left_block[8+0+2*i]]; nnz_cache[3+8* 1 + 2*8*i]= nnz[left_block[8+0+2*i]];
nnz_cache[3+8* 2 + 2*8*i]= nnz[left_block[8+1+2*i]]; nnz_cache[3+8* 2 + 2*8*i]= nnz[left_block[8+1+2*i]];
if(CHROMA444){ if(CHROMA444){
@ -993,10 +1001,10 @@ static void fill_decode_caches(H264Context *h, int mb_type){
h->top_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F; h->top_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F;
} }
// left_cbp // left_cbp
if (left_type[0]) { if (left_type[LTOP]) {
h->left_cbp = (h->cbp_table[left_xy[0]] & 0x7F0) h->left_cbp = (h->cbp_table[left_xy[LTOP]] & 0x7F0)
| ((h->cbp_table[left_xy[0]]>>(left_block[0]&(~1)))&2) | ((h->cbp_table[left_xy[LTOP]]>>(left_block[0]&(~1)))&2)
| (((h->cbp_table[left_xy[1]]>>(left_block[2]&(~1)))&2) << 2); | (((h->cbp_table[left_xy[LBOT]]>>(left_block[2]&(~1)))&2) << 2);
} else { } else {
h->left_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F; h->left_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F;
} }
@ -1031,9 +1039,9 @@ static void fill_decode_caches(H264Context *h, int mb_type){
if(mb_type & (MB_TYPE_16x8|MB_TYPE_8x8)){ if(mb_type & (MB_TYPE_16x8|MB_TYPE_8x8)){
for(i=0; i<2; i++){ for(i=0; i<2; i++){
int cache_idx = -1 + i*2*8; int cache_idx = -1 + i*2*8;
if(USES_LIST(left_type[i], list)){ if(USES_LIST(left_type[LEFT(i)], list)){
const int b_xy= h->mb2b_xy[left_xy[i]] + 3; const int b_xy= h->mb2b_xy[left_xy[LEFT(i)]] + 3;
const int b8_xy= 4*left_xy[i] + 1; const int b8_xy= 4*left_xy[LEFT(i)] + 1;
AV_COPY32(mv_cache[cache_idx ], mv[b_xy + b_stride*left_block[0+i*2]]); AV_COPY32(mv_cache[cache_idx ], mv[b_xy + b_stride*left_block[0+i*2]]);
AV_COPY32(mv_cache[cache_idx+8], mv[b_xy + b_stride*left_block[1+i*2]]); AV_COPY32(mv_cache[cache_idx+8], mv[b_xy + b_stride*left_block[1+i*2]]);
ref_cache[cache_idx ]= ref[b8_xy + (left_block[0+i*2]&~1)]; ref_cache[cache_idx ]= ref[b8_xy + (left_block[0+i*2]&~1)];
@ -1042,18 +1050,18 @@ static void fill_decode_caches(H264Context *h, int mb_type){
AV_ZERO32(mv_cache[cache_idx ]); AV_ZERO32(mv_cache[cache_idx ]);
AV_ZERO32(mv_cache[cache_idx+8]); AV_ZERO32(mv_cache[cache_idx+8]);
ref_cache[cache_idx ]= ref_cache[cache_idx ]=
ref_cache[cache_idx+8]= (left_type[i]) ? LIST_NOT_USED : PART_NOT_AVAILABLE; ref_cache[cache_idx+8]= (left_type[LEFT(i)]) ? LIST_NOT_USED : PART_NOT_AVAILABLE;
} }
} }
}else{ }else{
if(USES_LIST(left_type[0], list)){ if(USES_LIST(left_type[LTOP], list)){
const int b_xy= h->mb2b_xy[left_xy[0]] + 3; const int b_xy= h->mb2b_xy[left_xy[LTOP]] + 3;
const int b8_xy= 4*left_xy[0] + 1; const int b8_xy= 4*left_xy[LTOP] + 1;
AV_COPY32(mv_cache[-1], mv[b_xy + b_stride*left_block[0]]); AV_COPY32(mv_cache[-1], mv[b_xy + b_stride*left_block[0]]);
ref_cache[-1]= ref[b8_xy + (left_block[0]&~1)]; ref_cache[-1]= ref[b8_xy + (left_block[0]&~1)];
}else{ }else{
AV_ZERO32(mv_cache[-1]); AV_ZERO32(mv_cache[-1]);
ref_cache[-1]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE; ref_cache[-1]= left_type[LTOP] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
} }
} }
@ -1095,16 +1103,16 @@ static void fill_decode_caches(H264Context *h, int mb_type){
}else{ }else{
AV_ZERO64(mvd_cache[0 - 1*8]); AV_ZERO64(mvd_cache[0 - 1*8]);
} }
if(USES_LIST(left_type[0], list)){ if(USES_LIST(left_type[LTOP], list)){
const int b_xy= h->mb2br_xy[left_xy[0]] + 6; const int b_xy= h->mb2br_xy[left_xy[LTOP]] + 6;
AV_COPY16(mvd_cache[-1 + 0*8], mvd[b_xy - left_block[0]]); AV_COPY16(mvd_cache[-1 + 0*8], mvd[b_xy - left_block[0]]);
AV_COPY16(mvd_cache[-1 + 1*8], mvd[b_xy - left_block[1]]); AV_COPY16(mvd_cache[-1 + 1*8], mvd[b_xy - left_block[1]]);
}else{ }else{
AV_ZERO16(mvd_cache[-1 + 0*8]); AV_ZERO16(mvd_cache[-1 + 0*8]);
AV_ZERO16(mvd_cache[-1 + 1*8]); AV_ZERO16(mvd_cache[-1 + 1*8]);
} }
if(USES_LIST(left_type[1], list)){ if(USES_LIST(left_type[LBOT], list)){
const int b_xy= h->mb2br_xy[left_xy[1]] + 6; const int b_xy= h->mb2br_xy[left_xy[LBOT]] + 6;
AV_COPY16(mvd_cache[-1 + 2*8], mvd[b_xy - left_block[2]]); AV_COPY16(mvd_cache[-1 + 2*8], mvd[b_xy - left_block[2]]);
AV_COPY16(mvd_cache[-1 + 3*8], mvd[b_xy - left_block[3]]); AV_COPY16(mvd_cache[-1 + 3*8], mvd[b_xy - left_block[3]]);
}else{ }else{
@ -1128,17 +1136,17 @@ static void fill_decode_caches(H264Context *h, int mb_type){
AV_WN32A(&direct_cache[-1*8], 0x01010101*(MB_TYPE_16x16>>1)); AV_WN32A(&direct_cache[-1*8], 0x01010101*(MB_TYPE_16x16>>1));
} }
if(IS_DIRECT(left_type[0])) if(IS_DIRECT(left_type[LTOP]))
direct_cache[-1 + 0*8]= MB_TYPE_DIRECT2>>1; direct_cache[-1 + 0*8]= MB_TYPE_DIRECT2>>1;
else if(IS_8X8(left_type[0])) else if(IS_8X8(left_type[LTOP]))
direct_cache[-1 + 0*8]= direct_table[4*left_xy[0] + 1 + (left_block[0]&~1)]; direct_cache[-1 + 0*8]= direct_table[4*left_xy[LTOP] + 1 + (left_block[0]&~1)];
else else
direct_cache[-1 + 0*8]= MB_TYPE_16x16>>1; direct_cache[-1 + 0*8]= MB_TYPE_16x16>>1;
if(IS_DIRECT(left_type[1])) if(IS_DIRECT(left_type[LBOT]))
direct_cache[-1 + 2*8]= MB_TYPE_DIRECT2>>1; direct_cache[-1 + 2*8]= MB_TYPE_DIRECT2>>1;
else if(IS_8X8(left_type[1])) else if(IS_8X8(left_type[LBOT]))
direct_cache[-1 + 2*8]= direct_table[4*left_xy[1] + 1 + (left_block[2]&~1)]; direct_cache[-1 + 2*8]= direct_table[4*left_xy[LBOT] + 1 + (left_block[2]&~1)];
else else
direct_cache[-1 + 2*8]= MB_TYPE_16x16>>1; direct_cache[-1 + 2*8]= MB_TYPE_16x16>>1;
} }
@ -1152,10 +1160,10 @@ static void fill_decode_caches(H264Context *h, int mb_type){
MAP_F2F(scan8[0] + 2 - 1*8, top_type)\ MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
MAP_F2F(scan8[0] + 3 - 1*8, top_type)\ MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\ MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\ MAP_F2F(scan8[0] - 1 + 0*8, left_type[LTOP])\
MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\ MAP_F2F(scan8[0] - 1 + 1*8, left_type[LTOP])\
MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\ MAP_F2F(scan8[0] - 1 + 2*8, left_type[LBOT])\
MAP_F2F(scan8[0] - 1 + 3*8, left_type[1]) MAP_F2F(scan8[0] - 1 + 3*8, left_type[LBOT])
if(MB_FIELD){ if(MB_FIELD){
#define MAP_F2F(idx, mb_type)\ #define MAP_F2F(idx, mb_type)\
if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\ if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
@ -1179,7 +1187,7 @@ static void fill_decode_caches(H264Context *h, int mb_type){
} }
} }
h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]); h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[LTOP]);
} }
/** /**

View File

@ -1296,9 +1296,9 @@ static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_sl
if(intra_slice){ if(intra_slice){
int ctx=0; int ctx=0;
if( h->left_type[0] & (MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)) if( h->left_type[LTOP] & (MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM))
ctx++; ctx++;
if( h->top_type & (MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)) if( h->top_type & (MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM))
ctx++; ctx++;
if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 ) if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
return 0; /* I4x4 */ return 0; /* I4x4 */
@ -1376,10 +1376,10 @@ static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
int ctx = 0; int ctx = 0;
/* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */ /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
if( h->left_type[0] && h->chroma_pred_mode_table[mba_xy] != 0 ) if( h->left_type[LTOP] && h->chroma_pred_mode_table[mba_xy] != 0 )
ctx++; ctx++;
if( h->top_type && h->chroma_pred_mode_table[mbb_xy] != 0 ) if( h->top_type && h->chroma_pred_mode_table[mbb_xy] != 0 )
ctx++; ctx++;
if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 ) if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
@ -1880,7 +1880,7 @@ int ff_h264_decode_mb_cabac(H264Context *h) {
int ctx = 0; int ctx = 0;
assert(h->slice_type_nos == AV_PICTURE_TYPE_B); assert(h->slice_type_nos == AV_PICTURE_TYPE_B);
if( !IS_DIRECT( h->left_type[0]-1 ) ) if( !IS_DIRECT( h->left_type[LTOP]-1 ) )
ctx++; ctx++;
if( !IS_DIRECT( h->top_type-1 ) ) if( !IS_DIRECT( h->top_type-1 ) )
ctx++; ctx++;
@ -2250,7 +2250,7 @@ decode_intra_mb:
int i; int i;
uint8_t *nnz_cache = h->non_zero_count_cache; uint8_t *nnz_cache = h->non_zero_count_cache;
for (i = 0; i < 2; i++){ for (i = 0; i < 2; i++){
if (h->left_type[i] && !IS_8x8DCT(h->left_type[i])){ if (h->left_type[LEFT(i)] && !IS_8x8DCT(h->left_type[LEFT(i)])){
nnz_cache[3+8* 1 + 2*8*i]= nnz_cache[3+8* 1 + 2*8*i]=
nnz_cache[3+8* 2 + 2*8*i]= nnz_cache[3+8* 2 + 2*8*i]=
nnz_cache[3+8* 6 + 2*8*i]= nnz_cache[3+8* 6 + 2*8*i]=

View File

@ -227,7 +227,7 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
return; return;
} }
assert(!FRAME_MBAFF); assert(!FRAME_MBAFF);
left_type= h->left_type[0]; left_type= h->left_type[LTOP];
top_type= h->top_type; top_type= h->top_type;
mb_type = s->current_picture.mb_type[mb_xy]; mb_type = s->current_picture.mb_type[mb_xy];
@ -329,7 +329,7 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
AV_WN64A(bS[1][2], 0x0002000200020002ULL); AV_WN64A(bS[1][2], 0x0002000200020002ULL);
} else { } else {
int mask_edge1 = (3*(((5*mb_type)>>5)&1)) | (mb_type>>4); //(mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : (mb_type & MB_TYPE_16x8) ? 1 : 0; int mask_edge1 = (3*(((5*mb_type)>>5)&1)) | (mb_type>>4); //(mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : (mb_type & MB_TYPE_16x8) ? 1 : 0;
int mask_edge0 = 3*((mask_edge1>>1) & ((5*left_type)>>5)&1); // (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) && (h->left_type[0] & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : 0; int mask_edge0 = 3*((mask_edge1>>1) & ((5*left_type)>>5)&1); // (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) && (h->left_type[LTOP] & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : 0;
int step = 1+(mb_type>>24); //IS_8x8DCT(mb_type) ? 2 : 1; int step = 1+(mb_type>>24); //IS_8x8DCT(mb_type) ? 2 : 1;
edges = 4 - 3*((mb_type>>3) & !(h->cbp & 15)); //(mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4; edges = 4 - 3*((mb_type>>3) & !(h->cbp & 15)); //(mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
h->h264dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache, h->h264dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
@ -411,7 +411,7 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
int edge; int edge;
int chroma_qp_avg[2]; int chroma_qp_avg[2];
const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy; const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
const int mbm_type = dir == 0 ? h->left_type[0] : h->top_type; const int mbm_type = dir == 0 ? h->left_type[LTOP] : h->top_type;
// how often to recheck mv-based bS when iterating between edges // how often to recheck mv-based bS when iterating between edges
static const uint8_t mask_edge_tab[2][8]={{0,3,3,3,1,1,1,1}, static const uint8_t mask_edge_tab[2][8]={{0,3,3,3,1,1,1,1},
@ -647,9 +647,9 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
if (FRAME_MBAFF if (FRAME_MBAFF
// and current and left pair do not have the same interlaced type // and current and left pair do not have the same interlaced type
&& IS_INTERLACED(mb_type^h->left_type[0]) && IS_INTERLACED(mb_type^h->left_type[LTOP])
// and left mb is available to us // and left mb is available to us
&& h->left_type[0]) { && h->left_type[LTOP]) {
/* First vertical edge is different in MBAFF frames /* First vertical edge is different in MBAFF frames
* There are 8 different bS to compute and 2 different Qp * There are 8 different bS to compute and 2 different Qp
*/ */
@ -677,8 +677,8 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
const uint8_t *off= offset[MB_FIELD][mb_y&1]; const uint8_t *off= offset[MB_FIELD][mb_y&1];
for( i = 0; i < 8; i++ ) { for( i = 0; i < 8; i++ ) {
int j= MB_FIELD ? i>>2 : i&1; int j= MB_FIELD ? i>>2 : i&1;
int mbn_xy = h->left_mb_xy[j]; int mbn_xy = h->left_mb_xy[LEFT(j)];
int mbn_type= h->left_type[j]; int mbn_type= h->left_type[LEFT(j)];
if( IS_INTRA( mbn_type ) ) if( IS_INTRA( mbn_type ) )
bS[i] = 4; bS[i] = 4;