From 3eeb7557637e8e48fbc64e844a94775edb496906 Mon Sep 17 00:00:00 2001 From: Christophe GISQUET Date: Mon, 2 Jan 2012 20:53:54 +0100 Subject: [PATCH] rv34: Inter/intra MB code split Split inter/intra macroblock handling code. This will allow further optimizations such as performing inverse transform and block reconstruction in a single pass as well as specializing the code. Signed-off-by: Janne Grunau --- libavcodec/rv34.c | 250 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 194 insertions(+), 56 deletions(-) diff --git a/libavcodec/rv34.c b/libavcodec/rv34.c index 48b5193f38..48f34b93e0 100644 --- a/libavcodec/rv34.c +++ b/libavcodec/rv34.c @@ -351,44 +351,70 @@ static inline RV34VLC* choose_vlc_set(int quant, int mod, int type) } /** - * Decode macroblock header and return CBP in case of success, -1 otherwise. + * Decode intra macroblock header and return CBP in case of success, -1 otherwise. */ -static int rv34_decode_mb_header(RV34DecContext *r, int8_t *intra_types) +static int rv34_decode_intra_mb_header(RV34DecContext *r, int8_t *intra_types) +{ + MpegEncContext *s = &r->s; + GetBitContext *gb = &s->gb; + int mb_pos = s->mb_x + s->mb_y * s->mb_stride; + int t; + + r->is16 = get_bits1(gb); + if(r->is16){ + s->current_picture_ptr->f.mb_type[mb_pos] = MB_TYPE_INTRA16x16; + r->block_type = RV34_MB_TYPE_INTRA16x16; + t = get_bits(gb, 2); + fill_rectangle(intra_types, 4, 4, r->intra_types_stride, t, sizeof(intra_types[0])); + r->luma_vlc = 2; + }else{ + if(!r->rv30){ + if(!get_bits1(gb)) + av_log(s->avctx, AV_LOG_ERROR, "Need DQUANT\n"); + } + s->current_picture_ptr->f.mb_type[mb_pos] = MB_TYPE_INTRA; + r->block_type = RV34_MB_TYPE_INTRA; + if(r->decode_intra_types(r, gb, intra_types) < 0) + return -1; + r->luma_vlc = 1; + } + + r->chroma_vlc = 0; + r->cur_vlcs = choose_vlc_set(r->si.quant, r->si.vlc_set, 0); + + return rv34_decode_cbp(gb, r->cur_vlcs, r->is16); +} + +/** + * Decode inter macroblock header and return CBP in case of success, -1 otherwise. 
+ */ +static int rv34_decode_inter_mb_header(RV34DecContext *r, int8_t *intra_types) { MpegEncContext *s = &r->s; GetBitContext *gb = &s->gb; int mb_pos = s->mb_x + s->mb_y * s->mb_stride; int i, t; - if(!r->si.type){ - r->is16 = get_bits1(gb); - if(!r->is16 && !r->rv30){ - if(!get_bits1(gb)) - av_log(s->avctx, AV_LOG_ERROR, "Need DQUANT\n"); - } - s->current_picture_ptr->f.mb_type[mb_pos] = r->is16 ? MB_TYPE_INTRA16x16 : MB_TYPE_INTRA; - r->block_type = r->is16 ? RV34_MB_TYPE_INTRA16x16 : RV34_MB_TYPE_INTRA; - }else{ - r->block_type = r->decode_mb_info(r); - if(r->block_type == -1) - return -1; - s->current_picture_ptr->f.mb_type[mb_pos] = rv34_mb_type_to_lavc[r->block_type]; - r->mb_type[mb_pos] = r->block_type; - if(r->block_type == RV34_MB_SKIP){ - if(s->pict_type == AV_PICTURE_TYPE_P) - r->mb_type[mb_pos] = RV34_MB_P_16x16; - if(s->pict_type == AV_PICTURE_TYPE_B) - r->mb_type[mb_pos] = RV34_MB_B_DIRECT; - } - r->is16 = !!IS_INTRA16x16(s->current_picture_ptr->f.mb_type[mb_pos]); - rv34_decode_mv(r, r->block_type); - if(r->block_type == RV34_MB_SKIP){ - fill_rectangle(intra_types, 4, 4, r->intra_types_stride, 0, sizeof(intra_types[0])); - return 0; - } - r->chroma_vlc = 1; - r->luma_vlc = 0; + r->block_type = r->decode_mb_info(r); + if(r->block_type == -1) + return -1; + s->current_picture_ptr->f.mb_type[mb_pos] = rv34_mb_type_to_lavc[r->block_type]; + r->mb_type[mb_pos] = r->block_type; + if(r->block_type == RV34_MB_SKIP){ + if(s->pict_type == AV_PICTURE_TYPE_P) + r->mb_type[mb_pos] = RV34_MB_P_16x16; + if(s->pict_type == AV_PICTURE_TYPE_B) + r->mb_type[mb_pos] = RV34_MB_B_DIRECT; } + r->is16 = !!IS_INTRA16x16(s->current_picture_ptr->f.mb_type[mb_pos]); + rv34_decode_mv(r, r->block_type); + if(r->block_type == RV34_MB_SKIP){ + fill_rectangle(intra_types, 4, 4, r->intra_types_stride, 0, sizeof(intra_types[0])); + return 0; + } + r->chroma_vlc = 1; + r->luma_vlc = 0; + if(IS_INTRA(s->current_picture_ptr->f.mb_type[mb_pos])){ if(r->is16){ t = get_bits(gb, 2); @@ 
-1123,7 +1149,7 @@ static int rv34_set_deblock_coef(RV34DecContext *r) return hmvmask | vmvmask; } -static int rv34_decode_macroblock(RV34DecContext *r, int8_t *intra_types) +static int rv34_decode_inter_macroblock(RV34DecContext *r, int8_t *intra_types) { MpegEncContext *s = &r->s; GetBitContext *gb = &s->gb; @@ -1131,7 +1157,6 @@ static int rv34_decode_macroblock(RV34DecContext *r, int8_t *intra_types) int q_dc, q_ac, has_ac; int i, blknum, blkoff; LOCAL_ALIGNED_16(DCTELEM, block16, [64]); - int luma_dc_quant; int dist; int mb_pos = s->mb_x + s->mb_y * s->mb_stride; @@ -1151,20 +1176,19 @@ static int rv34_decode_macroblock(RV34DecContext *r, int8_t *intra_types) r->avail_cache[1] = s->current_picture_ptr->f.mb_type[mb_pos - s->mb_stride - 1]; s->qscale = r->si.quant; - cbp = cbp2 = rv34_decode_mb_header(r, intra_types); + cbp = cbp2 = rv34_decode_inter_mb_header(r, intra_types); r->cbp_luma [mb_pos] = cbp; r->cbp_chroma[mb_pos] = cbp >> 16; - if(s->pict_type == AV_PICTURE_TYPE_I) - r->deblock_coefs[mb_pos] = 0xFFFF; - else - r->deblock_coefs[mb_pos] = rv34_set_deblock_coef(r) | r->cbp_luma[mb_pos]; + r->deblock_coefs[mb_pos] = rv34_set_deblock_coef(r) | r->cbp_luma[mb_pos]; s->current_picture_ptr->f.qscale_table[mb_pos] = s->qscale; if(cbp == -1) return -1; - luma_dc_quant = r->block_type == RV34_MB_P_MIX16x16 ? r->luma_dc_quant_p[s->qscale] : r->luma_dc_quant_i[s->qscale]; if(r->is16){ + int luma_dc_quant = r->block_type == RV34_MB_P_MIX16x16 + ? 
r->luma_dc_quant_p[s->qscale] + : r->luma_dc_quant_i[s->qscale]; q_dc = rv34_qscale_tab[luma_dc_quant]; q_ac = rv34_qscale_tab[s->qscale]; s->dsp.clear_block(block16); @@ -1172,25 +1196,37 @@ static int rv34_decode_macroblock(RV34DecContext *r, int8_t *intra_types) r->rdsp.rv34_inv_transform_tab[1](block16); else r->rdsp.rv34_inv_transform_dc_tab[1](block16); - } - q_ac = rv34_qscale_tab[s->qscale]; - for(i = 0; i < 16; i++, cbp >>= 1){ - DCTELEM *ptr; - if(!r->is16 && !(cbp & 1)) continue; - blknum = ((i & 2) >> 1) + ((i & 8) >> 2); - blkoff = ((i & 1) << 2) + ((i & 4) << 3); - ptr = s->block[blknum] + blkoff; - if(cbp & 1) - has_ac = rv34_decode_block(ptr, gb, r->cur_vlcs, r->luma_vlc, 0, q_ac, q_ac, q_ac); - else - has_ac = 0; - if(r->is16) //FIXME: optimize + q_ac = rv34_qscale_tab[s->qscale]; + for(i = 0; i < 16; i++, cbp >>= 1){ + DCTELEM *ptr; + blknum = ((i & 2) >> 1) + ((i & 8) >> 2); + blkoff = ((i & 1) << 2) + ((i & 4) << 3); + ptr = s->block[blknum] + blkoff; + if(cbp & 1) + has_ac = rv34_decode_block(ptr, gb, r->cur_vlcs, r->luma_vlc, 0, q_ac, q_ac, q_ac); + else + has_ac = 0; ptr[0] = block16[(i & 3) | ((i & 0xC) << 1)]; - if(has_ac) - r->rdsp.rv34_inv_transform_tab[0](ptr); - else - r->rdsp.rv34_inv_transform_dc_tab[0](ptr); + if(has_ac) + r->rdsp.rv34_inv_transform_tab[0](ptr); + else + r->rdsp.rv34_inv_transform_dc_tab[0](ptr); + } + }else{ + q_ac = rv34_qscale_tab[s->qscale]; + for(i = 0; i < 16; i++, cbp >>= 1){ + DCTELEM *ptr; + if(!(cbp & 1)) continue; + blknum = ((i & 2) >> 1) + ((i & 8) >> 2); + blkoff = ((i & 1) << 2) + ((i & 4) << 3); + ptr = s->block[blknum] + blkoff; + has_ac = rv34_decode_block(ptr, gb, r->cur_vlcs, r->luma_vlc, 0, q_ac, q_ac, q_ac); + if(has_ac) + r->rdsp.rv34_inv_transform_tab[0](ptr); + else + r->rdsp.rv34_inv_transform_dc_tab[0](ptr); + } } if(r->block_type == RV34_MB_P_MIX16x16) r->cur_vlcs = choose_vlc_set(r->si.quant, r->si.vlc_set, 1); @@ -1215,6 +1251,104 @@ static int rv34_decode_macroblock(RV34DecContext *r, 
int8_t *intra_types) return 0; } +static int rv34_decode_intra_macroblock(RV34DecContext *r, int8_t *intra_types) +{ + MpegEncContext *s = &r->s; + GetBitContext *gb = &s->gb; + int cbp, cbp2; + int q_dc, q_ac, has_ac; + int i, blknum, blkoff; + LOCAL_ALIGNED_16(DCTELEM, block16, [64]); + int dist; + int mb_pos = s->mb_x + s->mb_y * s->mb_stride; + + // Calculate which neighbours are available. Maybe it's worth optimizing too. + memset(r->avail_cache, 0, sizeof(r->avail_cache)); + fill_rectangle(r->avail_cache + 6, 2, 2, 4, 1, 4); + dist = (s->mb_x - s->resync_mb_x) + (s->mb_y - s->resync_mb_y) * s->mb_width; + if(s->mb_x && dist) + r->avail_cache[5] = + r->avail_cache[9] = s->current_picture_ptr->f.mb_type[mb_pos - 1]; + if(dist >= s->mb_width) + r->avail_cache[2] = + r->avail_cache[3] = s->current_picture_ptr->f.mb_type[mb_pos - s->mb_stride]; + if(((s->mb_x+1) < s->mb_width) && dist >= s->mb_width - 1) + r->avail_cache[4] = s->current_picture_ptr->f.mb_type[mb_pos - s->mb_stride + 1]; + if(s->mb_x && dist > s->mb_width) + r->avail_cache[1] = s->current_picture_ptr->f.mb_type[mb_pos - s->mb_stride - 1]; + + s->qscale = r->si.quant; + cbp = cbp2 = rv34_decode_intra_mb_header(r, intra_types); + r->cbp_luma [mb_pos] = cbp; + r->cbp_chroma[mb_pos] = cbp >> 16; + r->deblock_coefs[mb_pos] = 0xFFFF; + s->current_picture_ptr->f.qscale_table[mb_pos] = s->qscale; + + if(cbp == -1) + return -1; + + if(r->is16){ + int luma_dc_quant = r->block_type == RV34_MB_P_MIX16x16 + ? 
r->luma_dc_quant_p[s->qscale] + : r->luma_dc_quant_i[s->qscale]; + q_dc = rv34_qscale_tab[luma_dc_quant]; + q_ac = rv34_qscale_tab[s->qscale]; + s->dsp.clear_block(block16); + if (rv34_decode_block(block16, gb, r->cur_vlcs, 3, 0, q_dc, q_dc, q_ac)) + r->rdsp.rv34_inv_transform_tab[1](block16); + else + r->rdsp.rv34_inv_transform_dc_tab[1](block16); + + q_ac = rv34_qscale_tab[s->qscale]; + for(i = 0; i < 16; i++, cbp >>= 1){ + DCTELEM *ptr; + blknum = ((i & 2) >> 1) + ((i & 8) >> 2); + blkoff = ((i & 1) << 2) + ((i & 4) << 3); + ptr = s->block[blknum] + blkoff; + if(cbp & 1) + has_ac = rv34_decode_block(ptr, gb, r->cur_vlcs, r->luma_vlc, 0, q_ac, q_ac, q_ac); + else + has_ac = 0; + ptr[0] = block16[(i & 3) | ((i & 0xC) << 1)]; + if(has_ac) + r->rdsp.rv34_inv_transform_tab[0](ptr); + else + r->rdsp.rv34_inv_transform_dc_tab[0](ptr); + } + }else{ + q_ac = rv34_qscale_tab[s->qscale]; + for(i = 0; i < 16; i++, cbp >>= 1){ + DCTELEM *ptr; + if(!(cbp & 1)) continue; + blknum = ((i & 2) >> 1) + ((i & 8) >> 2); + blkoff = ((i & 1) << 2) + ((i & 4) << 3); + ptr = s->block[blknum] + blkoff; + has_ac = rv34_decode_block(ptr, gb, r->cur_vlcs, r->luma_vlc, 0, q_ac, q_ac, q_ac); + if(has_ac) + r->rdsp.rv34_inv_transform_tab[0](ptr); + else + r->rdsp.rv34_inv_transform_dc_tab[0](ptr); + } + } + + q_dc = rv34_qscale_tab[rv34_chroma_quant[1][s->qscale]]; + q_ac = rv34_qscale_tab[rv34_chroma_quant[0][s->qscale]]; + for(; i < 24; i++, cbp >>= 1){ + DCTELEM *ptr; + if(!(cbp & 1)) continue; + blknum = ((i & 4) >> 2) + 4; + blkoff = ((i & 1) << 2) + ((i & 2) << 4); + ptr = s->block[blknum] + blkoff; + if (rv34_decode_block(ptr, gb, r->cur_vlcs, r->chroma_vlc, 1, q_dc, q_ac, q_ac)) + r->rdsp.rv34_inv_transform_tab[0](ptr); + else + r->rdsp.rv34_inv_transform_dc_tab[0](ptr); + } + rv34_output_macroblock(r, intra_types, cbp2, r->is16); + + return 0; +} + static int check_slice_end(RV34DecContext *r, MpegEncContext *s) { int bits; @@ -1324,7 +1458,11 @@ static int 
rv34_decode_slice(RV34DecContext *r, int end, const uint8_t* buf, int ff_update_block_index(s); s->dsp.clear_blocks(s->block[0]); - if(rv34_decode_macroblock(r, r->intra_types + s->mb_x * 4 + 4) < 0){ + if(r->si.type) + res = rv34_decode_inter_macroblock(r, r->intra_types + s->mb_x * 4 + 4); + else + res = rv34_decode_intra_macroblock(r, r->intra_types + s->mb_x * 4 + 4); + if(res < 0){ ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, ER_MB_ERROR); return -1; }