diff --git a/libavcodec/h264.c b/libavcodec/h264.c index f61f524508..8d652f13ce 100644 --- a/libavcodec/h264.c +++ b/libavcodec/h264.c @@ -457,6 +457,7 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, const int full_my= my>>2; const int pic_width = 16*s->mb_width; const int pic_height = 16*s->mb_height >> MB_FIELD; + int ysh; if(mx&7) extra_width -= 3; if(my&7) extra_height -= 3; @@ -465,7 +466,8 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, || full_my < 0-extra_height || full_mx + 16/*FIXME*/ > pic_width + extra_width || full_my + 16/*FIXME*/ > pic_height + extra_height){ - s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height); + s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize, + 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height); src_y= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize; emu=1; } @@ -502,25 +504,27 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, return; } - if(MB_FIELD){ + ysh = 3 - !!(CHROMA422); + if(!CHROMA422 && MB_FIELD){ // chroma offset when predicting from a field of opposite parity my += 2 * ((s->mb_y & 1) - (pic->f.reference - 1)); emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1); } - src_cb = pic->f.data[1] + ((mx >> 3) << pixel_shift) + (my >> 3) * h->mb_uvlinesize; - src_cr = pic->f.data[2] + ((mx >> 3) << pixel_shift) + (my >> 3) * h->mb_uvlinesize; + + src_cb = pic->f.data[1] + ((mx >> 3) << pixel_shift) + (my >> ysh) * h->mb_uvlinesize; + src_cr = pic->f.data[2] + ((mx >> 3) << pixel_shift) + (my >> ysh) * h->mb_uvlinesize; if(emu){ - s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1); + s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, (16>>!(CHROMA422)) + 1, (mx>>3), (my>>ysh), pic_width>>1, pic_height>>!(CHROMA422)); src_cb= s->edge_emu_buffer; } - chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7); + chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height << !!(CHROMA422), mx&7, (my << !!(CHROMA422)) &7); if(emu){ - s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1); + s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, (16>>!(CHROMA422)) + 1, (mx>>3), (my>>ysh), pic_width>>1, pic_height>>!(CHROMA422)); src_cr= s->edge_emu_buffer; } - chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7); + chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height << !!(CHROMA422), mx&7, (my << !!(CHROMA422)) &7); } static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta, @@ -537,6 +541,9 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei if(chroma444){ dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; + } else if (CHROMA422) { + dest_cb += ( x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize; + dest_cr += ( x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize; }else{ dest_cb += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize; dest_cr += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize; @@ -577,6 +584,9 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom chroma_weight_op = luma_weight_op; dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize; + } else if (CHROMA422) { + dest_cb += ( x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize; + dest_cr += ( x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize; }else{ dest_cb += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize; dest_cr += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize; @@ -606,6 +616,14 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0); chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0); chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0); + if (CHROMA422) { + chroma_weight_avg(dest_cb + chroma_height * h->mb_uvlinesize, + tmp_cb + chroma_height * h->mb_uvlinesize, + h->mb_uvlinesize, 5, weight0, weight1, 0); + chroma_weight_avg(dest_cr + chroma_height * h->mb_uvlinesize, + tmp_cr + chroma_height * h->mb_uvlinesize, + h->mb_uvlinesize, 5, weight0, weight1, 0); + } }else{ luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom, h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0], @@ -616,6 +634,18 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom, h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0], h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]); + if (CHROMA422) { + chroma_weight_avg(dest_cb + chroma_height * h->mb_uvlinesize, + tmp_cb + chroma_height * h->mb_uvlinesize, + h->mb_uvlinesize, h->chroma_log2_weight_denom, + h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0], + h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]); + chroma_weight_avg(dest_cr + chroma_height * h->mb_uvlinesize, + tmp_cr + chroma_height * h->mb_uvlinesize, + h->mb_uvlinesize, h->chroma_log2_weight_denom, + h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0], + h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]); + } } }else{ int list = list1 ? 1 : 0; @@ -632,6 +662,14 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]); chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom, h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]); + if (CHROMA422) { + chroma_weight_op(dest_cb + chroma_height * h->mb_uvlinesize, + h->mb_uvlinesize, h->chroma_log2_weight_denom, + h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]); + chroma_weight_op(dest_cr + chroma_height * h->mb_uvlinesize, + h->mb_uvlinesize, h->chroma_log2_weight_denom, + h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]); + } } } } @@ -1851,13 +1889,13 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i } if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ if (!h->sps.chroma_format_idc) { - for (i = 0; i < 8; i++) { + for (i = 0; i < block_h; i++) { uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize); for (j = 0; j < 8; j++) { tmp_cb[j] = 1 << (bit_depth - 1); } } - for (i = 0; i < 8; i++) { + for (i = 0; i < block_h; i++) { uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize); for (j = 0; j < 8; j++) { tmp_cr[j] = 1 << (bit_depth - 1); @@ -1882,7 +1920,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i } if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ if (!h->sps.chroma_format_idc) { - for (i = 0; i < 8; i++) { + for (i = 0; i < block_h; i++) { memset(dest_cb + i*uvlinesize, 128, 8); memset(dest_cr + i*uvlinesize, 128, 8); } @@ -1931,6 +1969,12 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16)) idct_add (dest[j-1] + block_offset[i], h->mb + (i*16 << pixel_shift), uvlinesize); } + if (CHROMA422) { + for(i=j*16+4; inon_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16)) + idct_add (dest[j-1] + block_offset[i+4], h->mb + (i*16 << pixel_shift), uvlinesize); + } + } } } }else{ diff --git a/libavcodec/h264pred.c b/libavcodec/h264pred.c index 17199d01e6..37a4cf1486 100644 --- a/libavcodec/h264pred.c +++ b/libavcodec/h264pred.c @@ -462,10 +462,10 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, co h->pred8x8[DC_PRED8x8 ]= FUNCC(pred8x16_dc , depth);\ h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x16_left_dc , depth);\ h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x16_top_dc , depth);\ - h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l0t, depth);\ - h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\ - h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\ - h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\ + h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_l0t, depth);\ + h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_0lt, depth);\ + h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_l00, depth);\ + h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_0l0, depth);\ }\ }else{\ h->pred8x8[DC_PRED8x8 ]= FUNCD(pred8x8_dc_rv40);\ @@ -510,8 +510,13 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, co h->pred4x4_add [ HOR_PRED ]= FUNCC(pred4x4_horizontal_add , depth);\ h->pred8x8l_add [VERT_PRED ]= FUNCC(pred8x8l_vertical_add , depth);\ h->pred8x8l_add [ HOR_PRED ]= FUNCC(pred8x8l_horizontal_add , depth);\ + if (chroma_format_idc == 1) {\ h->pred8x8_add [VERT_PRED8x8]= FUNCC(pred8x8_vertical_add , depth);\ h->pred8x8_add [ HOR_PRED8x8]= FUNCC(pred8x8_horizontal_add , depth);\ + } else {\ + h->pred8x8_add [VERT_PRED8x8]= FUNCC(pred8x16_vertical_add , depth);\ + h->pred8x8_add [ HOR_PRED8x8]= FUNCC(pred8x16_horizontal_add , depth);\ + }\ h->pred16x16_add[VERT_PRED8x8]= FUNCC(pred16x16_vertical_add , depth);\ h->pred16x16_add[ HOR_PRED8x8]= FUNCC(pred16x16_horizontal_add , depth);\ diff --git a/libavcodec/h264pred_template.c b/libavcodec/h264pred_template.c index d4f654e18c..318b56196d 100644 --- a/libavcodec/h264pred_template.c +++ b/libavcodec/h264pred_template.c @@ -657,29 +657,50 @@ static void FUNCC(pred8x16_dc)(uint8_t *_src, int stride){ } } -//the following 4 function should not be optimized! static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, int stride){ FUNCC(pred8x8_top_dc)(src, stride); FUNCC(pred4x4_dc)(src, NULL, stride); } +static void FUNC(pred8x16_mad_cow_dc_l0t)(uint8_t *src, int stride){ + FUNCC(pred8x16_top_dc)(src, stride); + FUNCC(pred4x4_dc)(src, NULL, stride); +} + static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, int stride){ FUNCC(pred8x8_dc)(src, stride); FUNCC(pred4x4_top_dc)(src, NULL, stride); } +static void FUNC(pred8x16_mad_cow_dc_0lt)(uint8_t *src, int stride){ + FUNCC(pred8x16_dc)(src, stride); + FUNCC(pred4x4_top_dc)(src, NULL, stride); +} + static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, int stride){ FUNCC(pred8x8_left_dc)(src, stride); FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride); FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride); } +static void FUNC(pred8x16_mad_cow_dc_l00)(uint8_t *src, int stride){ + FUNCC(pred8x16_left_dc)(src, stride); + FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride); + FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride); +} + static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, int stride){ FUNCC(pred8x8_left_dc)(src, stride); FUNCC(pred4x4_128_dc)(src , NULL, stride); FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride); } +static void FUNC(pred8x16_mad_cow_dc_0l0)(uint8_t *src, int stride){ + FUNCC(pred8x16_left_dc)(src, stride); + FUNCC(pred4x4_128_dc)(src , NULL, stride); + FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride); +} + static void FUNCC(pred8x8_plane)(uint8_t *_src, int _stride){ int j, k; int a; @@ -1126,8 +1147,24 @@ static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset, c FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); } +static void FUNCC(pred8x16_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ + int i; + for(i=0; i<4; i++) + FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); + for(i=4; i<8; i++) + FUNCC(pred4x4_vertical_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride); +} + static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ int i; for(i=0; i<4; i++) FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); } + +static void FUNCC(pred8x16_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ + int i; + for(i=0; i<4; i++) + FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); + for(i=4; i<8; i++) + FUNCC(pred4x4_horizontal_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride); +} diff --git a/libavcodec/x86/h264dsp_mmx.c b/libavcodec/x86/h264dsp_mmx.c index 910ad8401f..06ee7cad43 100644 --- a/libavcodec/x86/h264dsp_mmx.c +++ b/libavcodec/x86/h264dsp_mmx.c @@ -354,7 +354,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chrom { int mm_flags = av_get_cpu_flags(); - if (mm_flags & AV_CPU_FLAG_MMX2) { + if (chroma_format_idc == 1 && mm_flags & AV_CPU_FLAG_MMX2) { c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2; }