From 83976e40e89655162e5394cf8915d9b6d89702d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Raulet?= Date: Sat, 26 Apr 2014 15:35:23 +0200 Subject: [PATCH] hevc: C code update for new motion compensation pretty print C Reviewed-by: "Ronald S. Bultje" Signed-off-by: Michael Niedermayer --- libavcodec/hevc.c | 536 +++++++++------- libavcodec/hevc.h | 19 +- libavcodec/hevc_cabac.c | 4 +- libavcodec/hevc_filter.c | 315 +++++----- libavcodec/hevc_mvs.c | 109 ++-- libavcodec/hevcdsp.c | 125 ++-- libavcodec/hevcdsp.h | 49 +- libavcodec/hevcdsp_template.c | 1071 ++++++++++++++++++++++++++------ libavcodec/hevcpred_template.c | 12 +- 9 files changed, 1509 insertions(+), 731 deletions(-) diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c index 9ee0d265b6..369f0fc3b6 100644 --- a/libavcodec/hevc.c +++ b/libavcodec/hevc.c @@ -38,9 +38,7 @@ #include "golomb.h" #include "hevc.h" -const uint8_t ff_hevc_qpel_extra_before[4] = { 0, 3, 3, 2 }; -const uint8_t ff_hevc_qpel_extra_after[4] = { 0, 3, 4, 4 }; -const uint8_t ff_hevc_qpel_extra[4] = { 0, 6, 7, 6 }; +const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 }; /** * NOTE: Each function hls_foo correspond to the function foo in the @@ -126,7 +124,7 @@ static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps) goto fail; s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField), - av_buffer_alloc); + av_buffer_allocz); s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab), av_buffer_allocz); if (!s->tab_mvf_pool || !s->rpl_tab_pool) @@ -151,7 +149,7 @@ static void pred_weight_table(HEVCContext *s, GetBitContext *gb) s->sh.luma_log2_weight_denom = get_ue_golomb_long(gb); if (s->sps->chroma_format_idc != 0) { int delta = get_se_golomb(gb); - s->sh.chroma_log2_weight_denom = av_clip_c(s->sh.luma_log2_weight_denom + delta, 0, 7); + s->sh.chroma_log2_weight_denom = av_clip(s->sh.luma_log2_weight_denom + delta, 0, 7); } for (i = 0; i < s->sh.nb_refs[L0]; i++) { @@ -179,7 +177,7 @@ static void pred_weight_table(HEVCContext *s, GetBitContext *gb) int delta_chroma_weight_l0 = get_se_golomb(gb); int delta_chroma_offset_l0 = get_se_golomb(gb); s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0; - s->sh.chroma_offset_l0[i][j] = av_clip_c((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j]) + s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j]) >> s->sh.chroma_log2_weight_denom) + 128), -128, 127); } } else { @@ -215,7 +213,7 @@ static void pred_weight_table(HEVCContext *s, GetBitContext *gb) int delta_chroma_weight_l1 = get_se_golomb(gb); int delta_chroma_offset_l1 = get_se_golomb(gb); s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1; - s->sh.chroma_offset_l1[i][j] = av_clip_c((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j]) + s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j]) >> s->sh.chroma_log2_weight_denom) + 128), -128, 127); } } else { @@ -1037,9 +1035,7 @@ static int hls_transform_tree(HEVCContext *s, int x0, int y0, } } if (!s->sh.disable_deblocking_filter_flag) { - ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size, - lc->slice_or_tiles_up_boundary, - lc->slice_or_tiles_left_boundary); + ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size); if (s->pps->transquant_bypass_enable_flag && lc->cu.cu_transquant_bypass_flag) set_deblocking_bypass(s, x0, y0, log2_trafo_size); @@ -1065,9 +1061,8 @@ static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size) const uint8_t *pcm = skip_bytes(&s->HEVClc->cc, (length + 7) >> 3); int ret; - ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size, - lc->slice_or_tiles_up_boundary, - lc->slice_or_tiles_left_boundary); + if (!s->sh.disable_deblocking_filter_flag) + ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size); ret = init_get_bits(&gb, pcm, length); if (ret < 0) @@ -1080,7 +1075,7 @@ static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size) } /** - * 8.5.3.2.2.1 Luma sample interpolation process + * 8.5.3.2.2.1 Luma sample unidirectional interpolation process * * @param s HEVC decoding context * @param dst target buffer for block data at block position @@ -1091,49 +1086,148 @@ static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size) * @param y_off vertical position of block from origin (0, 0) * @param block_w width of block * @param block_h height of block + * @param luma_weight weighting factor applied to the luma prediction + * @param luma_offset additive offset applied to the luma prediction value */ -static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride, - AVFrame *ref, const Mv *mv, int x_off, int y_off, - int block_w, int block_h) + +static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, + AVFrame *ref, const Mv *mv, int x_off, int y_off, + int block_w, int block_h, int luma_weight, int luma_offset) { HEVCLocalContext *lc = s->HEVClc; uint8_t *src = ref->data[0]; ptrdiff_t srcstride = ref->linesize[0]; int pic_width = s->sps->width; int pic_height = s->sps->height; - - int mx = mv->x & 3; - int my = mv->y & 3; - int extra_left = ff_hevc_qpel_extra_before[mx]; - int extra_top = ff_hevc_qpel_extra_before[my]; + int mx = mv->x & 3; + int my = mv->y & 3; + int weight_flag = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) || + (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag); + int idx = ff_hevc_pel_weight[block_w]; x_off += mv->x >> 2; y_off += mv->y >> 2; src += y_off * srcstride + (x_off << s->sps->pixel_shift); - if (x_off < extra_left || y_off < extra_top || - x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] || - y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) { + if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER || + x_off >= pic_width - block_w - QPEL_EXTRA_AFTER || + y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) { const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift; - int offset = extra_top * srcstride + (extra_left << s->sps->pixel_shift); - int buf_offset = extra_top * - edge_emu_stride + (extra_left << s->sps->pixel_shift); + int offset = QPEL_EXTRA_BEFORE * srcstride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift); + int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift); s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset, edge_emu_stride, srcstride, - block_w + ff_hevc_qpel_extra[mx], - block_h + ff_hevc_qpel_extra[my], - x_off - extra_left, y_off - extra_top, + block_w + QPEL_EXTRA, + block_h + QPEL_EXTRA, + x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE, pic_width, pic_height); src = lc->edge_emu_buffer + buf_offset; srcstride = edge_emu_stride; } - s->hevcdsp.put_hevc_qpel[my][mx](dst, dststride, src, srcstride, block_w, - block_h, lc->mc_buffer); + + if (!weight_flag) + s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride, + block_h, mx, my, block_w); + else + s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride, + block_h, s->sh.luma_log2_weight_denom, + luma_weight, luma_offset, mx, my, block_w); } /** - * 8.5.3.2.2.2 Chroma sample interpolation process + * 8.5.3.2.2.1 Luma sample bidirectional interpolation process + * + * @param s HEVC decoding context + * @param dst target buffer for block data at block position + * @param dststride stride of the dst buffer + * @param ref0 reference picture0 buffer at origin (0, 0) + * @param mv0 motion vector0 (relative to block position) to get pixel data from + * @param x_off horizontal position of block from origin (0, 0) + * @param y_off vertical position of block from origin (0, 0) + * @param block_w width of block + * @param block_h height of block + * @param ref1 reference picture1 buffer at origin (0, 0) + * @param mv1 motion vector1 (relative to block position) to get pixel data from + * @param current_mv current motion vector structure + */ + static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, + AVFrame *ref0, const Mv *mv0, int x_off, int y_off, + int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv) +{ + HEVCLocalContext *lc = s->HEVClc; + DECLARE_ALIGNED(16, int16_t, tmp[MAX_PB_SIZE * MAX_PB_SIZE]); + ptrdiff_t src0stride = ref0->linesize[0]; + ptrdiff_t src1stride = ref1->linesize[0]; + int pic_width = s->sps->width; + int pic_height = s->sps->height; + int mx0 = mv0->x & 3; + int my0 = mv0->y & 3; + int mx1 = mv1->x & 3; + int my1 = mv1->y & 3; + int weight_flag = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) || + (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag); + int x_off0 = x_off + (mv0->x >> 2); + int y_off0 = y_off + (mv0->y >> 2); + int x_off1 = x_off + (mv1->x >> 2); + int y_off1 = y_off + (mv1->y >> 2); + int idx = ff_hevc_pel_weight[block_w]; + + uint8_t *src0 = ref0->data[0] + y_off0 * src0stride + (x_off0 << s->sps->pixel_shift); + uint8_t *src1 = ref1->data[0] + y_off1 * src1stride + (x_off1 << s->sps->pixel_shift); + + if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER || + x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER || + y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) { + const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift; + int offset = QPEL_EXTRA_BEFORE * src0stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift); + int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift); + + s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset, + edge_emu_stride, src0stride, + block_w + QPEL_EXTRA, + block_h + QPEL_EXTRA, + x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE, + pic_width, pic_height); + src0 = lc->edge_emu_buffer + buf_offset; + src0stride = edge_emu_stride; + } + + if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER || + x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER || + y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) { + const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift; + int offset = QPEL_EXTRA_BEFORE * src1stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift); + int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift); + + s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset, + edge_emu_stride, src1stride, + block_w + QPEL_EXTRA, + block_h + QPEL_EXTRA, + x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE, + pic_width, pic_height); + src1 = lc->edge_emu_buffer2 + buf_offset; + src1stride = edge_emu_stride; + } + + s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](tmp, MAX_PB_SIZE, src0, src0stride, + block_h, mx0, my0, block_w); + if (!weight_flag) + s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, tmp, MAX_PB_SIZE, + block_h, mx1, my1, block_w); + else + s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, tmp, MAX_PB_SIZE, + block_h, s->sh.luma_log2_weight_denom, + s->sh.luma_weight_l0[current_mv->ref_idx[0]], + s->sh.luma_weight_l1[current_mv->ref_idx[1]], + s->sh.luma_offset_l0[current_mv->ref_idx[0]], + s->sh.luma_offset_l1[current_mv->ref_idx[1]], + mx1, my1, block_w); + +} + +/** + * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process * * @param s HEVC decoding context * @param dst1 target buffer for block data at block position (U plane) @@ -1145,70 +1239,165 @@ static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride, * @param y_off vertical position of block from origin (0, 0) * @param block_w width of block * @param block_h height of block + * @param chroma_weight weighting factor applied to the chroma prediction + * @param chroma_offset additive offset applied to the chroma prediction value */ -static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2, - ptrdiff_t dststride, AVFrame *ref, const Mv *mv, - int x_off, int y_off, int block_w, int block_h) + +static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0, + ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist, + int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset) { HEVCLocalContext *lc = s->HEVClc; - uint8_t *src1 = ref->data[1]; - uint8_t *src2 = ref->data[2]; - ptrdiff_t src1stride = ref->linesize[1]; - ptrdiff_t src2stride = ref->linesize[2]; - int pic_width = s->sps->width >> 1; - int pic_height = s->sps->height >> 1; + int pic_width = s->sps->width >> s->sps->hshift[1]; + int pic_height = s->sps->height >> s->sps->vshift[1]; + const Mv *mv = ¤t_mv->mv[reflist]; + int weight_flag = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) || + (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag); + int idx = ff_hevc_pel_weight[block_w]; + int hshift = s->sps->hshift[1]; + int vshift = s->sps->vshift[1]; + intptr_t mx = mv->x & ((1 << (2 + hshift)) - 1); + intptr_t my = mv->y & ((1 << (2 + vshift)) - 1); + intptr_t _mx = mx << (1 - hshift); + intptr_t _my = my << (1 - vshift); - int mx = mv->x & 7; - int my = mv->y & 7; - - x_off += mv->x >> 3; - y_off += mv->y >> 3; - src1 += y_off * src1stride + (x_off << s->sps->pixel_shift); - src2 += y_off * src2stride + (x_off << s->sps->pixel_shift); + x_off += mv->x >> (2 + hshift); + y_off += mv->y >> (2 + vshift); + src0 += y_off * srcstride + (x_off << s->sps->pixel_shift); if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER || x_off >= pic_width - block_w - EPEL_EXTRA_AFTER || y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) { const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift; + int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->sps->pixel_shift)); + int buf_offset0 = EPEL_EXTRA_BEFORE * + (edge_emu_stride + (1 << s->sps->pixel_shift)); + s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0, + edge_emu_stride, srcstride, + block_w + EPEL_EXTRA, block_h + EPEL_EXTRA, + x_off - EPEL_EXTRA_BEFORE, + y_off - EPEL_EXTRA_BEFORE, + pic_width, pic_height); + + src0 = lc->edge_emu_buffer + buf_offset0; + srcstride = edge_emu_stride; + } + if (!weight_flag) + s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride, + block_h, _mx, _my, block_w); + else + s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride, + block_h, s->sh.chroma_log2_weight_denom, + chroma_weight, chroma_offset, _mx, _my, block_w); +} + +/** + * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process + * + * @param s HEVC decoding context + * @param dst target buffer for block data at block position + * @param dststride stride of the dst buffer + * @param ref0 reference picture0 buffer at origin (0, 0) + * @param mv0 motion vector0 (relative to block position) to get pixel data from + * @param x_off horizontal position of block from origin (0, 0) + * @param y_off vertical position of block from origin (0, 0) + * @param block_w width of block + * @param block_h height of block + * @param ref1 reference picture1 buffer at origin (0, 0) + * @param mv1 motion vector1 (relative to block position) to get pixel data from + * @param current_mv current motion vector structure + * @param cidx chroma component(cb, cr) + */ +static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1, + int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx) +{ + DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]); + int tmpstride = MAX_PB_SIZE; + HEVCLocalContext *lc = s->HEVClc; + uint8_t *src1 = ref0->data[cidx+1]; + uint8_t *src2 = ref1->data[cidx+1]; + ptrdiff_t src1stride = ref0->linesize[cidx+1]; + ptrdiff_t src2stride = ref1->linesize[cidx+1]; + int weight_flag = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) || + (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag); + int pic_width = s->sps->width >> s->sps->hshift[1]; + int pic_height = s->sps->height >> s->sps->vshift[1]; + Mv *mv0 = ¤t_mv->mv[0]; + Mv *mv1 = ¤t_mv->mv[1]; + int hshift = s->sps->hshift[1]; + int vshift = s->sps->vshift[1]; + + intptr_t mx0 = mv0->x & ((1 << (2 + hshift)) - 1); + intptr_t my0 = mv0->y & ((1 << (2 + vshift)) - 1); + intptr_t mx1 = mv1->x & ((1 << (2 + hshift)) - 1); + intptr_t my1 = mv1->y & ((1 << (2 + vshift)) - 1); + intptr_t _mx0 = mx0 << (1 - hshift); + intptr_t _my0 = my0 << (1 - vshift); + intptr_t _mx1 = mx1 << (1 - hshift); + intptr_t _my1 = my1 << (1 - vshift); + + int x_off0 = x_off + (mv0->x >> (2 + hshift)); + int y_off0 = y_off + (mv0->y >> (2 + vshift)); + int x_off1 = x_off + (mv1->x >> (2 + hshift)); + int y_off1 = y_off + (mv1->y >> (2 + vshift)); + int idx = ff_hevc_pel_weight[block_w]; + src1 += y_off0 * src1stride + (x_off0 << s->sps->pixel_shift); + src2 += y_off1 * src2stride + (x_off1 << s->sps->pixel_shift); + + if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER || + x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER || + y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) { + const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift; int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift)); int buf_offset1 = EPEL_EXTRA_BEFORE * (edge_emu_stride + (1 << s->sps->pixel_shift)); - int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift)); - int buf_offset2 = EPEL_EXTRA_BEFORE * - (edge_emu_stride + (1 << s->sps->pixel_shift)); s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1, edge_emu_stride, src1stride, block_w + EPEL_EXTRA, block_h + EPEL_EXTRA, - x_off - EPEL_EXTRA_BEFORE, - y_off - EPEL_EXTRA_BEFORE, + x_off0 - EPEL_EXTRA_BEFORE, + y_off0 - EPEL_EXTRA_BEFORE, pic_width, pic_height); src1 = lc->edge_emu_buffer + buf_offset1; src1stride = edge_emu_stride; - s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride, - block_w, block_h, mx, my, lc->mc_buffer); + } - s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2, + if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER || + x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER || + y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) { + const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift; + int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift)); + int buf_offset1 = EPEL_EXTRA_BEFORE * + (edge_emu_stride + (1 << s->sps->pixel_shift)); + + s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1, edge_emu_stride, src2stride, block_w + EPEL_EXTRA, block_h + EPEL_EXTRA, - x_off - EPEL_EXTRA_BEFORE, - y_off - EPEL_EXTRA_BEFORE, + x_off1 - EPEL_EXTRA_BEFORE, + y_off1 - EPEL_EXTRA_BEFORE, pic_width, pic_height); - src2 = lc->edge_emu_buffer + buf_offset2; - src2stride = edge_emu_stride; - s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride, - block_w, block_h, mx, my, - lc->mc_buffer); - } else { - s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride, - block_w, block_h, mx, my, - lc->mc_buffer); - s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride, - block_w, block_h, mx, my, - lc->mc_buffer); + src2 = lc->edge_emu_buffer2 + buf_offset1; + src2stride = edge_emu_stride; } + + s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](tmp, tmpstride, src1, src1stride, + block_h, _mx0, _my0, block_w); + if (!weight_flag) + s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1], + src2, src2stride, tmp, tmpstride, + block_h, _mx1, _my1, block_w); + else + s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1], + src2, src2stride, tmp, tmpstride, + block_h, + s->sh.chroma_log2_weight_denom, + s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx], + s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx], + s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx], + s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx], + _mx1, _my1, block_w); } static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref, @@ -1236,9 +1425,6 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, MvField *tab_mvf = s->ref->tab_mvf; RefPicList *refPicList = s->ref->refPicList; HEVCFrame *ref0, *ref1; - - int tmpstride = MAX_PB_SIZE; - uint8_t *dst0 = POS(0, x0, y0); uint8_t *dst1 = POS(1, x0, y0); uint8_t *dst2 = POS(2, x0, y0); @@ -1287,6 +1473,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, } else { enum InterPredIdc inter_pred_idc = PRED_L0; ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH); + current_mv.pred_flag = 0; if (s->sh.slice_type == B_SLICE) inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH); @@ -1295,7 +1482,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ref_idx[0] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]); current_mv.ref_idx[0] = ref_idx[0]; } - current_mv.pred_flag[0] = 1; + current_mv.pred_flag = PF_L0; ff_hevc_hls_mvd_coding(s, x0, y0, 0); mvp_flag[0] = ff_hevc_mvp_lx_flag_decode(s); ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size, @@ -1318,7 +1505,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ff_hevc_hls_mvd_coding(s, x0, y0, 1); } - current_mv.pred_flag[1] = 1; + current_mv.pred_flag += PF_L1; mvp_flag[1] = ff_hevc_mvp_lx_flag_decode(s); ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size, partIdx, merge_idx, ¤t_mv, @@ -1336,148 +1523,69 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, } } - if (current_mv.pred_flag[0]) { + if (current_mv.pred_flag & PF_L0) { ref0 = refPicList[0].ref[current_mv.ref_idx[0]]; if (!ref0) return; hevc_await_progress(s, ref0, ¤t_mv.mv[0], y0, nPbH); } - if (current_mv.pred_flag[1]) { + if (current_mv.pred_flag & PF_L1) { ref1 = refPicList[1].ref[current_mv.ref_idx[1]]; if (!ref1) return; hevc_await_progress(s, ref1, ¤t_mv.mv[1], y0, nPbH); } - if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) { - DECLARE_ALIGNED(16, int16_t, tmp[MAX_PB_SIZE * MAX_PB_SIZE]); - DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]); + if (current_mv.pred_flag == PF_L0) { + int x0_c = x0 >> s->sps->hshift[1]; + int y0_c = y0 >> s->sps->vshift[1]; + int nPbW_c = nPbW >> s->sps->hshift[1]; + int nPbH_c = nPbH >> s->sps->vshift[1]; - luma_mc(s, tmp, tmpstride, ref0->frame, - ¤t_mv.mv[0], x0, y0, nPbW, nPbH); + luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame, + ¤t_mv.mv[0], x0, y0, nPbW, nPbH, + s->sh.luma_weight_l0[current_mv.ref_idx[0]], + s->sh.luma_offset_l0[current_mv.ref_idx[0]]); - if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) || - (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) { - s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom, - s->sh.luma_weight_l0[current_mv.ref_idx[0]], - s->sh.luma_offset_l0[current_mv.ref_idx[0]], - dst0, s->frame->linesize[0], tmp, - tmpstride, nPbW, nPbH); - } else { - s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH); - } - chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame, - ¤t_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2); + chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1], + 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, + s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]); + chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2], + 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, + s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]); + } else if (current_mv.pred_flag == PF_L1) { + int x0_c = x0 >> s->sps->hshift[1]; + int y0_c = y0 >> s->sps->vshift[1]; + int nPbW_c = nPbW >> s->sps->hshift[1]; + int nPbH_c = nPbH >> s->sps->vshift[1]; - if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) || - (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) { - s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom, - s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], - s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0], - dst1, s->frame->linesize[1], tmp, tmpstride, - nPbW / 2, nPbH / 2); - s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom, - s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], - s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1], - dst2, s->frame->linesize[2], tmp2, tmpstride, - nPbW / 2, nPbH / 2); - } else { - s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2); - s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2); - } - } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) { - DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]); - DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]); + luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame, + ¤t_mv.mv[1], x0, y0, nPbW, nPbH, + s->sh.luma_weight_l1[current_mv.ref_idx[1]], + s->sh.luma_offset_l1[current_mv.ref_idx[1]]); - if (!ref1) - return; + chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1], + 1, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, + s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]); - luma_mc(s, tmp, tmpstride, ref1->frame, - ¤t_mv.mv[1], x0, y0, nPbW, nPbH); + chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2], + 1, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, + s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]); + } else if (current_mv.pred_flag == PF_BI) { + int x0_c = x0 >> s->sps->hshift[1]; + int y0_c = y0 >> s->sps->vshift[1]; + int nPbW_c = nPbW >> s->sps->hshift[1]; + int nPbH_c = nPbH >> s->sps->vshift[1]; - if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) || - (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) { - s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom, - s->sh.luma_weight_l1[current_mv.ref_idx[1]], - s->sh.luma_offset_l1[current_mv.ref_idx[1]], - dst0, s->frame->linesize[0], tmp, tmpstride, - nPbW, nPbH); - } else { - s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH); - } + luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame, + ¤t_mv.mv[0], x0, y0, nPbW, nPbH, + ref1->frame, ¤t_mv.mv[1], ¤t_mv); - chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame, - ¤t_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2); + chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame, + x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, 0); - if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) || - (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) { - s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom, - s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], - s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0], - dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2); - s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom, - s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], - s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1], - dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2); - } else { - s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2); - s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2); - } - } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) { - DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]); - DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]); - DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]); - DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]); - HEVCFrame *ref0 = refPicList[0].ref[current_mv.ref_idx[0]]; - HEVCFrame *ref1 = refPicList[1].ref[current_mv.ref_idx[1]]; - - if (!ref0 || !ref1) - return; - - luma_mc(s, tmp, tmpstride, ref0->frame, - ¤t_mv.mv[0], x0, y0, nPbW, nPbH); - luma_mc(s, tmp2, tmpstride, ref1->frame, - ¤t_mv.mv[1], x0, y0, nPbW, nPbH); - - if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) || - (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) { - s->hevcdsp.weighted_pred_avg(s->sh.luma_log2_weight_denom, - s->sh.luma_weight_l0[current_mv.ref_idx[0]], - s->sh.luma_weight_l1[current_mv.ref_idx[1]], - s->sh.luma_offset_l0[current_mv.ref_idx[0]], - s->sh.luma_offset_l1[current_mv.ref_idx[1]], - dst0, s->frame->linesize[0], - tmp, tmp2, tmpstride, nPbW, nPbH); - } else { - s->hevcdsp.put_weighted_pred_avg(dst0, s->frame->linesize[0], - tmp, tmp2, tmpstride, nPbW, nPbH); - } - - chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame, - ¤t_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2); - chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame, - ¤t_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2); - - if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) || - (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) { - s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom, - s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], - s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], - s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0], - s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0], - dst1, s->frame->linesize[1], tmp, tmp3, - tmpstride, nPbW / 2, nPbH / 2); - s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom, - s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], - s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], - s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1], - s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1], - dst2, s->frame->linesize[2], tmp2, tmp4, - tmpstride, nPbW / 2, nPbH / 2); - } else { - s->hevcdsp.put_weighted_pred_avg(dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbW/2, nPbH/2); - s->hevcdsp.put_weighted_pred_avg(dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbW/2, nPbH/2); - } + chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame, + x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, 1); } } @@ -1557,15 +1665,7 @@ static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size, intra_pred_mode, size_in_pus); for (j = 0; j < size_in_pus; j++) { - tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra = 1; - tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0; - tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0; - tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0] = 0; - tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1] = 0; - tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x = 0; - tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y = 0; - tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x = 0; - tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y = 0; + tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA; } } @@ -1640,11 +1740,12 @@ static void intra_prediction_unit_default_value(HEVCContext *s, if (size_in_pus == 0) size_in_pus = 1; - for (j = 0; j < size_in_pus; j++) { + for (j = 0; j < size_in_pus; j++) memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus); - for (k = 0; k < size_in_pus; k++) - tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA; - } + if (lc->cu.pred_mode == MODE_INTRA) + for (j = 0; j < size_in_pus; j++) + for (k = 0; k < size_in_pus; k++) + tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA; } static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size) @@ -1680,7 +1781,6 @@ static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size) if (s->sh.slice_type != I_SLICE) { uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb); - lc->cu.pred_mode = MODE_SKIP; x = y_cb * min_cb_width + x_cb; for (y = 0; y < length; y++) { memset(&s->skip_flag[x], skip_flag, length); @@ -1694,9 +1794,7 @@ static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size) intra_prediction_unit_default_value(s, x0, y0, log2_cb_size); if (!s->sh.disable_deblocking_filter_flag) - ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size, - lc->slice_or_tiles_up_boundary, - lc->slice_or_tiles_left_boundary); + ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size); } else { if (s->sh.slice_type != I_SLICE) lc->cu.pred_mode = ff_hevc_pred_mode_decode(s); @@ -1779,9 +1877,7 @@ static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size) return ret; } else { if (!s->sh.disable_deblocking_filter_flag) - ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size, - lc->slice_or_tiles_up_boundary, - lc->slice_or_tiles_left_boundary); + ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size); } } } diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h index 4e85742fa2..3c88b69a47 100644 --- a/libavcodec/hevc.h +++ b/libavcodec/hevc.h @@ -71,6 +71,9 @@ #define EPEL_EXTRA_BEFORE 1 #define EPEL_EXTRA_AFTER 2 #define EPEL_EXTRA 3 +#define QPEL_EXTRA_BEFORE 3 +#define QPEL_EXTRA_AFTER 4 +#define QPEL_EXTRA 7 #define EDGE_EMU_BUFFER_STRIDE 80 @@ -201,6 +204,13 @@ enum InterPredIdc { PRED_BI, }; +enum PredFlag { + PF_INTRA = 0, + PF_L0, + PF_L1, + PF_BI, +}; + enum IntraPredMode { INTRA_PLANAR = 0, INTRA_DC, @@ -626,8 +636,7 @@ typedef struct Mv { typedef struct MvField { Mv mv[2]; int8_t ref_idx[2]; - int8_t pred_flag[2]; - uint8_t is_intra; + int8_t pred_flag; } MvField; typedef struct NeighbourAvailable { @@ -735,6 +744,8 @@ typedef struct HEVCLocalContext { int end_of_tiles_y; /* +7 is for subpixel interpolation, *2 for high bit depths */ DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[(MAX_PB_SIZE + 7) * EDGE_EMU_BUFFER_STRIDE * 2]; + DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer2)[(MAX_PB_SIZE + 7) * EDGE_EMU_BUFFER_STRIDE * 2]; + CodingTree ct; CodingUnit cu; PredictionUnit pu; @@ -973,9 +984,7 @@ void ff_hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, void ff_hevc_set_qPy(HEVCContext *s, int xC, int yC, int xBase, int yBase, int log2_cb_size); void ff_hevc_deblocking_boundary_strengths(HEVCContext *s, int x0, int y0, - int log2_trafo_size, - int slice_or_tiles_up_boundary, - int slice_or_tiles_left_boundary); + int log2_trafo_size); int ff_hevc_cu_qp_delta_sign_flag(HEVCContext *s); int ff_hevc_cu_qp_delta_abs(HEVCContext *s); void ff_hevc_hls_filter(HEVCContext *s, int x, int y); diff --git a/libavcodec/hevc_cabac.c b/libavcodec/hevc_cabac.c index 7d04eda0c6..288f88576c 100644 --- a/libavcodec/hevc_cabac.c +++ b/libavcodec/hevc_cabac.c @@ -537,7 +537,7 @@ static void cabac_init_state(HEVCContext *s) int init_value = init_values[init_type][i]; int m = (init_value >> 4) * 5 - 45; int n = ((init_value & 15) << 3) - 16; - int pre = 2 * (((m * av_clip_c(s->sh.slice_qp, 0, 51)) >> 4) + n) - 127; + int pre = 2 * (((m * av_clip(s->sh.slice_qp, 0, 51)) >> 4) + n) - 127; pre ^= pre >> 31; if (pre > 124) @@ -1114,7 +1114,7 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, else offset = s->pps->cr_qp_offset + s->sh.slice_cr_qp_offset; - qp_i = av_clip_c(qp_y + offset, - s->sps->qp_bd_offset, 57); + qp_i = av_clip(qp_y + offset, - s->sps->qp_bd_offset, 57); if (qp_i < 30) qp = qp_i; else if (qp_i > 43) diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c index 443f2223cb..ec7a21187d 100644 --- a/libavcodec/hevc_filter.c +++ b/libavcodec/hevc_filter.c @@ -60,7 +60,7 @@ static int chroma_tc(HEVCContext *s, int qp_y, int c_idx, int tc_offset) else offset = s->pps->cr_qp_offset; - qp_i = av_clip_c(qp_y + offset, 0, 57); + qp_i = av_clip(qp_y + offset, 0, 57); if (qp_i < 30) qp = qp_i; else if (qp_i > 43) @@ -68,7 +68,7 @@ static int chroma_tc(HEVCContext *s, int qp_y, int c_idx, int tc_offset) else qp = qp_c[qp_i - 30]; - idxt = av_clip_c(qp + DEFAULT_INTRA_TC_OFFSET + tc_offset, 0, 53); + idxt = av_clip(qp + DEFAULT_INTRA_TC_OFFSET + tc_offset, 0, 53); return tctable[idxt]; } @@ -477,138 +477,96 @@ static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0) } } -static int boundary_strength(HEVCContext *s, MvField *curr, - uint8_t curr_cbf_luma, MvField *neigh, - uint8_t neigh_cbf_luma, - RefPicList *neigh_refPicList, - int tu_border) +static int boundary_strength(HEVCContext *s, MvField *curr, MvField *neigh, + RefPicList *neigh_refPicList) { - int mvs = curr->pred_flag[0] + curr->pred_flag[1]; - - if (tu_border) { - if (curr->is_intra || neigh->is_intra) - return 2; - if (curr_cbf_luma || neigh_cbf_luma) + if (curr->pred_flag == PF_BI && neigh->pred_flag == PF_BI) { + // same L0 and L1 + if (s->ref->refPicList[0].list[curr->ref_idx[0]] == neigh_refPicList[0].list[neigh->ref_idx[0]] && + s->ref->refPicList[0].list[curr->ref_idx[0]] == s->ref->refPicList[1].list[curr->ref_idx[1]] && + neigh_refPicList[0].list[neigh->ref_idx[0]] == neigh_refPicList[1].list[neigh->ref_idx[1]]) { + if ((FFABS(neigh->mv[0].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[0].y) >= 4 || + FFABS(neigh->mv[1].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[1].y) >= 4) && + (FFABS(neigh->mv[1].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[0].y) >= 4 || + FFABS(neigh->mv[0].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[1].y) >= 4)) + return 1; + else + return 0; + } else if (neigh_refPicList[0].list[neigh->ref_idx[0]] == s->ref->refPicList[0].list[curr->ref_idx[0]] && + neigh_refPicList[1].list[neigh->ref_idx[1]] == s->ref->refPicList[1].list[curr->ref_idx[1]]) { + if (FFABS(neigh->mv[0].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[0].y) >= 4 || + FFABS(neigh->mv[1].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[1].y) >= 4) + return 1; + else + return 0; + } else if (neigh_refPicList[1].list[neigh->ref_idx[1]] == s->ref->refPicList[0].list[curr->ref_idx[0]] && + neigh_refPicList[0].list[neigh->ref_idx[0]] == s->ref->refPicList[1].list[curr->ref_idx[1]]) { + if (FFABS(neigh->mv[1].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[0].y) >= 4 || + FFABS(neigh->mv[0].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[1].y) >= 4) + return 1; + else + return 0; + } else { return 1; - } - - if (mvs == neigh->pred_flag[0] + neigh->pred_flag[1]) { - if (mvs == 2) { - // same L0 and L1 - if (s->ref->refPicList[0].list[curr->ref_idx[0]] == neigh_refPicList[0].list[neigh->ref_idx[0]] && - s->ref->refPicList[0].list[curr->ref_idx[0]] == s->ref->refPicList[1].list[curr->ref_idx[1]] && - neigh_refPicList[0].list[neigh->ref_idx[0]] == neigh_refPicList[1].list[neigh->ref_idx[1]]) { - if ((abs(neigh->mv[0].x - curr->mv[0].x) >= 4 || abs(neigh->mv[0].y - curr->mv[0].y) >= 4 || - abs(neigh->mv[1].x - curr->mv[1].x) >= 4 || abs(neigh->mv[1].y - curr->mv[1].y) >= 4) && - (abs(neigh->mv[1].x - curr->mv[0].x) >= 4 || abs(neigh->mv[1].y - curr->mv[0].y) >= 4 || - abs(neigh->mv[0].x - curr->mv[1].x) >= 4 || abs(neigh->mv[0].y - curr->mv[1].y) >= 4)) - return 1; - else - return 0; - } else if (neigh_refPicList[0].list[neigh->ref_idx[0]] == s->ref->refPicList[0].list[curr->ref_idx[0]] && - neigh_refPicList[1].list[neigh->ref_idx[1]] == s->ref->refPicList[1].list[curr->ref_idx[1]]) { - if (abs(neigh->mv[0].x - curr->mv[0].x) >= 4 || abs(neigh->mv[0].y - curr->mv[0].y) >= 4 || - abs(neigh->mv[1].x - curr->mv[1].x) >= 4 || abs(neigh->mv[1].y - curr->mv[1].y) >= 4) - return 1; - else - return 0; - } else if (neigh_refPicList[1].list[neigh->ref_idx[1]] == s->ref->refPicList[0].list[curr->ref_idx[0]] && - neigh_refPicList[0].list[neigh->ref_idx[0]] == s->ref->refPicList[1].list[curr->ref_idx[1]]) { - if (abs(neigh->mv[1].x - curr->mv[0].x) >= 4 || abs(neigh->mv[1].y - curr->mv[0].y) >= 4 || - abs(neigh->mv[0].x - curr->mv[1].x) >= 4 || abs(neigh->mv[0].y - curr->mv[1].y) >= 4) - return 1; - else - return 0; - } else { - return 1; - } - } else { // 1 MV - Mv A, B; - int ref_A, ref_B; - - if (curr->pred_flag[0]) { - A = curr->mv[0]; - ref_A = s->ref->refPicList[0].list[curr->ref_idx[0]]; - } else { - A = curr->mv[1]; - ref_A = s->ref->refPicList[1].list[curr->ref_idx[1]]; - } - - if (neigh->pred_flag[0]) { - B = neigh->mv[0]; - ref_B = neigh_refPicList[0].list[neigh->ref_idx[0]]; - } else { - B = neigh->mv[1]; - ref_B = neigh_refPicList[1].list[neigh->ref_idx[1]]; - } - - if (ref_A == ref_B) { - if (abs(A.x - B.x) >= 4 || abs(A.y - B.y) >= 4) - return 1; - else - return 0; - } else - return 1; } + } else if ((curr->pred_flag != PF_BI) && (neigh->pred_flag != PF_BI)){ // 1 MV + Mv A, B; + int ref_A, ref_B; + + if (curr->pred_flag & 1) { + A = curr->mv[0]; + ref_A = s->ref->refPicList[0].list[curr->ref_idx[0]]; + } else { + A = curr->mv[1]; + ref_A = s->ref->refPicList[1].list[curr->ref_idx[1]]; + } + + if (neigh->pred_flag & 1) { + B = neigh->mv[0]; + ref_B = neigh_refPicList[0].list[neigh->ref_idx[0]]; + } else { + B = neigh->mv[1]; + ref_B = neigh_refPicList[1].list[neigh->ref_idx[1]]; + } + + if (ref_A == ref_B) { + if (FFABS(A.x - B.x) >= 4 || FFABS(A.y - B.y) >= 4) + return 1; + else + return 0; + } else + return 1; } return 1; } void ff_hevc_deblocking_boundary_strengths(HEVCContext *s, int x0, int y0, - int log2_trafo_size, - int slice_or_tiles_up_boundary, - int slice_or_tiles_left_boundary) + int log2_trafo_size) { + HEVCLocalContext *lc = s->HEVClc; MvField *tab_mvf = s->ref->tab_mvf; int log2_min_pu_size = s->sps->log2_min_pu_size; int log2_min_tu_size = s->sps->log2_min_tb_size; int min_pu_width = s->sps->min_pu_width; int min_tu_width = s->sps->min_tb_width; int is_intra = tab_mvf[(y0 >> log2_min_pu_size) * min_pu_width + - (x0 >> log2_min_pu_size)].is_intra; + (x0 >> log2_min_pu_size)].pred_flag == PF_INTRA; int i, j, bs; if (y0 > 0 && (y0 & 7) == 0) { - int yp_pu = (y0 - 1) >> log2_min_pu_size; - int yq_pu = y0 >> log2_min_pu_size; - int yp_tu = (y0 - 1) >> log2_min_tu_size; - int yq_tu = y0 >> log2_min_tu_size; - - for (i = 0; i < (1 << log2_trafo_size); i += 4) { - int x_pu = (x0 + i) >> log2_min_pu_size; - int x_tu = (x0 + i) >> log2_min_tu_size; - MvField *top = &tab_mvf[yp_pu * min_pu_width + x_pu]; - MvField *curr = &tab_mvf[yq_pu * min_pu_width + x_pu]; - uint8_t top_cbf_luma = s->cbf_luma[yp_tu * min_tu_width + x_tu]; - uint8_t curr_cbf_luma = s->cbf_luma[yq_tu * min_tu_width + x_tu]; + int bd_ctby = y0 & ((1 << s->sps->log2_ctb_size) - 1); + int bd_slice = s->sh.slice_loop_filter_across_slices_enabled_flag || + !(lc->slice_or_tiles_up_boundary & 1); + int bd_tiles = s->pps->loop_filter_across_tiles_enabled_flag || + !(lc->slice_or_tiles_up_boundary & 2); + if (((bd_slice && bd_tiles) || bd_ctby)) { + int yp_pu = (y0 - 1) >> log2_min_pu_size; + int yq_pu = y0 >> log2_min_pu_size; + int yp_tu = (y0 - 1) >> log2_min_tu_size; + int yq_tu = y0 >> log2_min_tu_size; RefPicList *top_refPicList = ff_hevc_get_ref_list(s, s->ref, - x0 + i, y0 - 1); - - bs = boundary_strength(s, curr, curr_cbf_luma, - top, top_cbf_luma, top_refPicList, 1); - if (!s->sh.slice_loop_filter_across_slices_enabled_flag && - (slice_or_tiles_up_boundary & 1) && - (y0 % (1 << s->sps->log2_ctb_size)) == 0) - bs = 0; - else if (!s->pps->loop_filter_across_tiles_enabled_flag && - (slice_or_tiles_up_boundary & 2) && - (y0 % (1 << s->sps->log2_ctb_size)) == 0) - bs = 0; - if (y0 == 0 || s->sh.disable_deblocking_filter_flag == 1) - bs = 0; - if (bs) - s->horizontal_bs[((x0 + i) + y0 * s->bs_width) >> 2] = bs; - } - } - - // bs for TU internal horizontal PU boundaries - if (log2_trafo_size > s->sps->log2_min_pu_size && !is_intra) - for (j = 8; j < (1 << log2_trafo_size); j += 8) { - int yp_pu = (y0 + j - 1) >> log2_min_pu_size; - int yq_pu = (y0 + j) >> log2_min_pu_size; - int yp_tu = (y0 + j - 1) >> log2_min_tu_size; - int yq_tu = (y0 + j) >> log2_min_tu_size; + x0, y0 - 1); for (i = 0; i < (1 << log2_trafo_size); i += 4) { int x_pu = (x0 + i) >> log2_min_pu_size; @@ -617,81 +575,86 @@ void ff_hevc_deblocking_boundary_strengths(HEVCContext *s, int x0, int y0, MvField *curr = &tab_mvf[yq_pu * min_pu_width + x_pu]; uint8_t top_cbf_luma = s->cbf_luma[yp_tu * min_tu_width + x_tu]; uint8_t curr_cbf_luma = s->cbf_luma[yq_tu * min_tu_width + x_tu]; - RefPicList *top_refPicList = ff_hevc_get_ref_list(s, s->ref, - x0 + i, - y0 + j - 1); - bs = boundary_strength(s, curr, curr_cbf_luma, - top, top_cbf_luma, top_refPicList, 0); - if (s->sh.disable_deblocking_filter_flag == 1) - bs = 0; - if (bs) - s->horizontal_bs[((x0 + i) + (y0 + j) * s->bs_width) >> 2] = bs; + if (curr->pred_flag == PF_INTRA || top->pred_flag == PF_INTRA) + bs = 2; + else if (curr_cbf_luma || top_cbf_luma) + bs = 1; + else + bs = boundary_strength(s, curr, top, top_refPicList); + s->horizontal_bs[((x0 + i) + y0 * s->bs_width) >> 2] = bs; } } - - // bs for vertical TU boundaries - if (x0 > 0 && (x0 & 7) == 0) { - int xp_pu = (x0 - 1) >> log2_min_pu_size; - int xq_pu = x0 >> log2_min_pu_size; - int xp_tu = (x0 - 1) >> log2_min_tu_size; - int xq_tu = x0 >> log2_min_tu_size; - - for (i = 0; i < (1 << log2_trafo_size); i += 4) { - int y_pu = (y0 + i) >> log2_min_pu_size; - int y_tu = (y0 + i) >> log2_min_tu_size; - MvField *left = &tab_mvf[y_pu * min_pu_width + xp_pu]; - MvField *curr = &tab_mvf[y_pu * min_pu_width + xq_pu]; - - uint8_t left_cbf_luma = s->cbf_luma[y_tu * min_tu_width + xp_tu]; - uint8_t curr_cbf_luma = s->cbf_luma[y_tu * min_tu_width + xq_tu]; - RefPicList *left_refPicList = ff_hevc_get_ref_list(s, s->ref, - x0 - 1, y0 + i); - - bs = boundary_strength(s, curr, curr_cbf_luma, - left, left_cbf_luma, left_refPicList, 1); - if (!s->sh.slice_loop_filter_across_slices_enabled_flag && - (slice_or_tiles_left_boundary & 1) && - (x0 % (1 << s->sps->log2_ctb_size)) == 0) - bs = 0; - else if (!s->pps->loop_filter_across_tiles_enabled_flag && - (slice_or_tiles_left_boundary & 2) && - (x0 % (1 << s->sps->log2_ctb_size)) == 0) - bs = 0; - if (x0 == 0 || s->sh.disable_deblocking_filter_flag == 1) - bs = 0; - if (bs) - s->vertical_bs[(x0 >> 3) + ((y0 + i) >> 2) * s->bs_width] = bs; - } } - // bs for TU internal vertical PU boundaries - if (log2_trafo_size > log2_min_pu_size && !is_intra) - for (j = 0; j < (1 << log2_trafo_size); j += 4) { - int y_pu = (y0 + j) >> log2_min_pu_size; - int y_tu = (y0 + j) >> log2_min_tu_size; + // bs for vertical TU boundaries + if (x0 > 0 && (x0 & 7) == 0) { + int bd_ctbx = x0 & ((1 << s->sps->log2_ctb_size) - 1); + int bd_slice = s->sh.slice_loop_filter_across_slices_enabled_flag || + !(lc->slice_or_tiles_left_boundary & 1); + int bd_tiles = s->pps->loop_filter_across_tiles_enabled_flag || + !(lc->slice_or_tiles_left_boundary & 2); + if (((bd_slice && bd_tiles) || bd_ctbx)) { + int xp_pu = (x0 - 1) >> log2_min_pu_size; + int xq_pu = x0 >> log2_min_pu_size; + int xp_tu = (x0 - 1) >> log2_min_tu_size; + int xq_tu = x0 >> log2_min_tu_size; + RefPicList *left_refPicList = ff_hevc_get_ref_list(s, s->ref, + x0 - 1, y0); - for (i = 8; i < (1 << log2_trafo_size); i += 8) { - int xp_pu = (x0 + i - 1) >> log2_min_pu_size; - int xq_pu = (x0 + i) >> log2_min_pu_size; - int xp_tu = (x0 + i - 1) >> log2_min_tu_size; - int xq_tu = (x0 + i) >> log2_min_tu_size; + for (i = 0; i < (1 << log2_trafo_size); i += 4) { + int y_pu = (y0 + i) >> log2_min_pu_size; + int y_tu = (y0 + i) >> log2_min_tu_size; MvField *left = &tab_mvf[y_pu * min_pu_width + xp_pu]; MvField *curr = &tab_mvf[y_pu * min_pu_width + xq_pu]; uint8_t left_cbf_luma = s->cbf_luma[y_tu * min_tu_width + xp_tu]; uint8_t curr_cbf_luma = s->cbf_luma[y_tu * min_tu_width + xq_tu]; - RefPicList *left_refPicList = ff_hevc_get_ref_list(s, s->ref, - x0 + i - 1, - y0 + j); - bs = boundary_strength(s, curr, curr_cbf_luma, - left, left_cbf_luma, left_refPicList, 0); - if (s->sh.disable_deblocking_filter_flag == 1) - bs = 0; - if (bs) - s->vertical_bs[((x0 + i) >> 3) + ((y0 + j) >> 2) * s->bs_width] = bs; + if (curr->pred_flag == PF_INTRA || left->pred_flag == PF_INTRA) + bs = 2; + else if (curr_cbf_luma || left_cbf_luma) + bs = 1; + else + bs = boundary_strength(s, curr, left, left_refPicList); + s->vertical_bs[(x0 >> 3) + ((y0 + i) >> 2) * s->bs_width] = bs; } } + } + + if (log2_trafo_size > log2_min_pu_size && !is_intra) { + RefPicList *refPicList = ff_hevc_get_ref_list(s, s->ref, + x0, + y0); + // bs for TU internal horizontal PU boundaries + for (j = 8; j < (1 << log2_trafo_size); j += 8) { + int yp_pu = (y0 + j - 1) >> log2_min_pu_size; + int yq_pu = (y0 + j) >> log2_min_pu_size; + + for (i = 0; i < (1 << log2_trafo_size); i += 4) { + int x_pu = (x0 + i) >> log2_min_pu_size; + MvField *top = &tab_mvf[yp_pu * min_pu_width + x_pu]; + MvField *curr = &tab_mvf[yq_pu * min_pu_width + x_pu]; + + bs = boundary_strength(s, curr, top, refPicList); + s->horizontal_bs[((x0 + i) + (y0 + j) * s->bs_width) >> 2] = bs; + } + } + + // bs for TU internal vertical PU boundaries + for (j = 0; j < (1 << log2_trafo_size); j += 4) { + int y_pu = (y0 + j) >> log2_min_pu_size; + + for (i = 8; i < (1 << log2_trafo_size); i += 8) { + int xp_pu = (x0 + i - 1) >> log2_min_pu_size; + int xq_pu = (x0 + i) >> log2_min_pu_size; + MvField *left = &tab_mvf[y_pu * min_pu_width + xp_pu]; + MvField *curr = &tab_mvf[y_pu * min_pu_width + xq_pu]; + + bs = boundary_strength(s, curr, left, refPicList); + s->vertical_bs[((x0 + i) >> 3) + ((y0 + j) >> 2) * s->bs_width] = bs; + } + } + } } #undef LUMA diff --git a/libavcodec/hevc_mvs.c b/libavcodec/hevc_mvs.c index 2b5c07c655..b7ca79c35e 100644 --- a/libavcodec/hevc_mvs.c +++ b/libavcodec/hevc_mvs.c @@ -123,16 +123,18 @@ static int isDiffMER(HEVCContext *s, int xN, int yN, int xP, int yP) // check if the mv's and refidx are the same between A and B static int compareMVrefidx(struct MvField A, struct MvField B) { - if (A.pred_flag[0] && A.pred_flag[1] && B.pred_flag[0] && B.pred_flag[1]) - return MATCH(ref_idx[0]) && MATCH(mv[0].x) && MATCH(mv[0].y) && - MATCH(ref_idx[1]) && MATCH(mv[1].x) && MATCH(mv[1].y); - - if (A.pred_flag[0] && !A.pred_flag[1] && B.pred_flag[0] && !B.pred_flag[1]) - return MATCH(ref_idx[0]) && MATCH(mv[0].x) && MATCH(mv[0].y); - - if (!A.pred_flag[0] && A.pred_flag[1] && !B.pred_flag[0] && B.pred_flag[1]) - return MATCH(ref_idx[1]) && MATCH(mv[1].x) && MATCH(mv[1].y); - + int a_pf = A.pred_flag; + int b_pf = B.pred_flag; + if (a_pf == b_pf) { + if (a_pf == PF_BI) { + return MATCH(ref_idx[0]) && MATCH(mv[0].x) && MATCH(mv[0].y) && + MATCH(ref_idx[1]) && MATCH(mv[1].x) && MATCH(mv[1].y); + } else if (a_pf == PF_L0) { + return MATCH(ref_idx[0]) && MATCH(mv[0].x) && MATCH(mv[0].y); + } else if (a_pf == PF_L1) { + return MATCH(ref_idx[1]) && MATCH(mv[1].x) && MATCH(mv[1].y); + } + } return 0; } @@ -140,14 +142,14 @@ static av_always_inline void mv_scale(Mv *dst, Mv *src, int td, int tb) { int tx, scale_factor; - td = av_clip_int8_c(td); - tb = av_clip_int8_c(tb); + td = av_clip_int8(td); + tb = av_clip_int8(tb); tx = (0x4000 + abs(td / 2)) / td; - scale_factor = av_clip_c((tb * tx + 32) >> 6, -4096, 4095); - dst->x = av_clip_int16_c((scale_factor * src->x + 127 + - (scale_factor * src->x < 0)) >> 8); - dst->y = av_clip_int16_c((scale_factor * src->y + 127 + - (scale_factor * src->y < 0)) >> 8); + scale_factor = av_clip((tb * tx + 32) >> 6, -4096, 4095); + dst->x = av_clip_int16((scale_factor * src->x + 127 + + (scale_factor * src->x < 0)) >> 8); + dst->y = av_clip_int16((scale_factor * src->y + 127 + + (scale_factor * src->y < 0)) >> 8); } static int check_mvset(Mv *mvLXCol, Mv *mvCol, @@ -168,10 +170,7 @@ static int check_mvset(Mv *mvLXCol, Mv *mvCol, col_poc_diff = colPic - refPicList_col[listCol].list[refidxCol]; cur_poc_diff = poc - refPicList[X].list[refIdxLx]; - if (!col_poc_diff) - col_poc_diff = 1; // error resilience - - if (cur_lt || col_poc_diff == cur_poc_diff) { + if (cur_lt || col_poc_diff == cur_poc_diff || !col_poc_diff) { mvLXCol->x = mvCol->x; mvLXCol->y = mvCol->y; } else { @@ -193,17 +192,14 @@ static int derive_temporal_colocated_mvs(HEVCContext *s, MvField temp_col, { RefPicList *refPicList = s->ref->refPicList; - if (temp_col.is_intra) { - mvLXCol->x = 0; - mvLXCol->y = 0; + if (temp_col.pred_flag == PF_INTRA) return 0; - } - if (temp_col.pred_flag[0] == 0) + if (!(temp_col.pred_flag & PF_L0)) return CHECK_MVSET(1); - else if (temp_col.pred_flag[0] == 1 && temp_col.pred_flag[1] == 0) + else if (temp_col.pred_flag == PF_L0) return CHECK_MVSET(0); - else if (temp_col.pred_flag[0] == 1 && temp_col.pred_flag[1] == 1) { + else if (temp_col.pred_flag == PF_BI) { int check_diffpicount = 0; int i = 0; for (i = 0; i < refPicList[0].nb_refs; i++) { @@ -295,7 +291,7 @@ static int temporal_luma_motion_vector(HEVCContext *s, int x0, int y0, } #define AVAILABLE(cand, v) \ - (cand && !TAB_MVF_PU(v).is_intra) + (cand && !(TAB_MVF_PU(v).pred_flag == PF_INTRA)) #define PRED_BLOCK_AVAILABLE(v) \ check_prediction_block_available(s, log2_cb_size, \ @@ -458,9 +454,7 @@ static void derive_spatial_merge_candidates(HEVCContext *s, int x0, int y0, 0, &mv_l1_col, 1) : 0; if (available_l0 || available_l1) { - mergecandlist[nb_merge_cand].is_intra = 0; - mergecandlist[nb_merge_cand].pred_flag[0] = available_l0; - mergecandlist[nb_merge_cand].pred_flag[1] = available_l1; + mergecandlist[nb_merge_cand].pred_flag = available_l0 + (available_l1 << 1); if (available_l0) { mergecandlist[nb_merge_cand].mv[0] = mv_l0_col; mergecandlist[nb_merge_cand].ref_idx[0] = 0; @@ -487,20 +481,18 @@ static void derive_spatial_merge_candidates(HEVCContext *s, int x0, int y0, MvField l0_cand = mergecandlist[l0_cand_idx]; MvField l1_cand = mergecandlist[l1_cand_idx]; - if (l0_cand.pred_flag[0] && l1_cand.pred_flag[1] && + if ((l0_cand.pred_flag & PF_L0) && (l1_cand.pred_flag & PF_L1) && (refPicList[0].list[l0_cand.ref_idx[0]] != refPicList[1].list[l1_cand.ref_idx[1]] || l0_cand.mv[0].x != l1_cand.mv[1].x || l0_cand.mv[0].y != l1_cand.mv[1].y)) { mergecandlist[nb_merge_cand].ref_idx[0] = l0_cand.ref_idx[0]; mergecandlist[nb_merge_cand].ref_idx[1] = l1_cand.ref_idx[1]; - mergecandlist[nb_merge_cand].pred_flag[0] = 1; - mergecandlist[nb_merge_cand].pred_flag[1] = 1; + mergecandlist[nb_merge_cand].pred_flag = PF_BI; mergecandlist[nb_merge_cand].mv[0].x = l0_cand.mv[0].x; mergecandlist[nb_merge_cand].mv[0].y = l0_cand.mv[0].y; mergecandlist[nb_merge_cand].mv[1].x = l1_cand.mv[1].x; mergecandlist[nb_merge_cand].mv[1].y = l1_cand.mv[1].y; - mergecandlist[nb_merge_cand].is_intra = 0; nb_merge_cand++; } } @@ -508,13 +500,11 @@ static void derive_spatial_merge_candidates(HEVCContext *s, int x0, int y0, // append Zero motion vector candidates while (nb_merge_cand < s->sh.max_num_merge_cand) { - mergecandlist[nb_merge_cand].pred_flag[0] = 1; - mergecandlist[nb_merge_cand].pred_flag[1] = s->sh.slice_type == B_SLICE; + mergecandlist[nb_merge_cand].pred_flag = PF_L0 + ((s->sh.slice_type == B_SLICE) << 1); mergecandlist[nb_merge_cand].mv[0].x = 0; mergecandlist[nb_merge_cand].mv[0].y = 0; mergecandlist[nb_merge_cand].mv[1].x = 0; mergecandlist[nb_merge_cand].mv[1].y = 0; - mergecandlist[nb_merge_cand].is_intra = 0; mergecandlist[nb_merge_cand].ref_idx[0] = zero_idx < nb_refs ? zero_idx : 0; mergecandlist[nb_merge_cand].ref_idx[1] = zero_idx < nb_refs ? zero_idx : 0; @@ -550,11 +540,9 @@ void ff_hevc_luma_mv_merge_mode(HEVCContext *s, int x0, int y0, int nPbW, derive_spatial_merge_candidates(s, x0, y0, nPbW, nPbH, log2_cb_size, singleMCLFlag, part_idx, mergecand_list); - if (mergecand_list[merge_idx].pred_flag[0] == 1 && - mergecand_list[merge_idx].pred_flag[1] == 1 && + if (mergecand_list[merge_idx].pred_flag == PF_BI && (nPbW2 + nPbH2) == 12) { - mergecand_list[merge_idx].ref_idx[1] = -1; - mergecand_list[merge_idx].pred_flag[1] = 0; + mergecand_list[merge_idx].pred_flag = PF_L0; } *mv = mergecand_list[merge_idx]; @@ -585,7 +573,7 @@ static int mv_mp_mode_mx(HEVCContext *s, int x, int y, int pred_flag_index, RefPicList *refPicList = s->ref->refPicList; - if (TAB_MVF(x, y).pred_flag[pred_flag_index] == 1 && + if (((TAB_MVF(x, y).pred_flag) & (1 << pred_flag_index)) && refPicList[pred_flag_index].list[TAB_MVF(x, y).ref_idx[pred_flag_index]] == refPicList[ref_idx_curr].list[ref_idx]) { *mv = TAB_MVF(x, y).mv[pred_flag_index]; return 1; @@ -600,18 +588,20 @@ static int mv_mp_mode_mx_lt(HEVCContext *s, int x, int y, int pred_flag_index, int min_pu_width = s->sps->min_pu_width; RefPicList *refPicList = s->ref->refPicList; - int currIsLongTerm = refPicList[ref_idx_curr].isLongTerm[ref_idx]; - int colIsLongTerm = - refPicList[pred_flag_index].isLongTerm[(TAB_MVF(x, y).ref_idx[pred_flag_index])]; + if ((TAB_MVF(x, y).pred_flag) & (1 << pred_flag_index)) { + int currIsLongTerm = refPicList[ref_idx_curr].isLongTerm[ref_idx]; - if (TAB_MVF(x, y).pred_flag[pred_flag_index] && - colIsLongTerm == currIsLongTerm) { - *mv = TAB_MVF(x, y).mv[pred_flag_index]; - if (!currIsLongTerm) - dist_scale(s, mv, min_pu_width, x, y, - pred_flag_index, ref_idx_curr, ref_idx); - return 1; + int colIsLongTerm = + refPicList[pred_flag_index].isLongTerm[(TAB_MVF(x, y).ref_idx[pred_flag_index])]; + + if (colIsLongTerm == currIsLongTerm) { + *mv = TAB_MVF(x, y).mv[pred_flag_index]; + if (!currIsLongTerm) + dist_scale(s, mv, min_pu_width, x, y, + pred_flag_index, ref_idx_curr, ref_idx); + return 1; + } } return 0; } @@ -657,8 +647,8 @@ void ff_hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW, int xB2_pu = 0, yB2_pu = 0; int is_available_b2 = 0; Mv mvpcand_list[2] = { { 0 } }; - Mv mxA = { 0 }; - Mv mxB = { 0 }; + Mv mxA; + Mv mxB; int ref_idx_curr = 0; int ref_idx = 0; int pred_flag_index_l0; @@ -812,10 +802,5 @@ void ff_hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW, mvpcand_list[numMVPCandLX++] = mv_col; } - // insert zero motion vectors when the number of available candidates are less than 2 - while (numMVPCandLX < 2) - mvpcand_list[numMVPCandLX++] = (Mv){ 0, 0 }; - - mv->mv[LX].x = mvpcand_list[mvp_lx_flag].x; - mv->mv[LX].y = mvpcand_list[mvp_lx_flag].y; + mv->mv[LX] = mvpcand_list[mvp_lx_flag]; } diff --git a/libavcodec/hevcdsp.c b/libavcodec/hevcdsp.c index be44bc9955..a7b4057fdb 100644 --- a/libavcodec/hevcdsp.c +++ b/libavcodec/hevcdsp.c @@ -89,14 +89,20 @@ static const int8_t transform[32][32] = { 90, -90, 88, -85, 82, -78, 73, -67, 61, -54, 46, -38, 31, -22, 13, -4 }, }; -DECLARE_ALIGNED(16, const int8_t, ff_hevc_epel_filters[7][16]) = { - { -2, 58, 10, -2, -2, 58, 10, -2, -2, 58, 10, -2, -2, 58, 10, -2 }, - { -4, 54, 16, -2, -4, 54, 16, -2, -4, 54, 16, -2, -4, 54, 16, -2 }, - { -6, 46, 28, -4, -6, 46, 28, -4, -6, 46, 28, -4, -6, 46, 28, -4 }, - { -4, 36, 36, -4, -4, 36, 36, -4, -4, 36, 36, -4, -4, 36, 36, -4 }, - { -4, 28, 46, -6, -4, 28, 46, -6, -4, 28, 46, -6, -4, 28, 46, -6 }, - { -2, 16, 54, -4, -2, 16, 54, -4, -2, 16, 54, -4, -2, 16, 54, -4 }, - { -2, 10, 58, -2, -2, 10, 58, -2, -2, 10, 58, -2, -2, 10, 58, -2 }, +DECLARE_ALIGNED(16, const int8_t, ff_hevc_epel_filters[7][4]) = { + { -2, 58, 10, -2}, + { -4, 54, 16, -2}, + { -6, 46, 28, -4}, + { -4, 36, 36, -4}, + { -4, 28, 46, -6}, + { -2, 16, 54, -4}, + { -2, 10, 58, -2}, +}; + +DECLARE_ALIGNED(16, const int8_t, ff_hevc_qpel_filters[3][16]) = { + { -1, 4,-10, 58, 17, -5, 1, 0, -1, 4,-10, 58, 17, -5, 1, 0}, + { -1, 4,-11, 40, 40,-11, 4, -1, -1, 4,-11, 40, 40,-11, 4, -1}, + { 0, 1, -5, 17, 58,-10, 4, -1, 0, 1, -5, 17, 58,-10, 4, -1} }; #define BIT_DEPTH 8 @@ -116,6 +122,71 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth) #undef FUNC #define FUNC(a, depth) a ## _ ## depth +#undef PEL_FUNC +#define PEL_FUNC(dst1, idx1, idx2, a, depth) \ + for(i = 0 ; i < 10 ; i++) \ +{ \ + hevcdsp->dst1[i][idx1][idx2] = a ## _ ## depth; \ +} + +#undef EPEL_FUNCS +#define EPEL_FUNCS(depth) \ + PEL_FUNC(put_hevc_epel, 0, 0, put_hevc_pel_pixels, depth); \ + PEL_FUNC(put_hevc_epel, 0, 1, put_hevc_epel_h, depth); \ + PEL_FUNC(put_hevc_epel, 1, 0, put_hevc_epel_v, depth); \ + PEL_FUNC(put_hevc_epel, 1, 1, put_hevc_epel_hv, depth) + +#undef EPEL_UNI_FUNCS +#define EPEL_UNI_FUNCS(depth) \ + PEL_FUNC(put_hevc_epel_uni, 0, 0, put_hevc_pel_uni_pixels, depth); \ + PEL_FUNC(put_hevc_epel_uni, 0, 1, put_hevc_epel_uni_h, depth); \ + PEL_FUNC(put_hevc_epel_uni, 1, 0, put_hevc_epel_uni_v, depth); \ + PEL_FUNC(put_hevc_epel_uni, 1, 1, put_hevc_epel_uni_hv, depth); \ + PEL_FUNC(put_hevc_epel_uni_w, 0, 0, put_hevc_pel_uni_w_pixels, depth); \ + PEL_FUNC(put_hevc_epel_uni_w, 0, 1, put_hevc_epel_uni_w_h, depth); \ + PEL_FUNC(put_hevc_epel_uni_w, 1, 0, put_hevc_epel_uni_w_v, depth); \ + PEL_FUNC(put_hevc_epel_uni_w, 1, 1, put_hevc_epel_uni_w_hv, depth) + +#undef EPEL_BI_FUNCS +#define EPEL_BI_FUNCS(depth) \ + PEL_FUNC(put_hevc_epel_bi, 0, 0, put_hevc_pel_bi_pixels, depth); \ + PEL_FUNC(put_hevc_epel_bi, 0, 1, put_hevc_epel_bi_h, depth); \ + PEL_FUNC(put_hevc_epel_bi, 1, 0, put_hevc_epel_bi_v, depth); \ + PEL_FUNC(put_hevc_epel_bi, 1, 1, put_hevc_epel_bi_hv, depth); \ + PEL_FUNC(put_hevc_epel_bi_w, 0, 0, put_hevc_pel_bi_w_pixels, depth); \ + PEL_FUNC(put_hevc_epel_bi_w, 0, 1, put_hevc_epel_bi_w_h, depth); \ + PEL_FUNC(put_hevc_epel_bi_w, 1, 0, put_hevc_epel_bi_w_v, depth); \ + PEL_FUNC(put_hevc_epel_bi_w, 1, 1, put_hevc_epel_bi_w_hv, depth) + +#undef QPEL_FUNCS +#define QPEL_FUNCS(depth) \ + PEL_FUNC(put_hevc_qpel, 0, 0, put_hevc_pel_pixels, depth); \ + PEL_FUNC(put_hevc_qpel, 0, 1, put_hevc_qpel_h, depth); \ + PEL_FUNC(put_hevc_qpel, 1, 0, put_hevc_qpel_v, depth); \ + PEL_FUNC(put_hevc_qpel, 1, 1, put_hevc_qpel_hv, depth) + +#undef QPEL_UNI_FUNCS +#define QPEL_UNI_FUNCS(depth) \ + PEL_FUNC(put_hevc_qpel_uni, 0, 0, put_hevc_pel_uni_pixels, depth); \ + PEL_FUNC(put_hevc_qpel_uni, 0, 1, put_hevc_qpel_uni_h, depth); \ + PEL_FUNC(put_hevc_qpel_uni, 1, 0, put_hevc_qpel_uni_v, depth); \ + PEL_FUNC(put_hevc_qpel_uni, 1, 1, put_hevc_qpel_uni_hv, depth); \ + PEL_FUNC(put_hevc_qpel_uni_w, 0, 0, put_hevc_pel_uni_w_pixels, depth); \ + PEL_FUNC(put_hevc_qpel_uni_w, 0, 1, put_hevc_qpel_uni_w_h, depth); \ + PEL_FUNC(put_hevc_qpel_uni_w, 1, 0, put_hevc_qpel_uni_w_v, depth); \ + PEL_FUNC(put_hevc_qpel_uni_w, 1, 1, put_hevc_qpel_uni_w_hv, depth) + +#undef QPEL_BI_FUNCS +#define QPEL_BI_FUNCS(depth) \ + PEL_FUNC(put_hevc_qpel_bi, 0, 0, put_hevc_pel_bi_pixels, depth); \ + PEL_FUNC(put_hevc_qpel_bi, 0, 1, put_hevc_qpel_bi_h, depth); \ + PEL_FUNC(put_hevc_qpel_bi, 1, 0, put_hevc_qpel_bi_v, depth); \ + PEL_FUNC(put_hevc_qpel_bi, 1, 1, put_hevc_qpel_bi_hv, depth); \ + PEL_FUNC(put_hevc_qpel_bi_w, 0, 0, put_hevc_pel_bi_w_pixels, depth); \ + PEL_FUNC(put_hevc_qpel_bi_w, 0, 1, put_hevc_qpel_bi_w_h, depth); \ + PEL_FUNC(put_hevc_qpel_bi_w, 1, 0, put_hevc_qpel_bi_w_v, depth); \ + PEL_FUNC(put_hevc_qpel_bi_w, 1, 1, put_hevc_qpel_bi_w_hv, depth) + #define HEVC_DSP(depth) \ hevcdsp->put_pcm = FUNC(put_pcm, depth); \ hevcdsp->transquant_bypass[0] = FUNC(transquant_bypass4x4, depth); \ @@ -138,35 +209,14 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth) hevcdsp->sao_edge_filter[1] = FUNC(sao_edge_filter_1, depth); \ hevcdsp->sao_edge_filter[2] = FUNC(sao_edge_filter_2, depth); \ hevcdsp->sao_edge_filter[3] = FUNC(sao_edge_filter_3, depth); \ - \ - hevcdsp->put_hevc_qpel[0][0] = FUNC(put_hevc_qpel_pixels, depth); \ - hevcdsp->put_hevc_qpel[0][1] = FUNC(put_hevc_qpel_h1, depth); \ - hevcdsp->put_hevc_qpel[0][2] = FUNC(put_hevc_qpel_h2, depth); \ - hevcdsp->put_hevc_qpel[0][3] = FUNC(put_hevc_qpel_h3, depth); \ - hevcdsp->put_hevc_qpel[1][0] = FUNC(put_hevc_qpel_v1, depth); \ - hevcdsp->put_hevc_qpel[1][1] = FUNC(put_hevc_qpel_h1v1, depth); \ - hevcdsp->put_hevc_qpel[1][2] = FUNC(put_hevc_qpel_h2v1, depth); \ - hevcdsp->put_hevc_qpel[1][3] = FUNC(put_hevc_qpel_h3v1, depth); \ - hevcdsp->put_hevc_qpel[2][0] = FUNC(put_hevc_qpel_v2, depth); \ - hevcdsp->put_hevc_qpel[2][1] = FUNC(put_hevc_qpel_h1v2, depth); \ - hevcdsp->put_hevc_qpel[2][2] = FUNC(put_hevc_qpel_h2v2, depth); \ - hevcdsp->put_hevc_qpel[2][3] = FUNC(put_hevc_qpel_h3v2, depth); \ - hevcdsp->put_hevc_qpel[3][0] = FUNC(put_hevc_qpel_v3, depth); \ - hevcdsp->put_hevc_qpel[3][1] = FUNC(put_hevc_qpel_h1v3, depth); \ - hevcdsp->put_hevc_qpel[3][2] = FUNC(put_hevc_qpel_h2v3, depth); \ - hevcdsp->put_hevc_qpel[3][3] = FUNC(put_hevc_qpel_h3v3, depth); \ - \ - hevcdsp->put_hevc_epel[0][0] = FUNC(put_hevc_epel_pixels, depth); \ - hevcdsp->put_hevc_epel[0][1] = FUNC(put_hevc_epel_h, depth); \ - hevcdsp->put_hevc_epel[1][0] = FUNC(put_hevc_epel_v, depth); \ - hevcdsp->put_hevc_epel[1][1] = FUNC(put_hevc_epel_hv, depth); \ - \ - hevcdsp->put_unweighted_pred = FUNC(put_unweighted_pred, depth); \ - hevcdsp->put_weighted_pred_avg = FUNC(put_weighted_pred_avg, depth); \ - \ - hevcdsp->weighted_pred = FUNC(weighted_pred, depth); \ - hevcdsp->weighted_pred_avg = FUNC(weighted_pred_avg, depth); \ - \ + \ + QPEL_FUNCS(depth); \ + QPEL_UNI_FUNCS(depth); \ + QPEL_BI_FUNCS(depth); \ + EPEL_FUNCS(depth); \ + EPEL_UNI_FUNCS(depth); \ + EPEL_BI_FUNCS(depth); \ + \ hevcdsp->hevc_h_loop_filter_luma = FUNC(hevc_h_loop_filter_luma, depth); \ hevcdsp->hevc_v_loop_filter_luma = FUNC(hevc_v_loop_filter_luma, depth); \ hevcdsp->hevc_h_loop_filter_chroma = FUNC(hevc_h_loop_filter_chroma, depth); \ @@ -175,6 +225,7 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth) hevcdsp->hevc_v_loop_filter_luma_c = FUNC(hevc_v_loop_filter_luma, depth); \ hevcdsp->hevc_h_loop_filter_chroma_c = FUNC(hevc_h_loop_filter_chroma, depth); \ hevcdsp->hevc_v_loop_filter_chroma_c = FUNC(hevc_v_loop_filter_chroma, depth); +int i = 0; switch (bit_depth) { case 9: diff --git a/libavcodec/hevcdsp.h b/libavcodec/hevcdsp.h index 4dd3ebd1ee..678c8c9498 100644 --- a/libavcodec/hevcdsp.h +++ b/libavcodec/hevcdsp.h @@ -58,25 +58,34 @@ typedef struct HEVCDSPContext { int height, int c_idx, uint8_t vert_edge, uint8_t horiz_edge, uint8_t diag_edge); - void (*put_hevc_qpel[4][4])(int16_t *dst, ptrdiff_t dststride, uint8_t *src, - ptrdiff_t srcstride, int width, int height, - int16_t *mcbuffer); - void (*put_hevc_epel[2][2])(int16_t *dst, ptrdiff_t dststride, uint8_t *src, - ptrdiff_t srcstride, int width, int height, - int mx, int my, int16_t *mcbuffer); + void (*put_hevc_qpel[10][2][2])(int16_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, + int height, intptr_t mx, intptr_t my, int width); + void (*put_hevc_qpel_uni[10][2][2])(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, + int height, intptr_t mx, intptr_t my, int width); + void (*put_hevc_qpel_uni_w[10][2][2])(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, + int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width); - void (*put_unweighted_pred)(uint8_t *dst, ptrdiff_t dststride, int16_t *src, - ptrdiff_t srcstride, int width, int height); - void (*put_weighted_pred_avg)(uint8_t *dst, ptrdiff_t dststride, - int16_t *src1, int16_t *src2, - ptrdiff_t srcstride, int width, int height); - void (*weighted_pred)(uint8_t denom, int16_t wlxFlag, int16_t olxFlag, - uint8_t *dst, ptrdiff_t dststride, int16_t *src, - ptrdiff_t srcstride, int width, int height); - void (*weighted_pred_avg)(uint8_t denom, int16_t wl0Flag, int16_t wl1Flag, - int16_t ol0Flag, int16_t ol1Flag, uint8_t *dst, - ptrdiff_t dststride, int16_t *src1, int16_t *src2, - ptrdiff_t srcstride, int width, int height); + void (*put_hevc_qpel_bi[10][2][2])(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, + int16_t *src2, ptrdiff_t src2stride, + int height, intptr_t mx, intptr_t my, int width); + void (*put_hevc_qpel_bi_w[10][2][2])(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, + int16_t *src2, ptrdiff_t src2stride, + int height, int denom, int wx0, int wx1, + int ox0, int ox1, intptr_t mx, intptr_t my, int width); + void (*put_hevc_epel[10][2][2])(int16_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, + int height, intptr_t mx, intptr_t my, int width); + + void (*put_hevc_epel_uni[10][2][2])(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, + int height, intptr_t mx, intptr_t my, int width); + void (*put_hevc_epel_uni_w[10][2][2])(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, + int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width); + void (*put_hevc_epel_bi[10][2][2])(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, + int16_t *src2, ptrdiff_t src2stride, + int height, intptr_t mx, intptr_t my, int width); + void (*put_hevc_epel_bi_w[10][2][2])(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, + int16_t *src2, ptrdiff_t src2stride, + int height, int denom, int wx0, int ox0, int wx1, + int ox1, intptr_t mx, intptr_t my, int width); void (*hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride, int *beta, int *tc, @@ -104,6 +113,8 @@ typedef struct HEVCDSPContext { void ff_hevc_dsp_init(HEVCDSPContext *hpc, int bit_depth); -extern const int8_t ff_hevc_epel_filters[7][16]; +extern const int8_t ff_hevc_epel_filters[7][4]; +extern const int8_t ff_hevc_qpel_filters[3][16]; + #endif /* AVCODEC_HEVCDSP_H */ diff --git a/libavcodec/hevcdsp_template.c b/libavcodec/hevcdsp_template.c index 8ceb093dfa..be90c7002b 100644 --- a/libavcodec/hevcdsp_template.c +++ b/libavcodec/hevcdsp_template.c @@ -804,9 +804,13 @@ static void FUNC(sao_edge_filter_3)(uint8_t *_dst, uint8_t *_src, #undef TR_16 #undef TR_32 -static void FUNC(put_hevc_qpel_pixels)(int16_t *dst, ptrdiff_t dststride, - uint8_t *_src, ptrdiff_t _srcstride, - int width, int height, int16_t* mcbuffer) + +//////////////////////////////////////////////////////////////////////////////// +// +//////////////////////////////////////////////////////////////////////////////// +static void FUNC(put_hevc_pel_pixels)(int16_t *dst, ptrdiff_t dststride, + uint8_t *_src, ptrdiff_t _srcstride, + int height, intptr_t mx, intptr_t my, int width) { int x, y; pixel *src = (pixel *)_src; @@ -820,157 +824,573 @@ static void FUNC(put_hevc_qpel_pixels)(int16_t *dst, ptrdiff_t dststride, } } -#define QPEL_FILTER_1(src, stride) \ - (1 * -src[x - 3 * stride] + \ - 4 * src[x - 2 * stride] - \ - 10 * src[x - stride] + \ - 58 * src[x] + \ - 17 * src[x + stride] - \ - 5 * src[x + 2 * stride] + \ - 1 * src[x + 3 * stride]) - -#define QPEL_FILTER_2(src, stride) \ - (1 * -src[x - 3 * stride] + \ - 4 * src[x - 2 * stride] - \ - 11 * src[x - stride] + \ - 40 * src[x] + \ - 40 * src[x + stride] - \ - 11 * src[x + 2 * stride] + \ - 4 * src[x + 3 * stride] - \ - 1 * src[x + 4 * stride]) - -#define QPEL_FILTER_3(src, stride) \ - (1 * src[x - 2 * stride] - \ - 5 * src[x - stride] + \ - 17 * src[x] + \ - 58 * src[x + stride] - \ - 10 * src[x + 2 * stride] + \ - 4 * src[x + 3 * stride] - \ - 1 * src[x + 4 * stride]) - - -#define PUT_HEVC_QPEL_H(H) \ -static void FUNC(put_hevc_qpel_h ## H)(int16_t *dst, ptrdiff_t dststride, \ - uint8_t *_src, ptrdiff_t _srcstride, \ - int width, int height, \ - int16_t* mcbuffer) \ -{ \ - int x, y; \ - pixel *src = (pixel*)_src; \ - ptrdiff_t srcstride = _srcstride / sizeof(pixel); \ - \ - for (y = 0; y < height; y++) { \ - for (x = 0; x < width; x++) \ - dst[x] = QPEL_FILTER_ ## H(src, 1) >> (BIT_DEPTH - 8); \ - src += srcstride; \ - dst += dststride; \ - } \ -} - -#define PUT_HEVC_QPEL_V(V) \ -static void FUNC(put_hevc_qpel_v ## V)(int16_t *dst, ptrdiff_t dststride, \ - uint8_t *_src, ptrdiff_t _srcstride, \ - int width, int height, \ - int16_t* mcbuffer) \ -{ \ - int x, y; \ - pixel *src = (pixel*)_src; \ - ptrdiff_t srcstride = _srcstride / sizeof(pixel); \ - \ - for (y = 0; y < height; y++) { \ - for (x = 0; x < width; x++) \ - dst[x] = QPEL_FILTER_ ## V(src, srcstride) >> (BIT_DEPTH - 8); \ - src += srcstride; \ - dst += dststride; \ - } \ -} - -#define PUT_HEVC_QPEL_HV(H, V) \ -static void FUNC(put_hevc_qpel_h ## H ## v ## V)(int16_t *dst, \ - ptrdiff_t dststride, \ - uint8_t *_src, \ - ptrdiff_t _srcstride, \ - int width, int height, \ - int16_t* mcbuffer) \ -{ \ - int x, y; \ - pixel *src = (pixel*)_src; \ - ptrdiff_t srcstride = _srcstride / sizeof(pixel); \ - \ - int16_t tmp_array[(MAX_PB_SIZE + 7) * MAX_PB_SIZE]; \ - int16_t *tmp = tmp_array; \ - \ - src -= ff_hevc_qpel_extra_before[V] * srcstride; \ - \ - for (y = 0; y < height + ff_hevc_qpel_extra[V]; y++) { \ - for (x = 0; x < width; x++) \ - tmp[x] = QPEL_FILTER_ ## H(src, 1) >> (BIT_DEPTH - 8); \ - src += srcstride; \ - tmp += MAX_PB_SIZE; \ - } \ - \ - tmp = tmp_array + ff_hevc_qpel_extra_before[V] * MAX_PB_SIZE; \ - \ - for (y = 0; y < height; y++) { \ - for (x = 0; x < width; x++) \ - dst[x] = QPEL_FILTER_ ## V(tmp, MAX_PB_SIZE) >> 6; \ - tmp += MAX_PB_SIZE; \ - dst += dststride; \ - } \ -} - -PUT_HEVC_QPEL_H(1) -PUT_HEVC_QPEL_H(2) -PUT_HEVC_QPEL_H(3) -PUT_HEVC_QPEL_V(1) -PUT_HEVC_QPEL_V(2) -PUT_HEVC_QPEL_V(3) -PUT_HEVC_QPEL_HV(1, 1) -PUT_HEVC_QPEL_HV(1, 2) -PUT_HEVC_QPEL_HV(1, 3) -PUT_HEVC_QPEL_HV(2, 1) -PUT_HEVC_QPEL_HV(2, 2) -PUT_HEVC_QPEL_HV(2, 3) -PUT_HEVC_QPEL_HV(3, 1) -PUT_HEVC_QPEL_HV(3, 2) -PUT_HEVC_QPEL_HV(3, 3) - -static void FUNC(put_hevc_epel_pixels)(int16_t *dst, ptrdiff_t dststride, - uint8_t *_src, ptrdiff_t _srcstride, - int width, int height, int mx, int my, - int16_t* mcbuffer) +static void FUNC(put_hevc_pel_uni_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, + int height, intptr_t mx, intptr_t my, int width) { - int x, y; + int y; pixel *src = (pixel *)_src; ptrdiff_t srcstride = _srcstride / sizeof(pixel); + pixel *dst = (pixel *)_dst; + ptrdiff_t dststride = _dststride / sizeof(pixel); for (y = 0; y < height; y++) { - for (x = 0; x < width; x++) - dst[x] = src[x] << (14 - BIT_DEPTH); + memcpy(dst, src, width * sizeof(pixel)); src += srcstride; dst += dststride; } } -#define EPEL_FILTER(src, stride) \ - (filter_0 * src[x - stride] + \ - filter_1 * src[x] + \ - filter_2 * src[x + stride] + \ - filter_3 * src[x + 2 * stride]) +static void FUNC(put_hevc_pel_bi_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, + int16_t *src2, ptrdiff_t src2stride, + int height, intptr_t mx, intptr_t my, int width) +{ + int x, y; + pixel *src = (pixel *)_src; + ptrdiff_t srcstride = _srcstride / sizeof(pixel); + pixel *dst = (pixel *)_dst; + ptrdiff_t dststride = _dststride / sizeof(pixel); + + int shift = 14 + 1 - BIT_DEPTH; +#if BIT_DEPTH < 14 + int offset = 1 << (shift - 1); +#else + int offset = 0; +#endif + + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) + dst[x] = av_clip_pixel(((src[x] << (14 - BIT_DEPTH)) + src2[x] + offset) >> shift); + src += srcstride; + dst += dststride; + src2 += src2stride; + } +} + +static void FUNC(put_hevc_pel_uni_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, + int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width) +{ + int x, y; + pixel *src = (pixel *)_src; + ptrdiff_t srcstride = _srcstride / sizeof(pixel); + pixel *dst = (pixel *)_dst; + ptrdiff_t dststride = _dststride / sizeof(pixel); + int shift = denom + 14 - BIT_DEPTH; +#if BIT_DEPTH < 14 + int offset = 1 << (shift - 1); +#else + int offset = 0; +#endif + + ox = ox * (1 << (BIT_DEPTH - 8)); + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) + dst[x] = av_clip_pixel((((src[x] << (14 - BIT_DEPTH)) * wx + offset) >> shift) + ox); + src += srcstride; + dst += dststride; + } +} + +static void FUNC(put_hevc_pel_bi_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, + int16_t *src2, ptrdiff_t src2stride, + int height, int denom, int wx0, int wx1, + int ox0, int ox1, intptr_t mx, intptr_t my, int width) +{ + int x, y; + pixel *src = (pixel *)_src; + ptrdiff_t srcstride = _srcstride / sizeof(pixel); + pixel *dst = (pixel *)_dst; + ptrdiff_t dststride = _dststride / sizeof(pixel); + + int shift = 14 + 1 - BIT_DEPTH; + int log2Wd = denom + shift - 1; + + ox0 = ox0 * (1 << (BIT_DEPTH - 8)); + ox1 = ox1 * (1 << (BIT_DEPTH - 8)); + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) { + dst[x] = av_clip_pixel(( (src[x] << (14 - BIT_DEPTH)) * wx1 + src2[x] * wx0 + ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1)); + } + src += srcstride; + dst += dststride; + src2 += src2stride; + } +} + +//////////////////////////////////////////////////////////////////////////////// +// +//////////////////////////////////////////////////////////////////////////////// +#define QPEL_FILTER(src, stride) \ + (filter[0] * src[x - 3 * stride] + \ + filter[1] * src[x - 2 * stride] + \ + filter[2] * src[x - stride] + \ + filter[3] * src[x ] + \ + filter[4] * src[x + stride] + \ + filter[5] * src[x + 2 * stride] + \ + filter[6] * src[x + 3 * stride] + \ + filter[7] * src[x + 4 * stride]) + +static void FUNC(put_hevc_qpel_h)(int16_t *dst, ptrdiff_t dststride, + uint8_t *_src, ptrdiff_t _srcstride, + int height, intptr_t mx, intptr_t my, int width) +{ + int x, y; + pixel *src = (pixel*)_src; + ptrdiff_t srcstride = _srcstride / sizeof(pixel); + const int8_t *filter = ff_hevc_qpel_filters[mx - 1]; + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) + dst[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); + src += srcstride; + dst += dststride; + } +} + +static void FUNC(put_hevc_qpel_v)(int16_t *dst, ptrdiff_t dststride, + uint8_t *_src, ptrdiff_t _srcstride, + int height, intptr_t mx, intptr_t my, int width) +{ + int x, y; + pixel *src = (pixel*)_src; + ptrdiff_t srcstride = _srcstride / sizeof(pixel); + const int8_t *filter = ff_hevc_qpel_filters[my - 1]; + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) + dst[x] = QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8); + src += srcstride; + dst += dststride; + } +} + +static void FUNC(put_hevc_qpel_hv)(int16_t *dst, + ptrdiff_t dststride, + uint8_t *_src, + ptrdiff_t _srcstride, + int height, intptr_t mx, + intptr_t my, int width) +{ + int x, y; + const int8_t *filter; + pixel *src = (pixel*)_src; + ptrdiff_t srcstride = _srcstride / sizeof(pixel); + int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; + int16_t *tmp = tmp_array; + + src -= QPEL_EXTRA_BEFORE * srcstride; + filter = ff_hevc_qpel_filters[mx - 1]; + for (y = 0; y < height + QPEL_EXTRA; y++) { + for (x = 0; x < width; x++) + tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); + src += srcstride; + tmp += MAX_PB_SIZE; + } + + tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE; + filter = ff_hevc_qpel_filters[my - 1]; + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) + dst[x] = QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6; + tmp += MAX_PB_SIZE; + dst += dststride; + } +} + +static void FUNC(put_hevc_qpel_uni_h)(uint8_t *_dst, ptrdiff_t _dststride, + uint8_t *_src, ptrdiff_t _srcstride, + int height, intptr_t mx, intptr_t my, int width) +{ + int x, y; + pixel *src = (pixel*)_src; + ptrdiff_t srcstride = _srcstride / sizeof(pixel); + pixel *dst = (pixel *)_dst; + ptrdiff_t dststride = _dststride / sizeof(pixel); + const int8_t *filter = ff_hevc_qpel_filters[mx - 1]; + int shift = 14 - BIT_DEPTH; + +#if BIT_DEPTH < 14 + int offset = 1 << (shift - 1); +#else + int offset = 0; +#endif + + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) + dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift); + src += srcstride; + dst += dststride; + } +} + +static void FUNC(put_hevc_qpel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, + int16_t *src2, ptrdiff_t src2stride, + int height, intptr_t mx, intptr_t my, int width) +{ + int x, y; + pixel *src = (pixel*)_src; + ptrdiff_t srcstride = _srcstride / sizeof(pixel); + pixel *dst = (pixel *)_dst; + ptrdiff_t dststride = _dststride / sizeof(pixel); + + const int8_t *filter = ff_hevc_qpel_filters[mx - 1]; + + int shift = 14 + 1 - BIT_DEPTH; +#if BIT_DEPTH < 14 + int offset = 1 << (shift - 1); +#else + int offset = 0; +#endif + + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) + dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift); + src += srcstride; + dst += dststride; + src2 += src2stride; + } +} + +static void FUNC(put_hevc_qpel_uni_v)(uint8_t *_dst, ptrdiff_t _dststride, + uint8_t *_src, ptrdiff_t _srcstride, + int height, intptr_t mx, intptr_t my, int width) +{ + int x, y; + pixel *src = (pixel*)_src; + ptrdiff_t srcstride = _srcstride / sizeof(pixel); + pixel *dst = (pixel *)_dst; + ptrdiff_t dststride = _dststride / sizeof(pixel); + const int8_t *filter = ff_hevc_qpel_filters[my - 1]; + int shift = 14 - BIT_DEPTH; + +#if BIT_DEPTH < 14 + int offset = 1 << (shift - 1); +#else + int offset = 0; +#endif + + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) + dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + offset) >> shift); + src += srcstride; + dst += dststride; + } +} + + +static void FUNC(put_hevc_qpel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, + int16_t *src2, ptrdiff_t src2stride, + int height, intptr_t mx, intptr_t my, int width) +{ + int x, y; + pixel *src = (pixel*)_src; + ptrdiff_t srcstride = _srcstride / sizeof(pixel); + pixel *dst = (pixel *)_dst; + ptrdiff_t dststride = _dststride / sizeof(pixel); + + const int8_t *filter = ff_hevc_qpel_filters[my - 1]; + + int shift = 14 + 1 - BIT_DEPTH; +#if BIT_DEPTH < 14 + int offset = 1 << (shift - 1); +#else + int offset = 0; +#endif + + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) + dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift); + src += srcstride; + dst += dststride; + src2 += src2stride; + } +} + +static void FUNC(put_hevc_qpel_uni_hv)(uint8_t *_dst, ptrdiff_t _dststride, + uint8_t *_src, ptrdiff_t _srcstride, + int height, intptr_t mx, intptr_t my, int width) +{ + int x, y; + const int8_t *filter; + pixel *src = (pixel*)_src; + ptrdiff_t srcstride = _srcstride / sizeof(pixel); + pixel *dst = (pixel *)_dst; + ptrdiff_t dststride = _dststride / sizeof(pixel); + int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; + int16_t *tmp = tmp_array; + int shift = 14 - BIT_DEPTH; + +#if BIT_DEPTH < 14 + int offset = 1 << (shift - 1); +#else + int offset = 0; +#endif + + src -= QPEL_EXTRA_BEFORE * srcstride; + filter = ff_hevc_qpel_filters[mx - 1]; + for (y = 0; y < height + QPEL_EXTRA; y++) { + for (x = 0; x < width; x++) + tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); + src += srcstride; + tmp += MAX_PB_SIZE; + } + + tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE; + filter = ff_hevc_qpel_filters[my - 1]; + + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) + dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift); + tmp += MAX_PB_SIZE; + dst += dststride; + } +} + +static void FUNC(put_hevc_qpel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, + int16_t *src2, ptrdiff_t src2stride, + int height, intptr_t mx, intptr_t my, int width) +{ + int x, y; + const int8_t *filter; + pixel *src = (pixel*)_src; + ptrdiff_t srcstride = _srcstride / sizeof(pixel); + pixel *dst = (pixel *)_dst; + ptrdiff_t dststride = _dststride / sizeof(pixel); + int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; + int16_t *tmp = tmp_array; + int shift = 14 + 1 - BIT_DEPTH; +#if BIT_DEPTH < 14 + int offset = 1 << (shift - 1); +#else + int offset = 0; +#endif + + src -= QPEL_EXTRA_BEFORE * srcstride; + filter = ff_hevc_qpel_filters[mx - 1]; + for (y = 0; y < height + QPEL_EXTRA; y++) { + for (x = 0; x < width; x++) + tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); + src += srcstride; + tmp += MAX_PB_SIZE; + } + + tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE; + filter = ff_hevc_qpel_filters[my - 1]; + + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) + dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift); + tmp += MAX_PB_SIZE; + dst += dststride; + src2 += src2stride; + } +} + +static void FUNC(put_hevc_qpel_uni_w_h)(uint8_t *_dst, ptrdiff_t _dststride, + uint8_t *_src, ptrdiff_t _srcstride, + int height, int denom, int wx, int ox, + intptr_t mx, intptr_t my, int width) +{ + int x, y; + pixel *src = (pixel*)_src; + ptrdiff_t srcstride = _srcstride / sizeof(pixel); + pixel *dst = (pixel *)_dst; + ptrdiff_t dststride = _dststride / sizeof(pixel); + const int8_t *filter = ff_hevc_qpel_filters[mx - 1]; + int shift = denom + 14 - BIT_DEPTH; +#if BIT_DEPTH < 14 + int offset = 1 << (shift - 1); +#else + int offset = 0; +#endif + + ox = ox * (1 << (BIT_DEPTH - 8)); + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) + dst[x] = av_clip_pixel((((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); + src += srcstride; + dst += dststride; + } +} + +static void FUNC(put_hevc_qpel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, + int16_t *src2, ptrdiff_t src2stride, + int height, int denom, int wx0, int wx1, + int ox0, int ox1, intptr_t mx, intptr_t my, int width) +{ + int x, y; + pixel *src = (pixel*)_src; + ptrdiff_t srcstride = _srcstride / sizeof(pixel); + pixel *dst = (pixel *)_dst; + ptrdiff_t dststride = _dststride / sizeof(pixel); + + const int8_t *filter = ff_hevc_qpel_filters[mx - 1]; + + int shift = 14 + 1 - BIT_DEPTH; + int log2Wd = denom + shift - 1; + + ox0 = ox0 * (1 << (BIT_DEPTH - 8)); + ox1 = ox1 * (1 << (BIT_DEPTH - 8)); + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) + dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 + + ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1)); + src += srcstride; + dst += dststride; + src2 += src2stride; + } +} + +static void FUNC(put_hevc_qpel_uni_w_v)(uint8_t *_dst, ptrdiff_t _dststride, + uint8_t *_src, ptrdiff_t _srcstride, + int height, int denom, int wx, int ox, + intptr_t mx, intptr_t my, int width) +{ + int x, y; + pixel *src = (pixel*)_src; + ptrdiff_t srcstride = _srcstride / sizeof(pixel); + pixel *dst = (pixel *)_dst; + ptrdiff_t dststride = _dststride / sizeof(pixel); + const int8_t *filter = ff_hevc_qpel_filters[my - 1]; + int shift = denom + 14 - BIT_DEPTH; +#if BIT_DEPTH < 14 + int offset = 1 << (shift - 1); +#else + int offset = 0; +#endif + + ox = ox * (1 << (BIT_DEPTH - 8)); + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) + dst[x] = av_clip_pixel((((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); + src += srcstride; + dst += dststride; + } +} + +static void FUNC(put_hevc_qpel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, + int16_t *src2, ptrdiff_t src2stride, + int height, int denom, int wx0, int wx1, + int ox0, int ox1, intptr_t mx, intptr_t my, int width) +{ + int x, y; + pixel *src = (pixel*)_src; + ptrdiff_t srcstride = _srcstride / sizeof(pixel); + pixel *dst = (pixel *)_dst; + ptrdiff_t dststride = _dststride / sizeof(pixel); + + const int8_t *filter = ff_hevc_qpel_filters[my - 1]; + + int shift = 14 + 1 - BIT_DEPTH; + int log2Wd = denom + shift - 1; + + ox0 = ox0 * (1 << (BIT_DEPTH - 8)); + ox1 = ox1 * (1 << (BIT_DEPTH - 8)); + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) + dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 + + ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1)); + src += srcstride; + dst += dststride; + src2 += src2stride; + } +} + +static void FUNC(put_hevc_qpel_uni_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, + uint8_t *_src, ptrdiff_t _srcstride, + int height, int denom, int wx, int ox, + intptr_t mx, intptr_t my, int width) +{ + int x, y; + const int8_t *filter; + pixel *src = (pixel*)_src; + ptrdiff_t srcstride = _srcstride / sizeof(pixel); + pixel *dst = (pixel *)_dst; + ptrdiff_t dststride = _dststride / sizeof(pixel); + int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; + int16_t *tmp = tmp_array; + int shift = denom + 14 - BIT_DEPTH; +#if BIT_DEPTH < 14 + int offset = 1 << (shift - 1); +#else + int offset = 0; +#endif + + src -= QPEL_EXTRA_BEFORE * srcstride; + filter = ff_hevc_qpel_filters[mx - 1]; + for (y = 0; y < height + QPEL_EXTRA; y++) { + for (x = 0; x < width; x++) + tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); + src += srcstride; + tmp += MAX_PB_SIZE; + } + + tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE; + filter = ff_hevc_qpel_filters[my - 1]; + + ox = ox * (1 << (BIT_DEPTH - 8)); + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) + dst[x] = av_clip_pixel((((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); + tmp += MAX_PB_SIZE; + dst += dststride; + } +} + +static void FUNC(put_hevc_qpel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, + int16_t *src2, ptrdiff_t src2stride, + int height, int denom, int wx0, int wx1, + int ox0, int ox1, intptr_t mx, intptr_t my, int width) +{ + int x, y; + const int8_t *filter; + pixel *src = (pixel*)_src; + ptrdiff_t srcstride = _srcstride / sizeof(pixel); + pixel *dst = (pixel *)_dst; + ptrdiff_t dststride = _dststride / sizeof(pixel); + int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; + int16_t *tmp = tmp_array; + int shift = 14 + 1 - BIT_DEPTH; + int log2Wd = denom + shift - 1; + + src -= QPEL_EXTRA_BEFORE * srcstride; + filter = ff_hevc_qpel_filters[mx - 1]; + for (y = 0; y < height + QPEL_EXTRA; y++) { + for (x = 0; x < width; x++) + tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); + src += srcstride; + tmp += MAX_PB_SIZE; + } + + tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE; + filter = ff_hevc_qpel_filters[my - 1]; + + ox0 = ox0 * (1 << (BIT_DEPTH - 8)); + ox1 = ox1 * (1 << (BIT_DEPTH - 8)); + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) + dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 + + ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1)); + tmp += MAX_PB_SIZE; + dst += dststride; + src2 += src2stride; + } +} + +//////////////////////////////////////////////////////////////////////////////// +// +//////////////////////////////////////////////////////////////////////////////// +#define EPEL_FILTER(src, stride) \ + (filter[0] * src[x - stride] + \ + filter[1] * src[x] + \ + filter[2] * src[x + stride] + \ + filter[3] * src[x + 2 * stride]) static void FUNC(put_hevc_epel_h)(int16_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, - int width, int height, int mx, int my, - int16_t* mcbuffer) + int height, intptr_t mx, intptr_t my, int width) { int x, y; pixel *src = (pixel *)_src; ptrdiff_t srcstride = _srcstride / sizeof(pixel); const int8_t *filter = ff_hevc_epel_filters[mx - 1]; - int8_t filter_0 = filter[0]; - int8_t filter_1 = filter[1]; - int8_t filter_2 = filter[2]; - int8_t filter_3 = filter[3]; for (y = 0; y < height; y++) { for (x = 0; x < width; x++) dst[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); @@ -981,17 +1401,12 @@ static void FUNC(put_hevc_epel_h)(int16_t *dst, ptrdiff_t dststride, static void FUNC(put_hevc_epel_v)(int16_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, - int width, int height, int mx, int my, - int16_t* mcbuffer) + int height, intptr_t mx, intptr_t my, int width) { int x, y; pixel *src = (pixel *)_src; ptrdiff_t srcstride = _srcstride / sizeof(pixel); const int8_t *filter = ff_hevc_epel_filters[my - 1]; - int8_t filter_0 = filter[0]; - int8_t filter_1 = filter[1]; - int8_t filter_2 = filter[2]; - int8_t filter_3 = filter[3]; for (y = 0; y < height; y++) { for (x = 0; x < width; x++) @@ -1003,19 +1418,13 @@ static void FUNC(put_hevc_epel_v)(int16_t *dst, ptrdiff_t dststride, static void FUNC(put_hevc_epel_hv)(int16_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, - int width, int height, int mx, int my, - int16_t* mcbuffer) + int height, intptr_t mx, intptr_t my, int width) { int x, y; pixel *src = (pixel *)_src; ptrdiff_t srcstride = _srcstride / sizeof(pixel); - const int8_t *filter_h = ff_hevc_epel_filters[mx - 1]; - const int8_t *filter_v = ff_hevc_epel_filters[my - 1]; - int8_t filter_0 = filter_h[0]; - int8_t filter_1 = filter_h[1]; - int8_t filter_2 = filter_h[2]; - int8_t filter_3 = filter_h[3]; - int16_t tmp_array[(MAX_PB_SIZE + 3) * MAX_PB_SIZE]; + const int8_t *filter = ff_hevc_epel_filters[mx - 1]; + int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE]; int16_t *tmp = tmp_array; src -= EPEL_EXTRA_BEFORE * srcstride; @@ -1028,10 +1437,8 @@ static void FUNC(put_hevc_epel_hv)(int16_t *dst, ptrdiff_t dststride, } tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE; - filter_0 = filter_v[0]; - filter_1 = filter_v[1]; - filter_2 = filter_v[2]; - filter_3 = filter_v[3]; + filter = ff_hevc_epel_filters[my - 1]; + for (y = 0; y < height; y++) { for (x = 0; x < width; x++) dst[x] = EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6; @@ -1040,37 +1447,91 @@ static void FUNC(put_hevc_epel_hv)(int16_t *dst, ptrdiff_t dststride, } } -static void FUNC(put_unweighted_pred)(uint8_t *_dst, ptrdiff_t _dststride, - int16_t *src, ptrdiff_t srcstride, - int width, int height) +static void FUNC(put_hevc_epel_uni_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, + int height, intptr_t mx, intptr_t my, int width) { int x, y; + pixel *src = (pixel *)_src; + ptrdiff_t srcstride = _srcstride / sizeof(pixel); pixel *dst = (pixel *)_dst; ptrdiff_t dststride = _dststride / sizeof(pixel); - + const int8_t *filter = ff_hevc_epel_filters[mx - 1]; int shift = 14 - BIT_DEPTH; #if BIT_DEPTH < 14 int offset = 1 << (shift - 1); #else int offset = 0; #endif + for (y = 0; y < height; y++) { for (x = 0; x < width; x++) - dst[x] = av_clip_pixel((src[x] + offset) >> shift); - dst += dststride; + dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift); src += srcstride; + dst += dststride; } } -static void FUNC(put_weighted_pred_avg)(uint8_t *_dst, ptrdiff_t _dststride, - int16_t *src1, int16_t *src2, - ptrdiff_t srcstride, - int width, int height) +static void FUNC(put_hevc_epel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, + int16_t *src2, ptrdiff_t src2stride, + int height, intptr_t mx, intptr_t my, int width) { int x, y; + pixel *src = (pixel *)_src; + ptrdiff_t srcstride = _srcstride / sizeof(pixel); pixel *dst = (pixel *)_dst; ptrdiff_t dststride = _dststride / sizeof(pixel); + const int8_t *filter = ff_hevc_epel_filters[mx - 1]; + int shift = 14 + 1 - BIT_DEPTH; +#if BIT_DEPTH < 14 + int offset = 1 << (shift - 1); +#else + int offset = 0; +#endif + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) { + dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift); + } + dst += dststride; + src += srcstride; + src2 += src2stride; + } +} + +static void FUNC(put_hevc_epel_uni_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, + int height, intptr_t mx, intptr_t my, int width) +{ + int x, y; + pixel *src = (pixel *)_src; + ptrdiff_t srcstride = _srcstride / sizeof(pixel); + pixel *dst = (pixel *)_dst; + ptrdiff_t dststride = _dststride / sizeof(pixel); + const int8_t *filter = ff_hevc_epel_filters[my - 1]; + int shift = 14 - BIT_DEPTH; +#if BIT_DEPTH < 14 + int offset = 1 << (shift - 1); +#else + int offset = 0; +#endif + + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) + dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + offset) >> shift); + src += srcstride; + dst += dststride; + } +} + +static void FUNC(put_hevc_epel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, + int16_t *src2, ptrdiff_t src2stride, + int height, intptr_t mx, intptr_t my, int width) +{ + int x, y; + pixel *src = (pixel *)_src; + ptrdiff_t srcstride = _srcstride / sizeof(pixel); + const int8_t *filter = ff_hevc_epel_filters[my - 1]; + pixel *dst = (pixel *)_dst; + ptrdiff_t dststride = _dststride / sizeof(pixel); int shift = 14 + 1 - BIT_DEPTH; #if BIT_DEPTH < 14 int offset = 1 << (shift - 1); @@ -1080,71 +1541,273 @@ static void FUNC(put_weighted_pred_avg)(uint8_t *_dst, ptrdiff_t _dststride, for (y = 0; y < height; y++) { for (x = 0; x < width; x++) - dst[x] = av_clip_pixel((src1[x] + src2[x] + offset) >> shift); + dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift); dst += dststride; - src1 += srcstride; - src2 += srcstride; + src += srcstride; + src2 += src2stride; } } -static void FUNC(weighted_pred)(uint8_t denom, int16_t wlxFlag, int16_t olxFlag, - uint8_t *_dst, ptrdiff_t _dststride, - int16_t *src, ptrdiff_t srcstride, - int width, int height) +static void FUNC(put_hevc_epel_uni_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, + int height, intptr_t mx, intptr_t my, int width) { - int shift, log2Wd, wx, ox, x, y, offset; + int x, y; + pixel *src = (pixel *)_src; + ptrdiff_t srcstride = _srcstride / sizeof(pixel); pixel *dst = (pixel *)_dst; ptrdiff_t dststride = _dststride / sizeof(pixel); + const int8_t *filter = ff_hevc_epel_filters[mx - 1]; + int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE]; + int16_t *tmp = tmp_array; + int shift = 14 - BIT_DEPTH; +#if BIT_DEPTH < 14 + int offset = 1 << (shift - 1); +#else + int offset = 0; +#endif - shift = 14 - BIT_DEPTH; - log2Wd = denom + shift; - offset = 1 << (log2Wd - 1); - wx = wlxFlag; - ox = olxFlag * (1 << (BIT_DEPTH - 8)); + src -= EPEL_EXTRA_BEFORE * srcstride; + + for (y = 0; y < height + EPEL_EXTRA; y++) { + for (x = 0; x < width; x++) + tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); + src += srcstride; + tmp += MAX_PB_SIZE; + } + + tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE; + filter = ff_hevc_epel_filters[my - 1]; + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) + dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift); + tmp += MAX_PB_SIZE; + dst += dststride; + } +} + +static void FUNC(put_hevc_epel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, + int16_t *src2, ptrdiff_t src2stride, + int height, intptr_t mx, intptr_t my, int width) +{ + int x, y; + pixel *src = (pixel *)_src; + ptrdiff_t srcstride = _srcstride / sizeof(pixel); + pixel *dst = (pixel *)_dst; + ptrdiff_t dststride = _dststride / sizeof(pixel); + const int8_t *filter = ff_hevc_epel_filters[mx - 1]; + int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE]; + int16_t *tmp = tmp_array; + int shift = 14 + 1 - BIT_DEPTH; +#if BIT_DEPTH < 14 + int offset = 1 << (shift - 1); +#else + int offset = 0; +#endif + + src -= EPEL_EXTRA_BEFORE * srcstride; + + for (y = 0; y < height + EPEL_EXTRA; y++) { + for (x = 0; x < width; x++) + tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); + src += srcstride; + tmp += MAX_PB_SIZE; + } + + tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE; + filter = ff_hevc_epel_filters[my - 1]; + + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) + dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift); + tmp += MAX_PB_SIZE; + dst += dststride; + src2 += src2stride; + } +} + +static void FUNC(put_hevc_epel_uni_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, + int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width) +{ + int x, y; + pixel *src = (pixel *)_src; + ptrdiff_t srcstride = _srcstride / sizeof(pixel); + pixel *dst = (pixel *)_dst; + ptrdiff_t dststride = _dststride / sizeof(pixel); + const int8_t *filter = ff_hevc_epel_filters[mx - 1]; + int shift = denom + 14 - BIT_DEPTH; +#if BIT_DEPTH < 14 + int offset = 1 << (shift - 1); +#else + int offset = 0; +#endif + + ox = ox * (1 << (BIT_DEPTH - 8)); for (y = 0; y < height; y++) { for (x = 0; x < width; x++) { - if (log2Wd >= 1) { - dst[x] = av_clip_pixel(((src[x] * wx + offset) >> log2Wd) + ox); - } else { - dst[x] = av_clip_pixel(src[x] * wx + ox); - } + dst[x] = av_clip_pixel((((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); } dst += dststride; src += srcstride; } } -static void FUNC(weighted_pred_avg)(uint8_t denom, - int16_t wl0Flag, int16_t wl1Flag, - int16_t ol0Flag, int16_t ol1Flag, - uint8_t *_dst, ptrdiff_t _dststride, - int16_t *src1, int16_t *src2, - ptrdiff_t srcstride, - int width, int height) +static void FUNC(put_hevc_epel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, + int16_t *src2, ptrdiff_t src2stride, + int height, int denom, int wx0, int wx1, + int ox0, int ox1, intptr_t mx, intptr_t my, int width) { - int shift, log2Wd, w0, w1, o0, o1, x, y; - pixel *dst = (pixel *)_dst; + int x, y; + pixel *src = (pixel *)_src; + ptrdiff_t srcstride = _srcstride / sizeof(pixel); + pixel *dst = (pixel *)_dst; ptrdiff_t dststride = _dststride / sizeof(pixel); + const int8_t *filter = ff_hevc_epel_filters[mx - 1]; + int shift = 14 + 1 - BIT_DEPTH; + int log2Wd = denom + shift - 1; - shift = 14 - BIT_DEPTH; - log2Wd = denom + shift; - w0 = wl0Flag; - w1 = wl1Flag; - o0 = ol0Flag * (1 << (BIT_DEPTH - 8)); - o1 = ol1Flag * (1 << (BIT_DEPTH - 8)); - + ox0 = ox0 * (1 << (BIT_DEPTH - 8)); + ox1 = ox1 * (1 << (BIT_DEPTH - 8)); for (y = 0; y < height; y++) { for (x = 0; x < width; x++) - dst[x] = av_clip_pixel((src1[x] * w0 + src2[x] * w1 + - ((o0 + o1 + 1) << log2Wd)) >> (log2Wd + 1)); + dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 + + ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1)); + src += srcstride; dst += dststride; - src1 += srcstride; - src2 += srcstride; + src2 += src2stride; } } -// line zero +static void FUNC(put_hevc_epel_uni_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, + int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width) +{ + int x, y; + pixel *src = (pixel *)_src; + ptrdiff_t srcstride = _srcstride / sizeof(pixel); + pixel *dst = (pixel *)_dst; + ptrdiff_t dststride = _dststride / sizeof(pixel); + const int8_t *filter = ff_hevc_epel_filters[my - 1]; + int shift = denom + 14 - BIT_DEPTH; +#if BIT_DEPTH < 14 + int offset = 1 << (shift - 1); +#else + int offset = 0; +#endif + + ox = ox * (1 << (BIT_DEPTH - 8)); + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) { + dst[x] = av_clip_pixel((((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); + } + dst += dststride; + src += srcstride; + } +} + +static void FUNC(put_hevc_epel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, + int16_t *src2, ptrdiff_t src2stride, + int height, int denom, int wx0, int wx1, + int ox0, int ox1, intptr_t mx, intptr_t my, int width) +{ + int x, y; + pixel *src = (pixel *)_src; + ptrdiff_t srcstride = _srcstride / sizeof(pixel); + const int8_t *filter = ff_hevc_epel_filters[my - 1]; + pixel *dst = (pixel *)_dst; + ptrdiff_t dststride = _dststride / sizeof(pixel); + int shift = 14 + 1 - BIT_DEPTH; + int log2Wd = denom + shift - 1; + + ox0 = ox0 * (1 << (BIT_DEPTH - 8)); + ox1 = ox1 * (1 << (BIT_DEPTH - 8)); + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) + dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 + + ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1)); + src += srcstride; + dst += dststride; + src2 += src2stride; + } +} + +static void FUNC(put_hevc_epel_uni_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, + int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width) +{ + int x, y; + pixel *src = (pixel *)_src; + ptrdiff_t srcstride = _srcstride / sizeof(pixel); + pixel *dst = (pixel *)_dst; + ptrdiff_t dststride = _dststride / sizeof(pixel); + const int8_t *filter = ff_hevc_epel_filters[mx - 1]; + int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE]; + int16_t *tmp = tmp_array; + int shift = denom + 14 - BIT_DEPTH; +#if BIT_DEPTH < 14 + int offset = 1 << (shift - 1); +#else + int offset = 0; +#endif + + src -= EPEL_EXTRA_BEFORE * srcstride; + + for (y = 0; y < height + EPEL_EXTRA; y++) { + for (x = 0; x < width; x++) + tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); + src += srcstride; + tmp += MAX_PB_SIZE; + } + + tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE; + filter = ff_hevc_epel_filters[my - 1]; + + ox = ox * (1 << (BIT_DEPTH - 8)); + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) + dst[x] = av_clip_pixel((((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); + tmp += MAX_PB_SIZE; + dst += dststride; + } +} + +static void FUNC(put_hevc_epel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, + int16_t *src2, ptrdiff_t src2stride, + int height, int denom, int wx0, int wx1, + int ox0, int ox1, intptr_t mx, intptr_t my, int width) +{ + int x, y; + pixel *src = (pixel *)_src; + ptrdiff_t srcstride = _srcstride / sizeof(pixel); + pixel *dst = (pixel *)_dst; + ptrdiff_t dststride = _dststride / sizeof(pixel); + const int8_t *filter = ff_hevc_epel_filters[mx - 1]; + int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE]; + int16_t *tmp = tmp_array; + int shift = 14 + 1 - BIT_DEPTH; + int log2Wd = denom + shift - 1; + + src -= EPEL_EXTRA_BEFORE * srcstride; + + for (y = 0; y < height + EPEL_EXTRA; y++) { + for (x = 0; x < width; x++) + tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); + src += srcstride; + tmp += MAX_PB_SIZE; + } + + tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE; + filter = ff_hevc_epel_filters[my - 1]; + + ox0 = ox0 * (1 << (BIT_DEPTH - 8)); + ox1 = ox1 * (1 << (BIT_DEPTH - 8)); + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) + dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 + + ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1)); + tmp += MAX_PB_SIZE; + dst += dststride; + src2 += src2stride; + } +}// line zero #define P3 pix[-4 * xstride] #define P2 pix[-3 * xstride] #define P1 pix[-2 * xstride] diff --git a/libavcodec/hevcpred_template.c b/libavcodec/hevcpred_template.c index 1d83249355..5dc8226097 100644 --- a/libavcodec/hevcpred_template.c +++ b/libavcodec/hevcpred_template.c @@ -36,7 +36,7 @@ static void FUNC(intra_pred)(HEVCContext *s, int x0, int y0, int log2_size, int #define MVF_PU(x, y) \ MVF(PU(x0 + ((x) << hshift)), PU(y0 + ((y) << vshift))) #define IS_INTRA(x, y) \ - MVF_PU(x, y).is_intra + (MVF_PU(x, y).pred_flag == PF_INTRA) #define MIN_TB_ADDR_ZS(x, y) \ s->pps->min_tb_addr_zs[(y) * s->sps->min_tb_width + (x)] #define EXTEND_LEFT(ptr, start, length) \ @@ -120,7 +120,7 @@ static void FUNC(intra_pred)(HEVCContext *s, int x0, int y0, int log2_size, int int max = FFMIN(size_in_luma_pu, s->sps->min_pu_height - y_bottom_pu); cand_bottom_left = 0; for (i = 0; i < max; i++) - cand_bottom_left |= MVF(x_left_pu, y_bottom_pu + i).is_intra; + cand_bottom_left |= (MVF(x_left_pu, y_bottom_pu + i).pred_flag == PF_INTRA); } if (cand_left == 1 && on_pu_edge_x) { int x_left_pu = PU(x0 - 1); @@ -128,12 +128,12 @@ static void FUNC(intra_pred)(HEVCContext *s, int x0, int y0, int log2_size, int int max = FFMIN(size_in_luma_pu, s->sps->min_pu_height - y_left_pu); cand_left = 0; for (i = 0; i < max; i++) - cand_left |= MVF(x_left_pu, y_left_pu + i).is_intra; + cand_left |= (MVF(x_left_pu, y_left_pu + i).pred_flag == PF_INTRA); } if (cand_up_left == 1) { int x_left_pu = PU(x0 - 1); int y_top_pu = PU(y0 - 1); - cand_up_left = MVF(x_left_pu, y_top_pu).is_intra; + cand_up_left = MVF(x_left_pu, y_top_pu).pred_flag == PF_INTRA; } if (cand_up == 1 && on_pu_edge_y) { int x_top_pu = PU(x0); @@ -141,7 +141,7 @@ static void FUNC(intra_pred)(HEVCContext *s, int x0, int y0, int log2_size, int int max = FFMIN(size_in_luma_pu, s->sps->min_pu_width - x_top_pu); cand_up = 0; for (i = 0; i < max; i++) - cand_up |= MVF(x_top_pu + i, y_top_pu).is_intra; + cand_up |= (MVF(x_top_pu + i, y_top_pu).pred_flag == PF_INTRA); } if (cand_up_right == 1 && on_pu_edge_y) { int y_top_pu = PU(y0 - 1); @@ -149,7 +149,7 @@ static void FUNC(intra_pred)(HEVCContext *s, int x0, int y0, int log2_size, int int max = FFMIN(size_in_luma_pu, s->sps->min_pu_width - x_right_pu); cand_up_right = 0; for (i = 0; i < max; i++) - cand_up_right |= MVF(x_right_pu + i, y_top_pu).is_intra; + cand_up_right |= (MVF(x_right_pu + i, y_top_pu).pred_flag == PF_INTRA); } for (i = 0; i < 2 * MAX_TB_SIZE; i++) { left[i] = 128;