mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-11-26 19:01:44 +02:00
36d04801ba
Also change the method for allocating them. Instead of two possible alloc calls from different places, just ensure they are allocated at the start of each slice. This should be simpler and less bug-prone than the previous method.
832 lines
38 KiB
C
832 lines
38 KiB
C
/*
|
|
* H.26L/H.264/AVC/JVT/14496-10/... decoder
|
|
* Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
|
|
*
|
|
* This file is part of Libav.
|
|
*
|
|
* Libav is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
*
|
|
* Libav is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with Libav; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
|
|
/**
|
|
* @file
|
|
* H.264 / AVC / MPEG4 part10 macroblock decoding
|
|
*/
|
|
|
|
#include <stdint.h>
|
|
|
|
#include "config.h"
|
|
|
|
#include "libavutil/common.h"
|
|
#include "libavutil/intreadwrite.h"
|
|
#include "avcodec.h"
|
|
#include "h264.h"
|
|
#include "qpeldsp.h"
|
|
#include "svq3.h"
|
|
#include "thread.h"
|
|
|
|
static inline int get_lowest_part_list_y(H264SliceContext *sl,
|
|
H264Picture *pic, int n,
|
|
int height, int y_offset, int list)
|
|
{
|
|
int raw_my = sl->mv_cache[list][scan8[n]][1];
|
|
int filter_height_up = (raw_my & 3) ? 2 : 0;
|
|
int filter_height_down = (raw_my & 3) ? 3 : 0;
|
|
int full_my = (raw_my >> 2) + y_offset;
|
|
int top = full_my - filter_height_up;
|
|
int bottom = full_my + filter_height_down + height;
|
|
|
|
return FFMAX(abs(top), bottom);
|
|
}
|
|
|
|
static inline void get_lowest_part_y(const H264Context *h, H264SliceContext *sl,
|
|
int refs[2][48], int n,
|
|
int height, int y_offset, int list0,
|
|
int list1, int *nrefs)
|
|
{
|
|
int my;
|
|
|
|
y_offset += 16 * (sl->mb_y >> MB_FIELD(sl));
|
|
|
|
if (list0) {
|
|
int ref_n = sl->ref_cache[0][scan8[n]];
|
|
H264Picture *ref = &sl->ref_list[0][ref_n];
|
|
|
|
// Error resilience puts the current picture in the ref list.
|
|
// Don't try to wait on these as it will cause a deadlock.
|
|
// Fields can wait on each other, though.
|
|
if (ref->tf.progress->data != h->cur_pic.tf.progress->data ||
|
|
(ref->reference & 3) != h->picture_structure) {
|
|
my = get_lowest_part_list_y(sl, ref, n, height, y_offset, 0);
|
|
if (refs[0][ref_n] < 0)
|
|
nrefs[0] += 1;
|
|
refs[0][ref_n] = FFMAX(refs[0][ref_n], my);
|
|
}
|
|
}
|
|
|
|
if (list1) {
|
|
int ref_n = sl->ref_cache[1][scan8[n]];
|
|
H264Picture *ref = &sl->ref_list[1][ref_n];
|
|
|
|
if (ref->tf.progress->data != h->cur_pic.tf.progress->data ||
|
|
(ref->reference & 3) != h->picture_structure) {
|
|
my = get_lowest_part_list_y(sl, ref, n, height, y_offset, 1);
|
|
if (refs[1][ref_n] < 0)
|
|
nrefs[1] += 1;
|
|
refs[1][ref_n] = FFMAX(refs[1][ref_n], my);
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Wait until all reference frames are available for MC operations.
|
|
*
|
|
* @param h the H264 context
|
|
*/
|
|
static void await_references(const H264Context *h, H264SliceContext *sl)
|
|
{
|
|
const int mb_xy = sl->mb_xy;
|
|
const int mb_type = h->cur_pic.mb_type[mb_xy];
|
|
int refs[2][48];
|
|
int nrefs[2] = { 0 };
|
|
int ref, list;
|
|
|
|
memset(refs, -1, sizeof(refs));
|
|
|
|
if (IS_16X16(mb_type)) {
|
|
get_lowest_part_y(h, sl, refs, 0, 16, 0,
|
|
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
|
|
} else if (IS_16X8(mb_type)) {
|
|
get_lowest_part_y(h, sl, refs, 0, 8, 0,
|
|
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
|
|
get_lowest_part_y(h, sl, refs, 8, 8, 8,
|
|
IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
|
|
} else if (IS_8X16(mb_type)) {
|
|
get_lowest_part_y(h, sl, refs, 0, 16, 0,
|
|
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
|
|
get_lowest_part_y(h, sl, refs, 4, 16, 0,
|
|
IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
|
|
} else {
|
|
int i;
|
|
|
|
assert(IS_8X8(mb_type));
|
|
|
|
for (i = 0; i < 4; i++) {
|
|
const int sub_mb_type = sl->sub_mb_type[i];
|
|
const int n = 4 * i;
|
|
int y_offset = (i & 2) << 2;
|
|
|
|
if (IS_SUB_8X8(sub_mb_type)) {
|
|
get_lowest_part_y(h, sl, refs, n, 8, y_offset,
|
|
IS_DIR(sub_mb_type, 0, 0),
|
|
IS_DIR(sub_mb_type, 0, 1),
|
|
nrefs);
|
|
} else if (IS_SUB_8X4(sub_mb_type)) {
|
|
get_lowest_part_y(h, sl, refs, n, 4, y_offset,
|
|
IS_DIR(sub_mb_type, 0, 0),
|
|
IS_DIR(sub_mb_type, 0, 1),
|
|
nrefs);
|
|
get_lowest_part_y(h, sl, refs, n + 2, 4, y_offset + 4,
|
|
IS_DIR(sub_mb_type, 0, 0),
|
|
IS_DIR(sub_mb_type, 0, 1),
|
|
nrefs);
|
|
} else if (IS_SUB_4X8(sub_mb_type)) {
|
|
get_lowest_part_y(h, sl, refs, n, 8, y_offset,
|
|
IS_DIR(sub_mb_type, 0, 0),
|
|
IS_DIR(sub_mb_type, 0, 1),
|
|
nrefs);
|
|
get_lowest_part_y(h, sl, refs, n + 1, 8, y_offset,
|
|
IS_DIR(sub_mb_type, 0, 0),
|
|
IS_DIR(sub_mb_type, 0, 1),
|
|
nrefs);
|
|
} else {
|
|
int j;
|
|
assert(IS_SUB_4X4(sub_mb_type));
|
|
for (j = 0; j < 4; j++) {
|
|
int sub_y_offset = y_offset + 2 * (j & 2);
|
|
get_lowest_part_y(h, sl, refs, n + j, 4, sub_y_offset,
|
|
IS_DIR(sub_mb_type, 0, 0),
|
|
IS_DIR(sub_mb_type, 0, 1),
|
|
nrefs);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
for (list = sl->list_count - 1; list >= 0; list--)
|
|
for (ref = 0; ref < 48 && nrefs[list]; ref++) {
|
|
int row = refs[list][ref];
|
|
if (row >= 0) {
|
|
H264Picture *ref_pic = &sl->ref_list[list][ref];
|
|
int ref_field = ref_pic->reference - 1;
|
|
int ref_field_picture = ref_pic->field_picture;
|
|
int pic_height = 16 * h->mb_height >> ref_field_picture;
|
|
|
|
row <<= MB_MBAFF(sl);
|
|
nrefs[list]--;
|
|
|
|
if (!FIELD_PICTURE(h) && ref_field_picture) { // frame referencing two fields
|
|
ff_thread_await_progress(&ref_pic->tf,
|
|
FFMIN((row >> 1) - !(row & 1),
|
|
pic_height - 1),
|
|
1);
|
|
ff_thread_await_progress(&ref_pic->tf,
|
|
FFMIN((row >> 1), pic_height - 1),
|
|
0);
|
|
} else if (FIELD_PICTURE(h) && !ref_field_picture) { // field referencing one field of a frame
|
|
ff_thread_await_progress(&ref_pic->tf,
|
|
FFMIN(row * 2 + ref_field,
|
|
pic_height - 1),
|
|
0);
|
|
} else if (FIELD_PICTURE(h)) {
|
|
ff_thread_await_progress(&ref_pic->tf,
|
|
FFMIN(row, pic_height - 1),
|
|
ref_field);
|
|
} else {
|
|
ff_thread_await_progress(&ref_pic->tf,
|
|
FFMIN(row, pic_height - 1),
|
|
0);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static av_always_inline void mc_dir_part(const H264Context *h, H264SliceContext *sl,
|
|
H264Picture *pic,
|
|
int n, int square, int height,
|
|
int delta, int list,
|
|
uint8_t *dest_y, uint8_t *dest_cb,
|
|
uint8_t *dest_cr,
|
|
int src_x_offset, int src_y_offset,
|
|
const qpel_mc_func *qpix_op,
|
|
h264_chroma_mc_func chroma_op,
|
|
int pixel_shift, int chroma_idc)
|
|
{
|
|
const int mx = sl->mv_cache[list][scan8[n]][0] + src_x_offset * 8;
|
|
int my = sl->mv_cache[list][scan8[n]][1] + src_y_offset * 8;
|
|
const int luma_xy = (mx & 3) + ((my & 3) << 2);
|
|
ptrdiff_t offset = ((mx >> 2) << pixel_shift) + (my >> 2) * sl->mb_linesize;
|
|
uint8_t *src_y = pic->f.data[0] + offset;
|
|
uint8_t *src_cb, *src_cr;
|
|
int extra_width = 0;
|
|
int extra_height = 0;
|
|
int emu = 0;
|
|
const int full_mx = mx >> 2;
|
|
const int full_my = my >> 2;
|
|
const int pic_width = 16 * h->mb_width;
|
|
const int pic_height = 16 * h->mb_height >> MB_FIELD(sl);
|
|
int ysh;
|
|
|
|
if (mx & 7)
|
|
extra_width -= 3;
|
|
if (my & 7)
|
|
extra_height -= 3;
|
|
|
|
if (full_mx < 0 - extra_width ||
|
|
full_my < 0 - extra_height ||
|
|
full_mx + 16 /*FIXME*/ > pic_width + extra_width ||
|
|
full_my + 16 /*FIXME*/ > pic_height + extra_height) {
|
|
h->vdsp.emulated_edge_mc(sl->edge_emu_buffer,
|
|
src_y - (2 << pixel_shift) - 2 * sl->mb_linesize,
|
|
sl->mb_linesize, sl->mb_linesize,
|
|
16 + 5, 16 + 5 /*FIXME*/, full_mx - 2,
|
|
full_my - 2, pic_width, pic_height);
|
|
src_y = sl->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize;
|
|
emu = 1;
|
|
}
|
|
|
|
qpix_op[luma_xy](dest_y, src_y, sl->mb_linesize); // FIXME try variable height perhaps?
|
|
if (!square)
|
|
qpix_op[luma_xy](dest_y + delta, src_y + delta, sl->mb_linesize);
|
|
|
|
if (CONFIG_GRAY && h->flags & CODEC_FLAG_GRAY)
|
|
return;
|
|
|
|
if (chroma_idc == 3 /* yuv444 */) {
|
|
src_cb = pic->f.data[1] + offset;
|
|
if (emu) {
|
|
h->vdsp.emulated_edge_mc(sl->edge_emu_buffer,
|
|
src_cb - (2 << pixel_shift) - 2 * sl->mb_linesize,
|
|
sl->mb_linesize, sl->mb_linesize,
|
|
16 + 5, 16 + 5 /*FIXME*/,
|
|
full_mx - 2, full_my - 2,
|
|
pic_width, pic_height);
|
|
src_cb = sl->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize;
|
|
}
|
|
qpix_op[luma_xy](dest_cb, src_cb, sl->mb_linesize); // FIXME try variable height perhaps?
|
|
if (!square)
|
|
qpix_op[luma_xy](dest_cb + delta, src_cb + delta, sl->mb_linesize);
|
|
|
|
src_cr = pic->f.data[2] + offset;
|
|
if (emu) {
|
|
h->vdsp.emulated_edge_mc(sl->edge_emu_buffer,
|
|
src_cr - (2 << pixel_shift) - 2 * sl->mb_linesize,
|
|
sl->mb_linesize, sl->mb_linesize,
|
|
16 + 5, 16 + 5 /*FIXME*/,
|
|
full_mx - 2, full_my - 2,
|
|
pic_width, pic_height);
|
|
src_cr = sl->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize;
|
|
}
|
|
qpix_op[luma_xy](dest_cr, src_cr, sl->mb_linesize); // FIXME try variable height perhaps?
|
|
if (!square)
|
|
qpix_op[luma_xy](dest_cr + delta, src_cr + delta, sl->mb_linesize);
|
|
return;
|
|
}
|
|
|
|
ysh = 3 - (chroma_idc == 2 /* yuv422 */);
|
|
if (chroma_idc == 1 /* yuv420 */ && MB_FIELD(sl)) {
|
|
// chroma offset when predicting from a field of opposite parity
|
|
my += 2 * ((sl->mb_y & 1) - (pic->reference - 1));
|
|
emu |= (my >> 3) < 0 || (my >> 3) + 8 >= (pic_height >> 1);
|
|
}
|
|
|
|
src_cb = pic->f.data[1] + ((mx >> 3) << pixel_shift) +
|
|
(my >> ysh) * sl->mb_uvlinesize;
|
|
src_cr = pic->f.data[2] + ((mx >> 3) << pixel_shift) +
|
|
(my >> ysh) * sl->mb_uvlinesize;
|
|
|
|
if (emu) {
|
|
h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, src_cb,
|
|
sl->mb_uvlinesize, sl->mb_uvlinesize,
|
|
9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
|
|
pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
|
|
src_cb = sl->edge_emu_buffer;
|
|
}
|
|
chroma_op(dest_cb, src_cb, sl->mb_uvlinesize,
|
|
height >> (chroma_idc == 1 /* yuv420 */),
|
|
mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7);
|
|
|
|
if (emu) {
|
|
h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, src_cr,
|
|
sl->mb_uvlinesize, sl->mb_uvlinesize,
|
|
9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
|
|
pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
|
|
src_cr = sl->edge_emu_buffer;
|
|
}
|
|
chroma_op(dest_cr, src_cr, sl->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */),
|
|
mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7);
|
|
}
|
|
|
|
static av_always_inline void mc_part_std(const H264Context *h, H264SliceContext *sl,
|
|
int n, int square,
|
|
int height, int delta,
|
|
uint8_t *dest_y, uint8_t *dest_cb,
|
|
uint8_t *dest_cr,
|
|
int x_offset, int y_offset,
|
|
const qpel_mc_func *qpix_put,
|
|
h264_chroma_mc_func chroma_put,
|
|
const qpel_mc_func *qpix_avg,
|
|
h264_chroma_mc_func chroma_avg,
|
|
int list0, int list1,
|
|
int pixel_shift, int chroma_idc)
|
|
{
|
|
const qpel_mc_func *qpix_op = qpix_put;
|
|
h264_chroma_mc_func chroma_op = chroma_put;
|
|
|
|
dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->mb_linesize;
|
|
if (chroma_idc == 3 /* yuv444 */) {
|
|
dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->mb_linesize;
|
|
dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->mb_linesize;
|
|
} else if (chroma_idc == 2 /* yuv422 */) {
|
|
dest_cb += (x_offset << pixel_shift) + 2 * y_offset * sl->mb_uvlinesize;
|
|
dest_cr += (x_offset << pixel_shift) + 2 * y_offset * sl->mb_uvlinesize;
|
|
} else { /* yuv420 */
|
|
dest_cb += (x_offset << pixel_shift) + y_offset * sl->mb_uvlinesize;
|
|
dest_cr += (x_offset << pixel_shift) + y_offset * sl->mb_uvlinesize;
|
|
}
|
|
x_offset += 8 * sl->mb_x;
|
|
y_offset += 8 * (sl->mb_y >> MB_FIELD(sl));
|
|
|
|
if (list0) {
|
|
H264Picture *ref = &sl->ref_list[0][sl->ref_cache[0][scan8[n]]];
|
|
mc_dir_part(h, sl, ref, n, square, height, delta, 0,
|
|
dest_y, dest_cb, dest_cr, x_offset, y_offset,
|
|
qpix_op, chroma_op, pixel_shift, chroma_idc);
|
|
|
|
qpix_op = qpix_avg;
|
|
chroma_op = chroma_avg;
|
|
}
|
|
|
|
if (list1) {
|
|
H264Picture *ref = &sl->ref_list[1][sl->ref_cache[1][scan8[n]]];
|
|
mc_dir_part(h, sl, ref, n, square, height, delta, 1,
|
|
dest_y, dest_cb, dest_cr, x_offset, y_offset,
|
|
qpix_op, chroma_op, pixel_shift, chroma_idc);
|
|
}
|
|
}
|
|
|
|
static av_always_inline void mc_part_weighted(const H264Context *h, H264SliceContext *sl,
|
|
int n, int square,
|
|
int height, int delta,
|
|
uint8_t *dest_y, uint8_t *dest_cb,
|
|
uint8_t *dest_cr,
|
|
int x_offset, int y_offset,
|
|
const qpel_mc_func *qpix_put,
|
|
h264_chroma_mc_func chroma_put,
|
|
h264_weight_func luma_weight_op,
|
|
h264_weight_func chroma_weight_op,
|
|
h264_biweight_func luma_weight_avg,
|
|
h264_biweight_func chroma_weight_avg,
|
|
int list0, int list1,
|
|
int pixel_shift, int chroma_idc)
|
|
{
|
|
int chroma_height;
|
|
|
|
dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->mb_linesize;
|
|
if (chroma_idc == 3 /* yuv444 */) {
|
|
chroma_height = height;
|
|
chroma_weight_avg = luma_weight_avg;
|
|
chroma_weight_op = luma_weight_op;
|
|
dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->mb_linesize;
|
|
dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->mb_linesize;
|
|
} else if (chroma_idc == 2 /* yuv422 */) {
|
|
chroma_height = height;
|
|
dest_cb += (x_offset << pixel_shift) + 2 * y_offset * sl->mb_uvlinesize;
|
|
dest_cr += (x_offset << pixel_shift) + 2 * y_offset * sl->mb_uvlinesize;
|
|
} else { /* yuv420 */
|
|
chroma_height = height >> 1;
|
|
dest_cb += (x_offset << pixel_shift) + y_offset * sl->mb_uvlinesize;
|
|
dest_cr += (x_offset << pixel_shift) + y_offset * sl->mb_uvlinesize;
|
|
}
|
|
x_offset += 8 * sl->mb_x;
|
|
y_offset += 8 * (sl->mb_y >> MB_FIELD(sl));
|
|
|
|
if (list0 && list1) {
|
|
/* don't optimize for luma-only case, since B-frames usually
|
|
* use implicit weights => chroma too. */
|
|
uint8_t *tmp_cb = sl->bipred_scratchpad;
|
|
uint8_t *tmp_cr = sl->bipred_scratchpad + (16 << pixel_shift);
|
|
uint8_t *tmp_y = sl->bipred_scratchpad + 16 * sl->mb_uvlinesize;
|
|
int refn0 = sl->ref_cache[0][scan8[n]];
|
|
int refn1 = sl->ref_cache[1][scan8[n]];
|
|
|
|
mc_dir_part(h, sl, &sl->ref_list[0][refn0], n, square, height, delta, 0,
|
|
dest_y, dest_cb, dest_cr,
|
|
x_offset, y_offset, qpix_put, chroma_put,
|
|
pixel_shift, chroma_idc);
|
|
mc_dir_part(h, sl, &sl->ref_list[1][refn1], n, square, height, delta, 1,
|
|
tmp_y, tmp_cb, tmp_cr,
|
|
x_offset, y_offset, qpix_put, chroma_put,
|
|
pixel_shift, chroma_idc);
|
|
|
|
if (sl->use_weight == 2) {
|
|
int weight0 = sl->implicit_weight[refn0][refn1][sl->mb_y & 1];
|
|
int weight1 = 64 - weight0;
|
|
luma_weight_avg(dest_y, tmp_y, sl->mb_linesize,
|
|
height, 5, weight0, weight1, 0);
|
|
chroma_weight_avg(dest_cb, tmp_cb, sl->mb_uvlinesize,
|
|
chroma_height, 5, weight0, weight1, 0);
|
|
chroma_weight_avg(dest_cr, tmp_cr, sl->mb_uvlinesize,
|
|
chroma_height, 5, weight0, weight1, 0);
|
|
} else {
|
|
luma_weight_avg(dest_y, tmp_y, sl->mb_linesize, height,
|
|
sl->luma_log2_weight_denom,
|
|
sl->luma_weight[refn0][0][0],
|
|
sl->luma_weight[refn1][1][0],
|
|
sl->luma_weight[refn0][0][1] +
|
|
sl->luma_weight[refn1][1][1]);
|
|
chroma_weight_avg(dest_cb, tmp_cb, sl->mb_uvlinesize, chroma_height,
|
|
sl->chroma_log2_weight_denom,
|
|
sl->chroma_weight[refn0][0][0][0],
|
|
sl->chroma_weight[refn1][1][0][0],
|
|
sl->chroma_weight[refn0][0][0][1] +
|
|
sl->chroma_weight[refn1][1][0][1]);
|
|
chroma_weight_avg(dest_cr, tmp_cr, sl->mb_uvlinesize, chroma_height,
|
|
sl->chroma_log2_weight_denom,
|
|
sl->chroma_weight[refn0][0][1][0],
|
|
sl->chroma_weight[refn1][1][1][0],
|
|
sl->chroma_weight[refn0][0][1][1] +
|
|
sl->chroma_weight[refn1][1][1][1]);
|
|
}
|
|
} else {
|
|
int list = list1 ? 1 : 0;
|
|
int refn = sl->ref_cache[list][scan8[n]];
|
|
H264Picture *ref = &sl->ref_list[list][refn];
|
|
mc_dir_part(h, sl, ref, n, square, height, delta, list,
|
|
dest_y, dest_cb, dest_cr, x_offset, y_offset,
|
|
qpix_put, chroma_put, pixel_shift, chroma_idc);
|
|
|
|
luma_weight_op(dest_y, sl->mb_linesize, height,
|
|
sl->luma_log2_weight_denom,
|
|
sl->luma_weight[refn][list][0],
|
|
sl->luma_weight[refn][list][1]);
|
|
if (sl->use_weight_chroma) {
|
|
chroma_weight_op(dest_cb, sl->mb_uvlinesize, chroma_height,
|
|
sl->chroma_log2_weight_denom,
|
|
sl->chroma_weight[refn][list][0][0],
|
|
sl->chroma_weight[refn][list][0][1]);
|
|
chroma_weight_op(dest_cr, sl->mb_uvlinesize, chroma_height,
|
|
sl->chroma_log2_weight_denom,
|
|
sl->chroma_weight[refn][list][1][0],
|
|
sl->chroma_weight[refn][list][1][1]);
|
|
}
|
|
}
|
|
}
|
|
|
|
static av_always_inline void prefetch_motion(const H264Context *h, H264SliceContext *sl,
|
|
int list, int pixel_shift,
|
|
int chroma_idc)
|
|
{
|
|
/* fetch pixels for estimated mv 4 macroblocks ahead
|
|
* optimized for 64byte cache lines */
|
|
const int refn = sl->ref_cache[list][scan8[0]];
|
|
if (refn >= 0) {
|
|
const int mx = (sl->mv_cache[list][scan8[0]][0] >> 2) + 16 * sl->mb_x + 8;
|
|
const int my = (sl->mv_cache[list][scan8[0]][1] >> 2) + 16 * sl->mb_y;
|
|
uint8_t **src = sl->ref_list[list][refn].f.data;
|
|
int off = (mx << pixel_shift) +
|
|
(my + (sl->mb_x & 3) * 4) * sl->mb_linesize +
|
|
(64 << pixel_shift);
|
|
h->vdsp.prefetch(src[0] + off, h->linesize, 4);
|
|
if (chroma_idc == 3 /* yuv444 */) {
|
|
h->vdsp.prefetch(src[1] + off, h->linesize, 4);
|
|
h->vdsp.prefetch(src[2] + off, h->linesize, 4);
|
|
} else {
|
|
off = ((mx >> 1) << pixel_shift) +
|
|
((my >> 1) + (sl->mb_x & 7)) * h->uvlinesize +
|
|
(64 << pixel_shift);
|
|
h->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
|
|
}
|
|
}
|
|
}
|
|
|
|
static av_always_inline void xchg_mb_border(const H264Context *h, H264SliceContext *sl,
|
|
uint8_t *src_y,
|
|
uint8_t *src_cb, uint8_t *src_cr,
|
|
int linesize, int uvlinesize,
|
|
int xchg, int chroma444,
|
|
int simple, int pixel_shift)
|
|
{
|
|
int deblock_topleft;
|
|
int deblock_top;
|
|
int top_idx = 1;
|
|
uint8_t *top_border_m1;
|
|
uint8_t *top_border;
|
|
|
|
if (!simple && FRAME_MBAFF(h)) {
|
|
if (sl->mb_y & 1) {
|
|
if (!MB_MBAFF(sl))
|
|
return;
|
|
} else {
|
|
top_idx = MB_MBAFF(sl) ? 0 : 1;
|
|
}
|
|
}
|
|
|
|
if (sl->deblocking_filter == 2) {
|
|
deblock_topleft = h->slice_table[sl->mb_xy - 1 - h->mb_stride] == sl->slice_num;
|
|
deblock_top = sl->top_type;
|
|
} else {
|
|
deblock_topleft = (sl->mb_x > 0);
|
|
deblock_top = (sl->mb_y > !!MB_FIELD(sl));
|
|
}
|
|
|
|
src_y -= linesize + 1 + pixel_shift;
|
|
src_cb -= uvlinesize + 1 + pixel_shift;
|
|
src_cr -= uvlinesize + 1 + pixel_shift;
|
|
|
|
top_border_m1 = h->top_borders[top_idx][sl->mb_x - 1];
|
|
top_border = h->top_borders[top_idx][sl->mb_x];
|
|
|
|
#define XCHG(a, b, xchg) \
|
|
if (pixel_shift) { \
|
|
if (xchg) { \
|
|
AV_SWAP64(b + 0, a + 0); \
|
|
AV_SWAP64(b + 8, a + 8); \
|
|
} else { \
|
|
AV_COPY128(b, a); \
|
|
} \
|
|
} else if (xchg) \
|
|
AV_SWAP64(b, a); \
|
|
else \
|
|
AV_COPY64(b, a);
|
|
|
|
if (deblock_top) {
|
|
if (deblock_topleft) {
|
|
XCHG(top_border_m1 + (8 << pixel_shift),
|
|
src_y - (7 << pixel_shift), 1);
|
|
}
|
|
XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
|
|
XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1);
|
|
if (sl->mb_x + 1 < h->mb_width) {
|
|
XCHG(h->top_borders[top_idx][sl->mb_x + 1],
|
|
src_y + (17 << pixel_shift), 1);
|
|
}
|
|
}
|
|
if (simple || !CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) {
|
|
if (chroma444) {
|
|
if (deblock_top) {
|
|
if (deblock_topleft) {
|
|
XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1);
|
|
XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1);
|
|
}
|
|
XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg);
|
|
XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1);
|
|
XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg);
|
|
XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1);
|
|
if (sl->mb_x + 1 < h->mb_width) {
|
|
XCHG(h->top_borders[top_idx][sl->mb_x + 1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1);
|
|
XCHG(h->top_borders[top_idx][sl->mb_x + 1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1);
|
|
}
|
|
}
|
|
} else {
|
|
if (deblock_top) {
|
|
if (deblock_topleft) {
|
|
XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
|
|
XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
|
|
}
|
|
XCHG(top_border + (16 << pixel_shift), src_cb + 1 + pixel_shift, 1);
|
|
XCHG(top_border + (24 << pixel_shift), src_cr + 1 + pixel_shift, 1);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static av_always_inline int dctcoef_get(int16_t *mb, int high_bit_depth,
|
|
int index)
|
|
{
|
|
if (high_bit_depth) {
|
|
return AV_RN32A(((int32_t *)mb) + index);
|
|
} else
|
|
return AV_RN16A(mb + index);
|
|
}
|
|
|
|
static av_always_inline void dctcoef_set(int16_t *mb, int high_bit_depth,
|
|
int index, int value)
|
|
{
|
|
if (high_bit_depth) {
|
|
AV_WN32A(((int32_t *)mb) + index, value);
|
|
} else
|
|
AV_WN16A(mb + index, value);
|
|
}
|
|
|
|
static av_always_inline void hl_decode_mb_predict_luma(const H264Context *h,
|
|
H264SliceContext *sl,
|
|
int mb_type, int is_h264,
|
|
int simple,
|
|
int transform_bypass,
|
|
int pixel_shift,
|
|
const int *block_offset,
|
|
int linesize,
|
|
uint8_t *dest_y, int p)
|
|
{
|
|
void (*idct_add)(uint8_t *dst, int16_t *block, int stride);
|
|
void (*idct_dc_add)(uint8_t *dst, int16_t *block, int stride);
|
|
int i;
|
|
int qscale = p == 0 ? sl->qscale : sl->chroma_qp[p - 1];
|
|
block_offset += 16 * p;
|
|
if (IS_INTRA4x4(mb_type)) {
|
|
if (IS_8x8DCT(mb_type)) {
|
|
if (transform_bypass) {
|
|
idct_dc_add =
|
|
idct_add = h->h264dsp.h264_add_pixels8_clear;
|
|
} else {
|
|
idct_dc_add = h->h264dsp.h264_idct8_dc_add;
|
|
idct_add = h->h264dsp.h264_idct8_add;
|
|
}
|
|
for (i = 0; i < 16; i += 4) {
|
|
uint8_t *const ptr = dest_y + block_offset[i];
|
|
const int dir = sl->intra4x4_pred_mode_cache[scan8[i]];
|
|
if (transform_bypass && h->sps.profile_idc == 244 && dir <= 1) {
|
|
h->hpc.pred8x8l_add[dir](ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize);
|
|
} else {
|
|
const int nnz = sl->non_zero_count_cache[scan8[i + p * 16]];
|
|
h->hpc.pred8x8l[dir](ptr, (sl->topleft_samples_available << i) & 0x8000,
|
|
(sl->topright_samples_available << i) & 0x4000, linesize);
|
|
if (nnz) {
|
|
if (nnz == 1 && dctcoef_get(sl->mb, pixel_shift, i * 16 + p * 256))
|
|
idct_dc_add(ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize);
|
|
else
|
|
idct_add(ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize);
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
if (transform_bypass) {
|
|
idct_dc_add =
|
|
idct_add = h->h264dsp.h264_add_pixels4_clear;
|
|
} else {
|
|
idct_dc_add = h->h264dsp.h264_idct_dc_add;
|
|
idct_add = h->h264dsp.h264_idct_add;
|
|
}
|
|
for (i = 0; i < 16; i++) {
|
|
uint8_t *const ptr = dest_y + block_offset[i];
|
|
const int dir = sl->intra4x4_pred_mode_cache[scan8[i]];
|
|
|
|
if (transform_bypass && h->sps.profile_idc == 244 && dir <= 1) {
|
|
h->hpc.pred4x4_add[dir](ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize);
|
|
} else {
|
|
uint8_t *topright;
|
|
int nnz, tr;
|
|
uint64_t tr_high;
|
|
if (dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED) {
|
|
const int topright_avail = (sl->topright_samples_available << i) & 0x8000;
|
|
assert(sl->mb_y || linesize <= block_offset[i]);
|
|
if (!topright_avail) {
|
|
if (pixel_shift) {
|
|
tr_high = ((uint16_t *)ptr)[3 - linesize / 2] * 0x0001000100010001ULL;
|
|
topright = (uint8_t *)&tr_high;
|
|
} else {
|
|
tr = ptr[3 - linesize] * 0x01010101u;
|
|
topright = (uint8_t *)&tr;
|
|
}
|
|
} else
|
|
topright = ptr + (4 << pixel_shift) - linesize;
|
|
} else
|
|
topright = NULL;
|
|
|
|
h->hpc.pred4x4[dir](ptr, topright, linesize);
|
|
nnz = sl->non_zero_count_cache[scan8[i + p * 16]];
|
|
if (nnz) {
|
|
if (is_h264) {
|
|
if (nnz == 1 && dctcoef_get(sl->mb, pixel_shift, i * 16 + p * 256))
|
|
idct_dc_add(ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize);
|
|
else
|
|
idct_add(ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize);
|
|
} else if (CONFIG_SVQ3_DECODER)
|
|
ff_svq3_add_idct_c(ptr, sl->mb + i * 16 + p * 256, linesize, qscale, 0);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
h->hpc.pred16x16[sl->intra16x16_pred_mode](dest_y, linesize);
|
|
if (is_h264) {
|
|
if (sl->non_zero_count_cache[scan8[LUMA_DC_BLOCK_INDEX + p]]) {
|
|
if (!transform_bypass)
|
|
h->h264dsp.h264_luma_dc_dequant_idct(sl->mb + (p * 256 << pixel_shift),
|
|
sl->mb_luma_dc[p],
|
|
h->dequant4_coeff[p][qscale][0]);
|
|
else {
|
|
static const uint8_t dc_mapping[16] = {
|
|
0 * 16, 1 * 16, 4 * 16, 5 * 16,
|
|
2 * 16, 3 * 16, 6 * 16, 7 * 16,
|
|
8 * 16, 9 * 16, 12 * 16, 13 * 16,
|
|
10 * 16, 11 * 16, 14 * 16, 15 * 16
|
|
};
|
|
for (i = 0; i < 16; i++)
|
|
dctcoef_set(sl->mb + (p * 256 << pixel_shift),
|
|
pixel_shift, dc_mapping[i],
|
|
dctcoef_get(sl->mb_luma_dc[p],
|
|
pixel_shift, i));
|
|
}
|
|
}
|
|
} else if (CONFIG_SVQ3_DECODER)
|
|
ff_svq3_luma_dc_dequant_idct_c(sl->mb + p * 256,
|
|
sl->mb_luma_dc[p], qscale);
|
|
}
|
|
}
|
|
|
|
static av_always_inline void hl_decode_mb_idct_luma(const H264Context *h, H264SliceContext *sl,
|
|
int mb_type,
|
|
int is_h264, int simple,
|
|
int transform_bypass,
|
|
int pixel_shift,
|
|
const int *block_offset,
|
|
int linesize,
|
|
uint8_t *dest_y, int p)
|
|
{
|
|
void (*idct_add)(uint8_t *dst, int16_t *block, int stride);
|
|
int i;
|
|
block_offset += 16 * p;
|
|
if (!IS_INTRA4x4(mb_type)) {
|
|
if (is_h264) {
|
|
if (IS_INTRA16x16(mb_type)) {
|
|
if (transform_bypass) {
|
|
if (h->sps.profile_idc == 244 &&
|
|
(sl->intra16x16_pred_mode == VERT_PRED8x8 ||
|
|
sl->intra16x16_pred_mode == HOR_PRED8x8)) {
|
|
h->hpc.pred16x16_add[sl->intra16x16_pred_mode](dest_y, block_offset,
|
|
sl->mb + (p * 256 << pixel_shift),
|
|
linesize);
|
|
} else {
|
|
for (i = 0; i < 16; i++)
|
|
if (sl->non_zero_count_cache[scan8[i + p * 16]] ||
|
|
dctcoef_get(sl->mb, pixel_shift, i * 16 + p * 256))
|
|
h->h264dsp.h264_add_pixels4_clear(dest_y + block_offset[i],
|
|
sl->mb + (i * 16 + p * 256 << pixel_shift),
|
|
linesize);
|
|
}
|
|
} else {
|
|
h->h264dsp.h264_idct_add16intra(dest_y, block_offset,
|
|
sl->mb + (p * 256 << pixel_shift),
|
|
linesize,
|
|
sl->non_zero_count_cache + p * 5 * 8);
|
|
}
|
|
} else if (sl->cbp & 15) {
|
|
if (transform_bypass) {
|
|
const int di = IS_8x8DCT(mb_type) ? 4 : 1;
|
|
idct_add = IS_8x8DCT(mb_type) ? h->h264dsp.h264_add_pixels8_clear
|
|
: h->h264dsp.h264_add_pixels4_clear;
|
|
for (i = 0; i < 16; i += di)
|
|
if (sl->non_zero_count_cache[scan8[i + p * 16]])
|
|
idct_add(dest_y + block_offset[i],
|
|
sl->mb + (i * 16 + p * 256 << pixel_shift),
|
|
linesize);
|
|
} else {
|
|
if (IS_8x8DCT(mb_type))
|
|
h->h264dsp.h264_idct8_add4(dest_y, block_offset,
|
|
sl->mb + (p * 256 << pixel_shift),
|
|
linesize,
|
|
sl->non_zero_count_cache + p * 5 * 8);
|
|
else
|
|
h->h264dsp.h264_idct_add16(dest_y, block_offset,
|
|
sl->mb + (p * 256 << pixel_shift),
|
|
linesize,
|
|
sl->non_zero_count_cache + p * 5 * 8);
|
|
}
|
|
}
|
|
} else if (CONFIG_SVQ3_DECODER) {
|
|
for (i = 0; i < 16; i++)
|
|
if (sl->non_zero_count_cache[scan8[i + p * 16]] || sl->mb[i * 16 + p * 256]) {
|
|
// FIXME benchmark weird rule, & below
|
|
uint8_t *const ptr = dest_y + block_offset[i];
|
|
ff_svq3_add_idct_c(ptr, sl->mb + i * 16 + p * 256, linesize,
|
|
sl->qscale, IS_INTRA(mb_type) ? 1 : 0);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#define BITS 8
|
|
#define SIMPLE 1
|
|
#include "h264_mb_template.c"
|
|
|
|
#undef BITS
|
|
#define BITS 16
|
|
#include "h264_mb_template.c"
|
|
|
|
#undef SIMPLE
|
|
#define SIMPLE 0
|
|
#include "h264_mb_template.c"
|
|
|
|
void ff_h264_hl_decode_mb(const H264Context *h, H264SliceContext *sl)
|
|
{
|
|
const int mb_xy = sl->mb_xy;
|
|
const int mb_type = h->cur_pic.mb_type[mb_xy];
|
|
int is_complex = CONFIG_SMALL || sl->is_complex ||
|
|
IS_INTRA_PCM(mb_type) || sl->qscale == 0;
|
|
|
|
if (CHROMA444(h)) {
|
|
if (is_complex || h->pixel_shift)
|
|
hl_decode_mb_444_complex(h, sl);
|
|
else
|
|
hl_decode_mb_444_simple_8(h, sl);
|
|
} else if (is_complex) {
|
|
hl_decode_mb_complex(h, sl);
|
|
} else if (h->pixel_shift) {
|
|
hl_decode_mb_simple_16(h, sl);
|
|
} else
|
|
hl_decode_mb_simple_8(h, sl);
|
|
}
|