Mirror of https://github.com/FFmpeg/FFmpeg.git (synced 2025-01-03 05:10:03 +02:00)
Merge commit '36d04801ba9d8622c2d759c172aea18561bac74d'
* commit '36d04801ba9d8622c2d759c172aea18561bac74d':
  h264: move the scratch buffers into the per-slice context

Conflicts:
	libavcodec/h264.h
	libavcodec/h264_slice.c
	libavcodec/svq3.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
commit
3d1d8e1f95
@ -403,8 +403,6 @@ void ff_h264_free_tables(H264Context *h, int free_rbsp)
|
||||
continue;
|
||||
av_freep(&hx->top_borders[1]);
|
||||
av_freep(&hx->top_borders[0]);
|
||||
av_freep(&hx->bipred_scratchpad);
|
||||
av_freep(&hx->edge_emu_buffer);
|
||||
av_freep(&hx->dc_val_base);
|
||||
av_freep(&hx->er.mb_index2xy);
|
||||
av_freep(&hx->er.error_status_table);
|
||||
@ -419,6 +417,16 @@ void ff_h264_free_tables(H264Context *h, int free_rbsp)
|
||||
if (i)
|
||||
av_freep(&h->thread_context[i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < h->nb_slice_ctx; i++) {
|
||||
H264SliceContext *sl = &h->slice_ctx[i];
|
||||
|
||||
av_freep(&sl->bipred_scratchpad);
|
||||
av_freep(&sl->edge_emu_buffer);
|
||||
|
||||
sl->bipred_scratchpad_allocated = 0;
|
||||
sl->edge_emu_buffer_allocated = 0;
|
||||
}
|
||||
}
|
||||
|
||||
int ff_h264_alloc_tables(H264Context *h)
|
||||
|
@ -436,6 +436,11 @@ typedef struct H264SliceContext {
|
||||
|
||||
const uint8_t *intra_pcm_ptr;
|
||||
|
||||
uint8_t *bipred_scratchpad;
|
||||
uint8_t *edge_emu_buffer;
|
||||
int bipred_scratchpad_allocated;
|
||||
int edge_emu_buffer_allocated;
|
||||
|
||||
/**
|
||||
* non zero coeff count cache.
|
||||
* is 64 if not available.
|
||||
@ -763,14 +768,11 @@ typedef struct H264Context {
|
||||
int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs
|
||||
|
||||
int cur_chroma_format_idc;
|
||||
uint8_t *bipred_scratchpad;
|
||||
|
||||
int16_t slice_row[MAX_SLICES]; ///< to detect when MAX_SLICES is too low
|
||||
|
||||
uint8_t parse_history[6];
|
||||
int parse_history_count;
|
||||
int parse_last_mb;
|
||||
uint8_t *edge_emu_buffer;
|
||||
int16_t *dc_val_base;
|
||||
|
||||
AVBufferPool *qscale_table_pool;
|
||||
|
@ -237,12 +237,12 @@ static av_always_inline void mc_dir_part(const H264Context *h, H264SliceContext
|
||||
full_my < 0 - extra_height ||
|
||||
full_mx + 16 /*FIXME*/ > pic_width + extra_width ||
|
||||
full_my + 16 /*FIXME*/ > pic_height + extra_height) {
|
||||
h->vdsp.emulated_edge_mc(h->edge_emu_buffer,
|
||||
h->vdsp.emulated_edge_mc(sl->edge_emu_buffer,
|
||||
src_y - (2 << pixel_shift) - 2 * sl->mb_linesize,
|
||||
sl->mb_linesize, sl->mb_linesize,
|
||||
16 + 5, 16 + 5 /*FIXME*/, full_mx - 2,
|
||||
full_my - 2, pic_width, pic_height);
|
||||
src_y = h->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize;
|
||||
src_y = sl->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize;
|
||||
emu = 1;
|
||||
}
|
||||
|
||||
@ -256,13 +256,13 @@ static av_always_inline void mc_dir_part(const H264Context *h, H264SliceContext
|
||||
if (chroma_idc == 3 /* yuv444 */) {
|
||||
src_cb = pic->f.data[1] + offset;
|
||||
if (emu) {
|
||||
h->vdsp.emulated_edge_mc(h->edge_emu_buffer,
|
||||
h->vdsp.emulated_edge_mc(sl->edge_emu_buffer,
|
||||
src_cb - (2 << pixel_shift) - 2 * sl->mb_linesize,
|
||||
sl->mb_linesize, sl->mb_linesize,
|
||||
16 + 5, 16 + 5 /*FIXME*/,
|
||||
full_mx - 2, full_my - 2,
|
||||
pic_width, pic_height);
|
||||
src_cb = h->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize;
|
||||
src_cb = sl->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize;
|
||||
}
|
||||
qpix_op[luma_xy](dest_cb, src_cb, sl->mb_linesize); // FIXME try variable height perhaps?
|
||||
if (!square)
|
||||
@ -270,13 +270,13 @@ static av_always_inline void mc_dir_part(const H264Context *h, H264SliceContext
|
||||
|
||||
src_cr = pic->f.data[2] + offset;
|
||||
if (emu) {
|
||||
h->vdsp.emulated_edge_mc(h->edge_emu_buffer,
|
||||
h->vdsp.emulated_edge_mc(sl->edge_emu_buffer,
|
||||
src_cr - (2 << pixel_shift) - 2 * sl->mb_linesize,
|
||||
sl->mb_linesize, sl->mb_linesize,
|
||||
16 + 5, 16 + 5 /*FIXME*/,
|
||||
full_mx - 2, full_my - 2,
|
||||
pic_width, pic_height);
|
||||
src_cr = h->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize;
|
||||
src_cr = sl->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize;
|
||||
}
|
||||
qpix_op[luma_xy](dest_cr, src_cr, sl->mb_linesize); // FIXME try variable height perhaps?
|
||||
if (!square)
|
||||
@ -297,22 +297,22 @@ static av_always_inline void mc_dir_part(const H264Context *h, H264SliceContext
|
||||
(my >> ysh) * sl->mb_uvlinesize;
|
||||
|
||||
if (emu) {
|
||||
h->vdsp.emulated_edge_mc(h->edge_emu_buffer, src_cb,
|
||||
h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, src_cb,
|
||||
sl->mb_uvlinesize, sl->mb_uvlinesize,
|
||||
9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
|
||||
pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
|
||||
src_cb = h->edge_emu_buffer;
|
||||
src_cb = sl->edge_emu_buffer;
|
||||
}
|
||||
chroma_op(dest_cb, src_cb, sl->mb_uvlinesize,
|
||||
height >> (chroma_idc == 1 /* yuv420 */),
|
||||
mx & 7, ((unsigned)my << (chroma_idc == 2 /* yuv422 */)) & 7);
|
||||
|
||||
if (emu) {
|
||||
h->vdsp.emulated_edge_mc(h->edge_emu_buffer, src_cr,
|
||||
h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, src_cr,
|
||||
sl->mb_uvlinesize, sl->mb_uvlinesize,
|
||||
9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
|
||||
pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
|
||||
src_cr = h->edge_emu_buffer;
|
||||
src_cr = sl->edge_emu_buffer;
|
||||
}
|
||||
chroma_op(dest_cr, src_cr, sl->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */),
|
||||
mx & 7, ((unsigned)my << (chroma_idc == 2 /* yuv422 */)) & 7);
|
||||
@ -405,9 +405,9 @@ static av_always_inline void mc_part_weighted(const H264Context *h, H264SliceCon
|
||||
if (list0 && list1) {
|
||||
/* don't optimize for luma-only case, since B-frames usually
|
||||
* use implicit weights => chroma too. */
|
||||
uint8_t *tmp_cb = h->bipred_scratchpad;
|
||||
uint8_t *tmp_cr = h->bipred_scratchpad + (16 << pixel_shift);
|
||||
uint8_t *tmp_y = h->bipred_scratchpad + 16 * sl->mb_uvlinesize;
|
||||
uint8_t *tmp_cb = sl->bipred_scratchpad;
|
||||
uint8_t *tmp_cr = sl->bipred_scratchpad + (16 << pixel_shift);
|
||||
uint8_t *tmp_y = sl->bipred_scratchpad + 16 * sl->mb_uvlinesize;
|
||||
int refn0 = sl->ref_cache[0][scan8[n]];
|
||||
int refn1 = sl->ref_cache[1][scan8[n]];
|
||||
|
||||
|
@ -161,21 +161,20 @@ static void release_unused_pictures(H264Context *h, int remove_current)
|
||||
}
|
||||
}
|
||||
|
||||
static int alloc_scratch_buffers(H264Context *h, int linesize)
|
||||
static int alloc_scratch_buffers(H264SliceContext *sl, int linesize)
|
||||
{
|
||||
int alloc_size = FFALIGN(FFABS(linesize) + 32, 32);
|
||||
|
||||
if (h->bipred_scratchpad)
|
||||
return 0;
|
||||
|
||||
h->bipred_scratchpad = av_malloc(16 * 6 * alloc_size);
|
||||
av_fast_malloc(&sl->bipred_scratchpad, &sl->bipred_scratchpad_allocated, 16 * 6 * alloc_size);
|
||||
// edge emu needs blocksize + filter length - 1
|
||||
// (= 21x21 for h264)
|
||||
h->edge_emu_buffer = av_mallocz(alloc_size * 2 * 21);
|
||||
av_fast_malloc(&sl->edge_emu_buffer, &sl->edge_emu_buffer_allocated, alloc_size * 2 * 21);
|
||||
|
||||
if (!h->bipred_scratchpad || !h->edge_emu_buffer) {
|
||||
av_freep(&h->bipred_scratchpad);
|
||||
av_freep(&h->edge_emu_buffer);
|
||||
if (!sl->bipred_scratchpad || !sl->edge_emu_buffer) {
|
||||
av_freep(&sl->bipred_scratchpad);
|
||||
av_freep(&sl->edge_emu_buffer);
|
||||
sl->bipred_scratchpad_allocated = 0;
|
||||
sl->edge_emu_buffer_allocated = 0;
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
|
||||
@ -402,8 +401,6 @@ static void clone_tables(H264Context *dst, H264SliceContext *sl,
|
||||
dst->DPB = src->DPB;
|
||||
dst->cur_pic_ptr = src->cur_pic_ptr;
|
||||
dst->cur_pic = src->cur_pic;
|
||||
dst->bipred_scratchpad = NULL;
|
||||
dst->edge_emu_buffer = NULL;
|
||||
ff_h264_pred_init(&dst->hpc, src->avctx->codec_id, src->sps.bit_depth_luma,
|
||||
src->sps.chroma_format_idc);
|
||||
}
|
||||
@ -481,8 +478,6 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
|
||||
* the current value */
|
||||
h->avctx->bits_per_raw_sample = h->sps.bit_depth_luma;
|
||||
|
||||
av_freep(&h->bipred_scratchpad);
|
||||
|
||||
h->width = h1->width;
|
||||
h->height = h1->height;
|
||||
h->mb_height = h1->mb_height;
|
||||
@ -587,8 +582,6 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
|
||||
}
|
||||
}
|
||||
|
||||
h->bipred_scratchpad = NULL;
|
||||
h->edge_emu_buffer = NULL;
|
||||
|
||||
h->thread_context[0] = h;
|
||||
h->context_initialized = h1->context_initialized;
|
||||
@ -1736,14 +1729,6 @@ int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl, H264Contex
|
||||
if (h != h0 && (ret = clone_slice(h, h0)) < 0)
|
||||
return ret;
|
||||
|
||||
/* can't be in alloc_tables because linesize isn't known there.
|
||||
* FIXME: redo bipred weight to not require extra buffer? */
|
||||
for (i = 0; i < h->slice_context_count; i++)
|
||||
if (h->thread_context[i]) {
|
||||
ret = alloc_scratch_buffers(h->thread_context[i], h->linesize);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
|
||||
h->cur_pic_ptr->frame_num = h->frame_num; // FIXME frame_num cleanup
|
||||
|
||||
@ -2397,6 +2382,11 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg)
|
||||
H264SliceContext *sl = arg;
|
||||
H264Context *h = sl->h264;
|
||||
int lf_x_start = sl->mb_x;
|
||||
int ret;
|
||||
|
||||
ret = alloc_scratch_buffers(sl, h->linesize);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
sl->mb_skip_run = -1;
|
||||
|
||||
|
@ -303,6 +303,7 @@ static inline void svq3_mc_dir_part(SVQ3Context *s,
|
||||
int thirdpel, int dir, int avg)
|
||||
{
|
||||
H264Context *h = &s->h;
|
||||
H264SliceContext *sl = &h->slice_ctx[0];
|
||||
const H264Picture *pic = (dir == 0) ? s->last_pic : s->next_pic;
|
||||
uint8_t *src, *dest;
|
||||
int i, emu = 0;
|
||||
@ -323,11 +324,11 @@ static inline void svq3_mc_dir_part(SVQ3Context *s,
|
||||
src = pic->f.data[0] + mx + my * h->linesize;
|
||||
|
||||
if (emu) {
|
||||
h->vdsp.emulated_edge_mc(h->edge_emu_buffer, src,
|
||||
h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, src,
|
||||
h->linesize, h->linesize,
|
||||
width + 1, height + 1,
|
||||
mx, my, s->h_edge_pos, s->v_edge_pos);
|
||||
src = h->edge_emu_buffer;
|
||||
src = sl->edge_emu_buffer;
|
||||
}
|
||||
if (thirdpel)
|
||||
(avg ? s->tdsp.avg_tpel_pixels_tab
|
||||
@ -350,12 +351,12 @@ static inline void svq3_mc_dir_part(SVQ3Context *s,
|
||||
src = pic->f.data[i] + mx + my * h->uvlinesize;
|
||||
|
||||
if (emu) {
|
||||
h->vdsp.emulated_edge_mc(h->edge_emu_buffer, src,
|
||||
h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, src,
|
||||
h->uvlinesize, h->uvlinesize,
|
||||
width + 1, height + 1,
|
||||
mx, my, (s->h_edge_pos >> 1),
|
||||
s->v_edge_pos >> 1);
|
||||
src = h->edge_emu_buffer;
|
||||
src = sl->edge_emu_buffer;
|
||||
}
|
||||
if (thirdpel)
|
||||
(avg ? s->tdsp.avg_tpel_pixels_tab
|
||||
@ -1082,6 +1083,7 @@ static int get_buffer(AVCodecContext *avctx, H264Picture *pic)
|
||||
{
|
||||
SVQ3Context *s = avctx->priv_data;
|
||||
H264Context *h = &s->h;
|
||||
H264SliceContext *sl = &h->slice_ctx[0];
|
||||
const int big_mb_num = h->mb_stride * (h->mb_height + 1) + 1;
|
||||
const int mb_array_size = h->mb_stride * h->mb_height;
|
||||
const int b4_stride = h->mb_width * 4 + 1;
|
||||
@ -1115,9 +1117,9 @@ static int get_buffer(AVCodecContext *avctx, H264Picture *pic)
|
||||
if (ret < 0)
|
||||
goto fail;
|
||||
|
||||
if (!h->edge_emu_buffer) {
|
||||
h->edge_emu_buffer = av_mallocz_array(pic->f.linesize[0], 17);
|
||||
if (!h->edge_emu_buffer)
|
||||
if (!sl->edge_emu_buffer) {
|
||||
sl->edge_emu_buffer = av_mallocz_array(pic->f.linesize[0], 17);
|
||||
if (!sl->edge_emu_buffer)
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
|
||||
@ -1373,7 +1375,6 @@ static av_cold int svq3_decode_end(AVCodecContext *avctx)
|
||||
|
||||
av_freep(&s->buf);
|
||||
s->buf_size = 0;
|
||||
av_freep(&h->edge_emu_buffer);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user