mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-13 21:28:01 +02:00
vp8: implement sliced threading
Testing shows a 25-30% gain on HD clips with two threads and up to a 50% gain with eight threads. Sliced threading uses more memory than single-threaded or frame-threaded decoding; frame threading and single threading keep the previous memory layout.
Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
This commit is contained in:
parent
17343e3952
commit
951455c1c1
510
libavcodec/vp8.c
510
libavcodec/vp8.c
@ -4,6 +4,7 @@
|
|||||||
* Copyright (C) 2010 David Conrad
|
* Copyright (C) 2010 David Conrad
|
||||||
* Copyright (C) 2010 Ronald S. Bultje
|
* Copyright (C) 2010 Ronald S. Bultje
|
||||||
* Copyright (C) 2010 Jason Garrett-Glaser
|
* Copyright (C) 2010 Jason Garrett-Glaser
|
||||||
|
* Copyright (C) 2012 Daniel Kang
|
||||||
*
|
*
|
||||||
* This file is part of Libav.
|
* This file is part of Libav.
|
||||||
*
|
*
|
||||||
@ -30,17 +31,28 @@
|
|||||||
#include "rectangle.h"
|
#include "rectangle.h"
|
||||||
#include "thread.h"
|
#include "thread.h"
|
||||||
|
|
||||||
|
#if HAVE_PTHREADS
|
||||||
|
#include <pthread.h>
|
||||||
|
#elif HAVE_W32THREADS
|
||||||
|
#include "w32pthreads.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#if ARCH_ARM
|
#if ARCH_ARM
|
||||||
# include "arm/vp8.h"
|
# include "arm/vp8.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static void free_buffers(VP8Context *s)
|
static void free_buffers(VP8Context *s)
|
||||||
{
|
{
|
||||||
|
int i;
|
||||||
|
if (s->thread_data)
|
||||||
|
for (i = 0; i < MAX_THREADS; i++) {
|
||||||
|
av_freep(&s->thread_data[i].filter_strength);
|
||||||
|
av_freep(&s->thread_data[i].edge_emu_buffer);
|
||||||
|
}
|
||||||
|
av_freep(&s->thread_data);
|
||||||
av_freep(&s->macroblocks_base);
|
av_freep(&s->macroblocks_base);
|
||||||
av_freep(&s->filter_strength);
|
|
||||||
av_freep(&s->intra4x4_pred_mode_top);
|
av_freep(&s->intra4x4_pred_mode_top);
|
||||||
av_freep(&s->top_nnz);
|
av_freep(&s->top_nnz);
|
||||||
av_freep(&s->edge_emu_buffer);
|
|
||||||
av_freep(&s->top_border);
|
av_freep(&s->top_border);
|
||||||
|
|
||||||
s->macroblocks = NULL;
|
s->macroblocks = NULL;
|
||||||
@ -108,6 +120,9 @@ static void vp8_decode_flush(AVCodecContext *avctx)
|
|||||||
|
|
||||||
static int update_dimensions(VP8Context *s, int width, int height)
|
static int update_dimensions(VP8Context *s, int width, int height)
|
||||||
{
|
{
|
||||||
|
AVCodecContext *avctx = s->avctx;
|
||||||
|
int i;
|
||||||
|
|
||||||
if (width != s->avctx->width ||
|
if (width != s->avctx->width ||
|
||||||
height != s->avctx->height) {
|
height != s->avctx->height) {
|
||||||
if (av_image_check_size(width, height, 0, s->avctx))
|
if (av_image_check_size(width, height, 0, s->avctx))
|
||||||
@ -121,14 +136,25 @@ static int update_dimensions(VP8Context *s, int width, int height)
|
|||||||
s->mb_width = (s->avctx->coded_width +15) / 16;
|
s->mb_width = (s->avctx->coded_width +15) / 16;
|
||||||
s->mb_height = (s->avctx->coded_height+15) / 16;
|
s->mb_height = (s->avctx->coded_height+15) / 16;
|
||||||
|
|
||||||
s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
|
s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) && (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
|
||||||
s->filter_strength = av_mallocz(s->mb_width*sizeof(*s->filter_strength));
|
if (!s->mb_layout) { // Frame threading and one thread
|
||||||
s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
|
s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
|
||||||
s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
|
s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
|
||||||
s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
|
}
|
||||||
|
else // Sliced threading
|
||||||
|
s->macroblocks_base = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
|
||||||
|
s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
|
||||||
|
s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
|
||||||
|
s->thread_data = av_mallocz(MAX_THREADS*sizeof(VP8ThreadData));
|
||||||
|
|
||||||
if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top ||
|
for (i = 0; i < MAX_THREADS; i++) {
|
||||||
!s->top_nnz || !s->top_border)
|
s->thread_data[i].filter_strength = av_mallocz(s->mb_width*sizeof(*s->thread_data[0].filter_strength));
|
||||||
|
pthread_mutex_init(&s->thread_data[i].lock, NULL);
|
||||||
|
pthread_cond_init(&s->thread_data[i].cond, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
|
||||||
|
(!s->intra4x4_pred_mode_top && !s->mb_layout))
|
||||||
return AVERROR(ENOMEM);
|
return AVERROR(ENOMEM);
|
||||||
|
|
||||||
s->macroblocks = s->macroblocks_base + 1;
|
s->macroblocks = s->macroblocks_base + 1;
|
||||||
@ -332,12 +358,6 @@ static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
|
|||||||
memset(&s->segmentation, 0, sizeof(s->segmentation));
|
memset(&s->segmentation, 0, sizeof(s->segmentation));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!s->macroblocks_base || /* first frame */
|
|
||||||
width != s->avctx->width || height != s->avctx->height) {
|
|
||||||
if ((ret = update_dimensions(s, width, height)) < 0)
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
ff_vp56_init_range_decoder(c, buf, header_size);
|
ff_vp56_init_range_decoder(c, buf, header_size);
|
||||||
buf += header_size;
|
buf += header_size;
|
||||||
buf_size -= header_size;
|
buf_size -= header_size;
|
||||||
@ -366,6 +386,12 @@ static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
|
|||||||
return AVERROR_INVALIDDATA;
|
return AVERROR_INVALIDDATA;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!s->macroblocks_base || /* first frame */
|
||||||
|
width != s->avctx->width || height != s->avctx->height) {
|
||||||
|
if ((ret = update_dimensions(s, width, height)) < 0)
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
get_quants(s);
|
get_quants(s);
|
||||||
|
|
||||||
if (!s->keyframe) {
|
if (!s->keyframe) {
|
||||||
@ -468,19 +494,26 @@ const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
|
|||||||
* @returns the number of motion vectors parsed (2, 4 or 16)
|
* @returns the number of motion vectors parsed (2, 4 or 16)
|
||||||
*/
|
*/
|
||||||
static av_always_inline
|
static av_always_inline
|
||||||
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
|
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout)
|
||||||
{
|
{
|
||||||
int part_idx;
|
int part_idx;
|
||||||
int n, num;
|
int n, num;
|
||||||
VP8Macroblock *top_mb = &mb[2];
|
VP8Macroblock *top_mb;
|
||||||
VP8Macroblock *left_mb = &mb[-1];
|
VP8Macroblock *left_mb = &mb[-1];
|
||||||
const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
|
const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
|
||||||
*mbsplits_top = vp8_mbsplits[top_mb->partitioning],
|
*mbsplits_top,
|
||||||
*mbsplits_cur, *firstidx;
|
*mbsplits_cur, *firstidx;
|
||||||
VP56mv *top_mv = top_mb->bmv;
|
VP56mv *top_mv;
|
||||||
VP56mv *left_mv = left_mb->bmv;
|
VP56mv *left_mv = left_mb->bmv;
|
||||||
VP56mv *cur_mv = mb->bmv;
|
VP56mv *cur_mv = mb->bmv;
|
||||||
|
|
||||||
|
if (!layout) // layout is inlined, s->mb_layout is not
|
||||||
|
top_mb = &mb[2];
|
||||||
|
else
|
||||||
|
top_mb = &mb[-s->mb_width-1];
|
||||||
|
mbsplits_top = vp8_mbsplits[top_mb->partitioning];
|
||||||
|
top_mv = top_mb->bmv;
|
||||||
|
|
||||||
if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
|
if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
|
||||||
if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
|
if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
|
||||||
part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
|
part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
|
||||||
@ -532,11 +565,11 @@ int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static av_always_inline
|
static av_always_inline
|
||||||
void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y)
|
void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
|
||||||
{
|
{
|
||||||
VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
|
VP8Macroblock *mb_edge[3] = { 0 /* top */,
|
||||||
mb - 1 /* left */,
|
mb - 1 /* left */,
|
||||||
mb + 1 /* top-left */ };
|
0 /* top-left */ };
|
||||||
enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
|
enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
|
||||||
enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
|
enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
|
||||||
int idx = CNT_ZERO;
|
int idx = CNT_ZERO;
|
||||||
@ -546,6 +579,15 @@ void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y)
|
|||||||
uint8_t cnt[4] = { 0 };
|
uint8_t cnt[4] = { 0 };
|
||||||
VP56RangeCoder *c = &s->c;
|
VP56RangeCoder *c = &s->c;
|
||||||
|
|
||||||
|
if (!layout) { // layout is inlined (s->mb_layout is not)
|
||||||
|
mb_edge[0] = mb + 2;
|
||||||
|
mb_edge[2] = mb + 1;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
mb_edge[0] = mb - s->mb_width-1;
|
||||||
|
mb_edge[2] = mb - s->mb_width-2;
|
||||||
|
}
|
||||||
|
|
||||||
AV_ZERO32(&near_mv[0]);
|
AV_ZERO32(&near_mv[0]);
|
||||||
AV_ZERO32(&near_mv[1]);
|
AV_ZERO32(&near_mv[1]);
|
||||||
AV_ZERO32(&near_mv[2]);
|
AV_ZERO32(&near_mv[2]);
|
||||||
@ -600,7 +642,7 @@ void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y)
|
|||||||
|
|
||||||
if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
|
if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
|
||||||
mb->mode = VP8_MVMODE_SPLIT;
|
mb->mode = VP8_MVMODE_SPLIT;
|
||||||
mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1];
|
mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
|
||||||
} else {
|
} else {
|
||||||
mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
|
mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
|
||||||
mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
|
mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
|
||||||
@ -623,14 +665,22 @@ void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y)
|
|||||||
|
|
||||||
static av_always_inline
|
static av_always_inline
|
||||||
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
|
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
|
||||||
int mb_x, int keyframe)
|
int mb_x, int keyframe, int layout)
|
||||||
{
|
{
|
||||||
uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
|
uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
|
||||||
|
|
||||||
|
if (layout == 1) {
|
||||||
|
VP8Macroblock *mb_top = mb - s->mb_width - 1;
|
||||||
|
memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
|
||||||
|
}
|
||||||
if (keyframe) {
|
if (keyframe) {
|
||||||
int x, y;
|
int x, y;
|
||||||
uint8_t* const top = s->intra4x4_pred_mode_top + 4 * mb_x;
|
uint8_t* top;
|
||||||
uint8_t* const left = s->intra4x4_pred_mode_left;
|
uint8_t* const left = s->intra4x4_pred_mode_left;
|
||||||
|
if (layout == 1)
|
||||||
|
top = mb->intra4x4_pred_mode_top;
|
||||||
|
else
|
||||||
|
top = s->intra4x4_pred_mode_top + 4 * mb_x;
|
||||||
for (y = 0; y < 4; y++) {
|
for (y = 0; y < 4; y++) {
|
||||||
for (x = 0; x < 4; x++) {
|
for (x = 0; x < 4; x++) {
|
||||||
const uint8_t *ctx;
|
const uint8_t *ctx;
|
||||||
@ -648,7 +698,8 @@ void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static av_always_inline
|
static av_always_inline
|
||||||
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment, uint8_t *ref)
|
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
|
||||||
|
uint8_t *segment, uint8_t *ref, int layout)
|
||||||
{
|
{
|
||||||
VP56RangeCoder *c = &s->c;
|
VP56RangeCoder *c = &s->c;
|
||||||
|
|
||||||
@ -664,11 +715,14 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_
|
|||||||
mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
|
mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
|
||||||
|
|
||||||
if (mb->mode == MODE_I4x4) {
|
if (mb->mode == MODE_I4x4) {
|
||||||
decode_intra4x4_modes(s, c, mb, mb_x, 1);
|
decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
|
||||||
} else {
|
} else {
|
||||||
const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
|
const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
|
||||||
AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
|
if (s->mb_layout == 1)
|
||||||
AV_WN32A(s->intra4x4_pred_mode_left, modes);
|
AV_WN32A(mb->intra4x4_pred_mode_top, modes);
|
||||||
|
else
|
||||||
|
AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
|
||||||
|
AV_WN32A( s->intra4x4_pred_mode_left, modes);
|
||||||
}
|
}
|
||||||
|
|
||||||
mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
|
mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
|
||||||
@ -683,13 +737,13 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_
|
|||||||
s->ref_count[mb->ref_frame-1]++;
|
s->ref_count[mb->ref_frame-1]++;
|
||||||
|
|
||||||
// motion vectors, 16.3
|
// motion vectors, 16.3
|
||||||
decode_mvs(s, mb, mb_x, mb_y);
|
decode_mvs(s, mb, mb_x, mb_y, layout);
|
||||||
} else {
|
} else {
|
||||||
// intra MB, 16.1
|
// intra MB, 16.1
|
||||||
mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
|
mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
|
||||||
|
|
||||||
if (mb->mode == MODE_I4x4)
|
if (mb->mode == MODE_I4x4)
|
||||||
decode_intra4x4_modes(s, c, mb, mb_x, 0);
|
decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
|
||||||
|
|
||||||
mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
|
mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
|
||||||
mb->ref_frame = VP56_FRAME_CURRENT;
|
mb->ref_frame = VP56_FRAME_CURRENT;
|
||||||
@ -787,7 +841,7 @@ int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
|
|||||||
}
|
}
|
||||||
|
|
||||||
static av_always_inline
|
static av_always_inline
|
||||||
void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
|
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
|
||||||
uint8_t t_nnz[9], uint8_t l_nnz[9])
|
uint8_t t_nnz[9], uint8_t l_nnz[9])
|
||||||
{
|
{
|
||||||
int i, x, y, luma_start = 0, luma_ctx = 3;
|
int i, x, y, luma_start = 0, luma_ctx = 3;
|
||||||
@ -799,16 +853,16 @@ void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
|
|||||||
nnz_pred = t_nnz[8] + l_nnz[8];
|
nnz_pred = t_nnz[8] + l_nnz[8];
|
||||||
|
|
||||||
// decode DC values and do hadamard
|
// decode DC values and do hadamard
|
||||||
nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred,
|
nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
|
||||||
s->qmat[segment].luma_dc_qmul);
|
s->qmat[segment].luma_dc_qmul);
|
||||||
l_nnz[8] = t_nnz[8] = !!nnz;
|
l_nnz[8] = t_nnz[8] = !!nnz;
|
||||||
if (nnz) {
|
if (nnz) {
|
||||||
nnz_total += nnz;
|
nnz_total += nnz;
|
||||||
block_dc = 1;
|
block_dc = 1;
|
||||||
if (nnz == 1)
|
if (nnz == 1)
|
||||||
s->vp8dsp.vp8_luma_dc_wht_dc(s->block, s->block_dc);
|
s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
|
||||||
else
|
else
|
||||||
s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc);
|
s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
|
||||||
}
|
}
|
||||||
luma_start = 1;
|
luma_start = 1;
|
||||||
luma_ctx = 0;
|
luma_ctx = 0;
|
||||||
@ -818,10 +872,10 @@ void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
|
|||||||
for (y = 0; y < 4; y++)
|
for (y = 0; y < 4; y++)
|
||||||
for (x = 0; x < 4; x++) {
|
for (x = 0; x < 4; x++) {
|
||||||
nnz_pred = l_nnz[y] + t_nnz[x];
|
nnz_pred = l_nnz[y] + t_nnz[x];
|
||||||
nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start,
|
nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
|
||||||
nnz_pred, s->qmat[segment].luma_qmul);
|
nnz_pred, s->qmat[segment].luma_qmul);
|
||||||
// nnz+block_dc may be one more than the actual last index, but we don't care
|
// nnz+block_dc may be one more than the actual last index, but we don't care
|
||||||
s->non_zero_count_cache[y][x] = nnz + block_dc;
|
td->non_zero_count_cache[y][x] = nnz + block_dc;
|
||||||
t_nnz[x] = l_nnz[y] = !!nnz;
|
t_nnz[x] = l_nnz[y] = !!nnz;
|
||||||
nnz_total += nnz;
|
nnz_total += nnz;
|
||||||
}
|
}
|
||||||
@ -833,9 +887,9 @@ void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
|
|||||||
for (y = 0; y < 2; y++)
|
for (y = 0; y < 2; y++)
|
||||||
for (x = 0; x < 2; x++) {
|
for (x = 0; x < 2; x++) {
|
||||||
nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
|
nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
|
||||||
nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0,
|
nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
|
||||||
nnz_pred, s->qmat[segment].chroma_qmul);
|
nnz_pred, s->qmat[segment].chroma_qmul);
|
||||||
s->non_zero_count_cache[i][(y<<1)+x] = nnz;
|
td->non_zero_count_cache[i][(y<<1)+x] = nnz;
|
||||||
t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
|
t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
|
||||||
nnz_total += nnz;
|
nnz_total += nnz;
|
||||||
}
|
}
|
||||||
@ -980,8 +1034,8 @@ int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf
|
|||||||
}
|
}
|
||||||
|
|
||||||
static av_always_inline
|
static av_always_inline
|
||||||
void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
|
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
|
||||||
int mb_x, int mb_y)
|
VP8Macroblock *mb, int mb_x, int mb_y)
|
||||||
{
|
{
|
||||||
AVCodecContext *avctx = s->avctx;
|
AVCodecContext *avctx = s->avctx;
|
||||||
int x, y, mode, nnz;
|
int x, y, mode, nnz;
|
||||||
@ -989,7 +1043,7 @@ void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
|
|||||||
|
|
||||||
// for the first row, we need to run xchg_mb_border to init the top edge to 127
|
// for the first row, we need to run xchg_mb_border to init the top edge to 127
|
||||||
// otherwise, skip it if we aren't going to deblock
|
// otherwise, skip it if we aren't going to deblock
|
||||||
if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
|
if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
|
||||||
xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
|
xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
|
||||||
s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
|
s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
|
||||||
s->filter.simple, 1);
|
s->filter.simple, 1);
|
||||||
@ -1019,7 +1073,7 @@ void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (mb->skip)
|
if (mb->skip)
|
||||||
AV_ZERO128(s->non_zero_count_cache);
|
AV_ZERO128(td->non_zero_count_cache);
|
||||||
|
|
||||||
for (y = 0; y < 4; y++) {
|
for (y = 0; y < 4; y++) {
|
||||||
uint8_t *topright = ptr + 4 - s->linesize;
|
uint8_t *topright = ptr + 4 - s->linesize;
|
||||||
@ -1072,12 +1126,12 @@ void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
|
|||||||
AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
|
AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
|
||||||
}
|
}
|
||||||
|
|
||||||
nnz = s->non_zero_count_cache[y][x];
|
nnz = td->non_zero_count_cache[y][x];
|
||||||
if (nnz) {
|
if (nnz) {
|
||||||
if (nnz == 1)
|
if (nnz == 1)
|
||||||
s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize);
|
s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
|
||||||
else
|
else
|
||||||
s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize);
|
s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);
|
||||||
}
|
}
|
||||||
topright += 4;
|
topright += 4;
|
||||||
}
|
}
|
||||||
@ -1095,7 +1149,7 @@ void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
|
|||||||
s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
|
s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
|
||||||
s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
|
s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
|
||||||
|
|
||||||
if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
|
if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
|
||||||
xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
|
xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
|
||||||
s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
|
s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
|
||||||
s->filter.simple, 0);
|
s->filter.simple, 0);
|
||||||
@ -1125,7 +1179,8 @@ static const uint8_t subpel_idx[3][8] = {
|
|||||||
* @param mc_func motion compensation function pointers (bilinear or sixtap MC)
|
* @param mc_func motion compensation function pointers (bilinear or sixtap MC)
|
||||||
*/
|
*/
|
||||||
static av_always_inline
|
static av_always_inline
|
||||||
void vp8_mc_luma(VP8Context *s, uint8_t *dst, AVFrame *ref, const VP56mv *mv,
|
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
|
||||||
|
AVFrame *ref, const VP56mv *mv,
|
||||||
int x_off, int y_off, int block_w, int block_h,
|
int x_off, int y_off, int block_w, int block_h,
|
||||||
int width, int height, int linesize,
|
int width, int height, int linesize,
|
||||||
vp8_mc_func mc_func[3][3])
|
vp8_mc_func mc_func[3][3])
|
||||||
@ -1145,10 +1200,10 @@ void vp8_mc_luma(VP8Context *s, uint8_t *dst, AVFrame *ref, const VP56mv *mv,
|
|||||||
src += y_off * linesize + x_off;
|
src += y_off * linesize + x_off;
|
||||||
if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
|
if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
|
||||||
y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
|
y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
|
||||||
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
|
s->dsp.emulated_edge_mc(td->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
|
||||||
block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
|
block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
|
||||||
x_off - mx_idx, y_off - my_idx, width, height);
|
x_off - mx_idx, y_off - my_idx, width, height);
|
||||||
src = s->edge_emu_buffer + mx_idx + linesize * my_idx;
|
src = td->edge_emu_buffer + mx_idx + linesize * my_idx;
|
||||||
}
|
}
|
||||||
mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
|
mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
|
||||||
} else {
|
} else {
|
||||||
@ -1175,8 +1230,8 @@ void vp8_mc_luma(VP8Context *s, uint8_t *dst, AVFrame *ref, const VP56mv *mv,
|
|||||||
* @param mc_func motion compensation function pointers (bilinear or sixtap MC)
|
* @param mc_func motion compensation function pointers (bilinear or sixtap MC)
|
||||||
*/
|
*/
|
||||||
static av_always_inline
|
static av_always_inline
|
||||||
void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, AVFrame *ref,
|
void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
|
||||||
const VP56mv *mv, int x_off, int y_off,
|
AVFrame *ref, const VP56mv *mv, int x_off, int y_off,
|
||||||
int block_w, int block_h, int width, int height, int linesize,
|
int block_w, int block_h, int width, int height, int linesize,
|
||||||
vp8_mc_func mc_func[3][3])
|
vp8_mc_func mc_func[3][3])
|
||||||
{
|
{
|
||||||
@ -1195,16 +1250,16 @@ void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, AVFrame *ref,
|
|||||||
ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
|
ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
|
||||||
if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
|
if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
|
||||||
y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
|
y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
|
||||||
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
|
s->dsp.emulated_edge_mc(td->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
|
||||||
block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
|
block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
|
||||||
x_off - mx_idx, y_off - my_idx, width, height);
|
x_off - mx_idx, y_off - my_idx, width, height);
|
||||||
src1 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
|
src1 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
|
||||||
mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
|
mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
|
||||||
|
|
||||||
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
|
s->dsp.emulated_edge_mc(td->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
|
||||||
block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
|
block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
|
||||||
x_off - mx_idx, y_off - my_idx, width, height);
|
x_off - mx_idx, y_off - my_idx, width, height);
|
||||||
src2 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
|
src2 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
|
||||||
mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
|
mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
|
||||||
} else {
|
} else {
|
||||||
mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
|
mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
|
||||||
@ -1218,7 +1273,7 @@ void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, AVFrame *ref,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static av_always_inline
|
static av_always_inline
|
||||||
void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
|
void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
|
||||||
AVFrame *ref_frame, int x_off, int y_off,
|
AVFrame *ref_frame, int x_off, int y_off,
|
||||||
int bx_off, int by_off,
|
int bx_off, int by_off,
|
||||||
int block_w, int block_h,
|
int block_w, int block_h,
|
||||||
@ -1227,7 +1282,7 @@ void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
|
|||||||
VP56mv uvmv = *mv;
|
VP56mv uvmv = *mv;
|
||||||
|
|
||||||
/* Y */
|
/* Y */
|
||||||
vp8_mc_luma(s, dst[0] + by_off * s->linesize + bx_off,
|
vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
|
||||||
ref_frame, mv, x_off + bx_off, y_off + by_off,
|
ref_frame, mv, x_off + bx_off, y_off + by_off,
|
||||||
block_w, block_h, width, height, s->linesize,
|
block_w, block_h, width, height, s->linesize,
|
||||||
s->put_pixels_tab[block_w == 8]);
|
s->put_pixels_tab[block_w == 8]);
|
||||||
@ -1241,7 +1296,7 @@ void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
|
|||||||
bx_off >>= 1; by_off >>= 1;
|
bx_off >>= 1; by_off >>= 1;
|
||||||
width >>= 1; height >>= 1;
|
width >>= 1; height >>= 1;
|
||||||
block_w >>= 1; block_h >>= 1;
|
block_w >>= 1; block_h >>= 1;
|
||||||
vp8_mc_chroma(s, dst[1] + by_off * s->uvlinesize + bx_off,
|
vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
|
||||||
dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
|
dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
|
||||||
&uvmv, x_off + bx_off, y_off + by_off,
|
&uvmv, x_off + bx_off, y_off + by_off,
|
||||||
block_w, block_h, width, height, s->uvlinesize,
|
block_w, block_h, width, height, s->uvlinesize,
|
||||||
@ -1272,8 +1327,8 @@ static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, i
|
|||||||
* Apply motion vectors to prediction buffer, chapter 18.
|
* Apply motion vectors to prediction buffer, chapter 18.
|
||||||
*/
|
*/
|
||||||
static av_always_inline
|
static av_always_inline
|
||||||
void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
|
void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
|
||||||
int mb_x, int mb_y)
|
VP8Macroblock *mb, int mb_x, int mb_y)
|
||||||
{
|
{
|
||||||
int x_off = mb_x << 4, y_off = mb_y << 4;
|
int x_off = mb_x << 4, y_off = mb_y << 4;
|
||||||
int width = 16*s->mb_width, height = 16*s->mb_height;
|
int width = 16*s->mb_width, height = 16*s->mb_height;
|
||||||
@ -1282,7 +1337,7 @@ void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
|
|||||||
|
|
||||||
switch (mb->partitioning) {
|
switch (mb->partitioning) {
|
||||||
case VP8_SPLITMVMODE_NONE:
|
case VP8_SPLITMVMODE_NONE:
|
||||||
vp8_mc_part(s, dst, ref, x_off, y_off,
|
vp8_mc_part(s, td, dst, ref, x_off, y_off,
|
||||||
0, 0, 16, 16, width, height, &mb->mv);
|
0, 0, 16, 16, width, height, &mb->mv);
|
||||||
break;
|
break;
|
||||||
case VP8_SPLITMVMODE_4x4: {
|
case VP8_SPLITMVMODE_4x4: {
|
||||||
@ -1292,7 +1347,7 @@ void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
|
|||||||
/* Y */
|
/* Y */
|
||||||
for (y = 0; y < 4; y++) {
|
for (y = 0; y < 4; y++) {
|
||||||
for (x = 0; x < 4; x++) {
|
for (x = 0; x < 4; x++) {
|
||||||
vp8_mc_luma(s, dst[0] + 4*y*s->linesize + x*4,
|
vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
|
||||||
ref, &bmv[4*y + x],
|
ref, &bmv[4*y + x],
|
||||||
4*x + x_off, 4*y + y_off, 4, 4,
|
4*x + x_off, 4*y + y_off, 4, 4,
|
||||||
width, height, s->linesize,
|
width, height, s->linesize,
|
||||||
@ -1318,7 +1373,7 @@ void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
|
|||||||
uvmv.x &= ~7;
|
uvmv.x &= ~7;
|
||||||
uvmv.y &= ~7;
|
uvmv.y &= ~7;
|
||||||
}
|
}
|
||||||
vp8_mc_chroma(s, dst[1] + 4*y*s->uvlinesize + x*4,
|
vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
|
||||||
dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
|
dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
|
||||||
4*x + x_off, 4*y + y_off, 4, 4,
|
4*x + x_off, 4*y + y_off, 4, 4,
|
||||||
width, height, s->uvlinesize,
|
width, height, s->uvlinesize,
|
||||||
@ -1328,51 +1383,52 @@ void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case VP8_SPLITMVMODE_16x8:
|
case VP8_SPLITMVMODE_16x8:
|
||||||
vp8_mc_part(s, dst, ref, x_off, y_off,
|
vp8_mc_part(s, td, dst, ref, x_off, y_off,
|
||||||
0, 0, 16, 8, width, height, &bmv[0]);
|
0, 0, 16, 8, width, height, &bmv[0]);
|
||||||
vp8_mc_part(s, dst, ref, x_off, y_off,
|
vp8_mc_part(s, td, dst, ref, x_off, y_off,
|
||||||
0, 8, 16, 8, width, height, &bmv[1]);
|
0, 8, 16, 8, width, height, &bmv[1]);
|
||||||
break;
|
break;
|
||||||
case VP8_SPLITMVMODE_8x16:
|
case VP8_SPLITMVMODE_8x16:
|
||||||
vp8_mc_part(s, dst, ref, x_off, y_off,
|
vp8_mc_part(s, td, dst, ref, x_off, y_off,
|
||||||
0, 0, 8, 16, width, height, &bmv[0]);
|
0, 0, 8, 16, width, height, &bmv[0]);
|
||||||
vp8_mc_part(s, dst, ref, x_off, y_off,
|
vp8_mc_part(s, td, dst, ref, x_off, y_off,
|
||||||
8, 0, 8, 16, width, height, &bmv[1]);
|
8, 0, 8, 16, width, height, &bmv[1]);
|
||||||
break;
|
break;
|
||||||
case VP8_SPLITMVMODE_8x8:
|
case VP8_SPLITMVMODE_8x8:
|
||||||
vp8_mc_part(s, dst, ref, x_off, y_off,
|
vp8_mc_part(s, td, dst, ref, x_off, y_off,
|
||||||
0, 0, 8, 8, width, height, &bmv[0]);
|
0, 0, 8, 8, width, height, &bmv[0]);
|
||||||
vp8_mc_part(s, dst, ref, x_off, y_off,
|
vp8_mc_part(s, td, dst, ref, x_off, y_off,
|
||||||
8, 0, 8, 8, width, height, &bmv[1]);
|
8, 0, 8, 8, width, height, &bmv[1]);
|
||||||
vp8_mc_part(s, dst, ref, x_off, y_off,
|
vp8_mc_part(s, td, dst, ref, x_off, y_off,
|
||||||
0, 8, 8, 8, width, height, &bmv[2]);
|
0, 8, 8, 8, width, height, &bmv[2]);
|
||||||
vp8_mc_part(s, dst, ref, x_off, y_off,
|
vp8_mc_part(s, td, dst, ref, x_off, y_off,
|
||||||
8, 8, 8, 8, width, height, &bmv[3]);
|
8, 8, 8, 8, width, height, &bmv[3]);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
|
static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
|
||||||
|
uint8_t *dst[3], VP8Macroblock *mb)
|
||||||
{
|
{
|
||||||
int x, y, ch;
|
int x, y, ch;
|
||||||
|
|
||||||
if (mb->mode != MODE_I4x4) {
|
if (mb->mode != MODE_I4x4) {
|
||||||
uint8_t *y_dst = dst[0];
|
uint8_t *y_dst = dst[0];
|
||||||
for (y = 0; y < 4; y++) {
|
for (y = 0; y < 4; y++) {
|
||||||
uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[y]);
|
uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
|
||||||
if (nnz4) {
|
if (nnz4) {
|
||||||
if (nnz4&~0x01010101) {
|
if (nnz4&~0x01010101) {
|
||||||
for (x = 0; x < 4; x++) {
|
for (x = 0; x < 4; x++) {
|
||||||
if ((uint8_t)nnz4 == 1)
|
if ((uint8_t)nnz4 == 1)
|
||||||
s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize);
|
s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, td->block[y][x], s->linesize);
|
||||||
else if((uint8_t)nnz4 > 1)
|
else if((uint8_t)nnz4 > 1)
|
||||||
s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize);
|
s->vp8dsp.vp8_idct_add(y_dst+4*x, td->block[y][x], s->linesize);
|
||||||
nnz4 >>= 8;
|
nnz4 >>= 8;
|
||||||
if (!nnz4)
|
if (!nnz4)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
|
s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
y_dst += 4*s->linesize;
|
y_dst += 4*s->linesize;
|
||||||
@ -1380,16 +1436,16 @@ static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblo
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (ch = 0; ch < 2; ch++) {
|
for (ch = 0; ch < 2; ch++) {
|
||||||
uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[4+ch]);
|
uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4+ch]);
|
||||||
if (nnz4) {
|
if (nnz4) {
|
||||||
uint8_t *ch_dst = dst[1+ch];
|
uint8_t *ch_dst = dst[1+ch];
|
||||||
if (nnz4&~0x01010101) {
|
if (nnz4&~0x01010101) {
|
||||||
for (y = 0; y < 2; y++) {
|
for (y = 0; y < 2; y++) {
|
||||||
for (x = 0; x < 2; x++) {
|
for (x = 0; x < 2; x++) {
|
||||||
if ((uint8_t)nnz4 == 1)
|
if ((uint8_t)nnz4 == 1)
|
||||||
s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
|
s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
|
||||||
else if((uint8_t)nnz4 > 1)
|
else if((uint8_t)nnz4 > 1)
|
||||||
s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
|
s->vp8dsp.vp8_idct_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
|
||||||
nnz4 >>= 8;
|
nnz4 >>= 8;
|
||||||
if (!nnz4)
|
if (!nnz4)
|
||||||
goto chroma_idct_end;
|
goto chroma_idct_end;
|
||||||
@ -1397,7 +1453,7 @@ static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblo
|
|||||||
ch_dst += 4*s->uvlinesize;
|
ch_dst += 4*s->uvlinesize;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize);
|
s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4+ch], s->uvlinesize);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
chroma_idct_end: ;
|
chroma_idct_end: ;
|
||||||
@ -1535,38 +1591,6 @@ static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8Fi
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void filter_mb_row(VP8Context *s, AVFrame *curframe, int mb_y)
|
|
||||||
{
|
|
||||||
VP8FilterStrength *f = s->filter_strength;
|
|
||||||
uint8_t *dst[3] = {
|
|
||||||
curframe->data[0] + 16*mb_y*s->linesize,
|
|
||||||
curframe->data[1] + 8*mb_y*s->uvlinesize,
|
|
||||||
curframe->data[2] + 8*mb_y*s->uvlinesize
|
|
||||||
};
|
|
||||||
int mb_x;
|
|
||||||
|
|
||||||
for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
|
|
||||||
backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
|
|
||||||
filter_mb(s, dst, f++, mb_x, mb_y);
|
|
||||||
dst[0] += 16;
|
|
||||||
dst[1] += 8;
|
|
||||||
dst[2] += 8;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void filter_mb_row_simple(VP8Context *s, AVFrame *curframe, int mb_y)
|
|
||||||
{
|
|
||||||
VP8FilterStrength *f = s->filter_strength;
|
|
||||||
uint8_t *dst = curframe->data[0] + 16*mb_y*s->linesize;
|
|
||||||
int mb_x;
|
|
||||||
|
|
||||||
for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
|
|
||||||
backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1);
|
|
||||||
filter_mb_simple(s, dst, f++, mb_x, mb_y);
|
|
||||||
dst += 16;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void release_queued_segmaps(VP8Context *s, int is_close)
|
static void release_queued_segmaps(VP8Context *s, int is_close)
|
||||||
{
|
{
|
||||||
int leave_behind = is_close ? 0 : !s->maps_are_invalid;
|
int leave_behind = is_close ? 0 : !s->maps_are_invalid;
|
||||||
@ -1576,70 +1600,160 @@ static void release_queued_segmaps(VP8Context *s, int is_close)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#define MARGIN (16 << 2)
|
#define MARGIN (16 << 2)
|
||||||
static void vp8_decode_mb_row(AVCodecContext *avctx, AVFrame *curframe,
|
static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, AVFrame *curframe,
|
||||||
AVFrame *prev_frame, int mb_y)
|
AVFrame *prev_frame)
|
||||||
{
|
{
|
||||||
VP8Context *s = avctx->priv_data;
|
VP8Context *s = avctx->priv_data;
|
||||||
VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
|
int mb_x, mb_y;
|
||||||
VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
|
|
||||||
|
s->mv_min.y = -MARGIN;
|
||||||
|
s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
|
||||||
|
for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
|
||||||
|
VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
|
||||||
|
int mb_xy = mb_y*s->mb_width;
|
||||||
|
|
||||||
|
AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
|
||||||
|
|
||||||
|
s->mv_min.x = -MARGIN;
|
||||||
|
s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
|
||||||
|
for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
|
||||||
|
if (mb_y == 0)
|
||||||
|
AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
|
||||||
|
decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
|
||||||
|
prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 1);
|
||||||
|
s->mv_min.x -= 64;
|
||||||
|
s->mv_max.x -= 64;
|
||||||
|
}
|
||||||
|
s->mv_min.y -= 64;
|
||||||
|
s->mv_max.y -= 64;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Block the calling row worker until another worker has reached a position.
 *
 * Positions are packed as (mb_y << 16) | (mb_x & 0xFFFF), so comparing the
 * packed integers orders them by row first and by column second.
 *
 * @param td          state of the calling worker; its wait_mb_pos is set to
 *                    the packed target while it waits and reset to INT_MAX
 *                    once the target has been reached
 * @param otd         state of the worker being waited on; the wait happens on
 *                    its cond/lock pair
 * @param mb_x_check  column part of the position to wait for
 * @param mb_y_check  row part of the position to wait for
 *
 * The first comparison is done without holding otd->lock (fast path when the
 * other worker is already far enough); the condition is re-tested under the
 * lock before every pthread_cond_wait(), which also covers spurious wakeups.
 *
 * Every argument is parenthesized in the expansion, and the macro expands to
 * exactly one statement WITHOUT a trailing semicolon: the caller writes the
 * ';', which keeps "if (c) check_thread_pos(...); else ..." well-formed.
 */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
    do {\
        int tmp = ((mb_y_check) << 16) | ((mb_x_check) & 0xFFFF);\
        if ((otd)->thread_mb_pos < tmp) {\
            pthread_mutex_lock(&(otd)->lock);\
            (td)->wait_mb_pos = tmp;\
            while ((otd)->thread_mb_pos < tmp)\
                pthread_cond_wait(&(otd)->cond, &(otd)->lock);\
            (td)->wait_mb_pos = INT_MAX;\
            pthread_mutex_unlock(&(otd)->lock);\
        }\
    } while (0)
|
||||||
|
|
||||||
|
/**
 * Publish the calling worker's progress and wake neighbours waiting on it.
 *
 * The position is packed as (mb_y << 16) | (mb_x & 0xFFFF) and stored in
 * td->thread_mb_pos. A broadcast on td->cond is issued only when slice
 * threading is active with more than one job AND either
 *   - next_td or prev_td is NULL (no neighbour information: always signal), or
 *   - a neighbour other than td itself has a wait_mb_pos at or below the new
 *     position.
 *
 * @param td    state of the calling worker
 * @param mb_y  row part of the new position
 * @param mb_x  column part of the new position
 *
 * NOTE: besides its parameters the macro reads four names from the scope of
 * the call site: avctx, num_jobs, next_td and prev_td. They must be declared
 * wherever the macro is used.
 *
 * The neighbours' wait_mb_pos is sampled before thread_mb_pos is written and
 * without holding their locks; that ordering is kept exactly as it was.
 *
 * Every argument is parenthesized in the expansion, and the macro expands to
 * exactly one statement WITHOUT a trailing semicolon: the caller writes the
 * ';', which keeps "if (c) update_pos(...); else ..." well-formed.
 */
#define update_pos(td, mb_y, mb_x)\
    do {\
        int pos              = ((mb_y) << 16) | ((mb_x) & 0xFFFF);\
        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
        int is_null          = (next_td == NULL) || (prev_td == NULL);\
        int pos_check        = (is_null) ? 1 :\
                               (next_td != (td) && pos >= next_td->wait_mb_pos) ||\
                               (prev_td != (td) && pos >= prev_td->wait_mb_pos);\
        (td)->thread_mb_pos = pos;\
        if (sliced_threading && pos_check) {\
            pthread_mutex_lock(&(td)->lock);\
            pthread_cond_broadcast(&(td)->cond);\
            pthread_mutex_unlock(&(td)->lock);\
        }\
    } while (0)
|
||||||
|
|
||||||
|
static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
|
||||||
|
int jobnr, int threadnr)
|
||||||
|
{
|
||||||
|
VP8Context *s = avctx->priv_data;
|
||||||
|
VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
|
||||||
|
int mb_y = td->thread_mb_pos>>16;
|
||||||
int i, y, mb_x, mb_xy = mb_y*s->mb_width;
|
int i, y, mb_x, mb_xy = mb_y*s->mb_width;
|
||||||
|
int num_jobs = s->num_jobs;
|
||||||
|
AVFrame *curframe = s->curframe, *prev_frame = s->prev_frame;
|
||||||
|
VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
|
||||||
|
VP8Macroblock *mb;
|
||||||
uint8_t *dst[3] = {
|
uint8_t *dst[3] = {
|
||||||
curframe->data[0] + 16*mb_y*s->linesize,
|
curframe->data[0] + 16*mb_y*s->linesize,
|
||||||
curframe->data[1] + 8*mb_y*s->uvlinesize,
|
curframe->data[1] + 8*mb_y*s->uvlinesize,
|
||||||
curframe->data[2] + 8*mb_y*s->uvlinesize
|
curframe->data[2] + 8*mb_y*s->uvlinesize
|
||||||
};
|
};
|
||||||
|
if (mb_y == 0) prev_td = td;
|
||||||
|
else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
|
||||||
|
if (mb_y == s->mb_height-1) next_td = td;
|
||||||
|
else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
|
||||||
|
if (s->mb_layout == 1)
|
||||||
|
mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
|
||||||
|
else {
|
||||||
|
mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
|
||||||
|
memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
|
||||||
|
AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
|
||||||
|
}
|
||||||
|
|
||||||
memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
|
memset(td->left_nnz, 0, sizeof(td->left_nnz));
|
||||||
memset(s->left_nnz, 0, sizeof(s->left_nnz));
|
|
||||||
AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
|
|
||||||
|
|
||||||
// left edge of 129 for intra prediction
|
// left edge of 129 for intra prediction
|
||||||
if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
|
if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
|
||||||
for (i = 0; i < 3; i++)
|
for (i = 0; i < 3; i++)
|
||||||
for (y = 0; y < 16>>!!i; y++)
|
for (y = 0; y < 16>>!!i; y++)
|
||||||
dst[i][y*curframe->linesize[i]-1] = 129;
|
dst[i][y*curframe->linesize[i]-1] = 129;
|
||||||
if (mb_y == 1) // top left edge is also 129
|
if (mb_y == 1) {
|
||||||
s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
|
s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
s->mv_min.x = -MARGIN;
|
s->mv_min.x = -MARGIN;
|
||||||
s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
|
s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
|
||||||
|
|
||||||
for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
|
for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
|
||||||
/* Prefetch the current frame, 4 MBs ahead */
|
// Wait for previous thread to read mb_x+2, and reach mb_y-1.
|
||||||
|
if (prev_td != td) {
|
||||||
|
if (threadnr != 0) {
|
||||||
|
check_thread_pos(td, prev_td, mb_x+1, mb_y-1);
|
||||||
|
} else {
|
||||||
|
check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
|
s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
|
||||||
s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
|
s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
|
||||||
|
|
||||||
decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
|
if (!s->mb_layout)
|
||||||
prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL);
|
decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
|
||||||
|
prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 0);
|
||||||
|
|
||||||
prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
|
prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
|
||||||
|
|
||||||
if (!mb->skip)
|
if (!mb->skip)
|
||||||
decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);
|
decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz);
|
||||||
|
|
||||||
if (mb->mode <= MODE_I4x4)
|
if (mb->mode <= MODE_I4x4)
|
||||||
intra_predict(s, dst, mb, mb_x, mb_y);
|
intra_predict(s, td, dst, mb, mb_x, mb_y);
|
||||||
else
|
else
|
||||||
inter_predict(s, dst, mb, mb_x, mb_y);
|
inter_predict(s, td, dst, mb, mb_x, mb_y);
|
||||||
|
|
||||||
prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
|
prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
|
||||||
|
|
||||||
if (!mb->skip) {
|
if (!mb->skip) {
|
||||||
idct_mb(s, dst, mb);
|
idct_mb(s, td, dst, mb);
|
||||||
} else {
|
} else {
|
||||||
AV_ZERO64(s->left_nnz);
|
AV_ZERO64(td->left_nnz);
|
||||||
AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
|
AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
|
||||||
|
|
||||||
// Reset DC block predictors if they would exist if the mb had coefficients
|
// Reset DC block predictors if they would exist if the mb had coefficients
|
||||||
if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
|
if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
|
||||||
s->left_nnz[8] = 0;
|
td->left_nnz[8] = 0;
|
||||||
s->top_nnz[mb_x][8] = 0;
|
s->top_nnz[mb_x][8] = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (s->deblock_filter)
|
if (s->deblock_filter)
|
||||||
filter_level_for_mb(s, mb, &s->filter_strength[mb_x]);
|
filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);
|
||||||
|
|
||||||
|
if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) {
|
||||||
|
if (s->filter.simple)
|
||||||
|
backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
|
||||||
|
else
|
||||||
|
backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
|
||||||
|
}
|
||||||
|
|
||||||
prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
|
prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
|
||||||
|
|
||||||
@ -1648,22 +1762,101 @@ static void vp8_decode_mb_row(AVCodecContext *avctx, AVFrame *curframe,
|
|||||||
dst[2] += 8;
|
dst[2] += 8;
|
||||||
s->mv_min.x -= 64;
|
s->mv_min.x -= 64;
|
||||||
s->mv_max.x -= 64;
|
s->mv_max.x -= 64;
|
||||||
|
|
||||||
|
if (mb_x == s->mb_width+1) {
|
||||||
|
update_pos(td, mb_y, s->mb_width+3);
|
||||||
|
} else {
|
||||||
|
update_pos(td, mb_y, mb_x);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (s->deblock_filter) {
|
}
|
||||||
|
|
||||||
|
static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
|
||||||
|
int jobnr, int threadnr)
|
||||||
|
{
|
||||||
|
VP8Context *s = avctx->priv_data;
|
||||||
|
VP8ThreadData *td = &s->thread_data[threadnr];
|
||||||
|
int mb_x, mb_y = td->thread_mb_pos>>16, num_jobs = s->num_jobs;
|
||||||
|
AVFrame *curframe = s->curframe;
|
||||||
|
VP8Macroblock *mb;
|
||||||
|
VP8ThreadData *prev_td, *next_td;
|
||||||
|
uint8_t *dst[3] = {
|
||||||
|
curframe->data[0] + 16*mb_y*s->linesize,
|
||||||
|
curframe->data[1] + 8*mb_y*s->uvlinesize,
|
||||||
|
curframe->data[2] + 8*mb_y*s->uvlinesize
|
||||||
|
};
|
||||||
|
|
||||||
|
if (s->mb_layout == 1)
|
||||||
|
mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
|
||||||
|
else
|
||||||
|
mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
|
||||||
|
|
||||||
|
if (mb_y == 0) prev_td = td;
|
||||||
|
else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
|
||||||
|
if (mb_y == s->mb_height-1) next_td = td;
|
||||||
|
else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
|
||||||
|
|
||||||
|
for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
|
||||||
|
VP8FilterStrength *f = &td->filter_strength[mb_x];
|
||||||
|
if (prev_td != td) {
|
||||||
|
check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);
|
||||||
|
}
|
||||||
|
if (next_td != td)
|
||||||
|
if (next_td != &s->thread_data[0]) {
|
||||||
|
check_thread_pos(td, next_td, mb_x+1, mb_y+1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (num_jobs == 1) {
|
||||||
|
if (s->filter.simple)
|
||||||
|
backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
|
||||||
|
else
|
||||||
|
backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
|
||||||
|
}
|
||||||
|
|
||||||
if (s->filter.simple)
|
if (s->filter.simple)
|
||||||
filter_mb_row_simple(s, curframe, mb_y);
|
filter_mb_simple(s, dst[0], f, mb_x, mb_y);
|
||||||
else
|
else
|
||||||
filter_mb_row(s, curframe, mb_y);
|
filter_mb(s, dst, f, mb_x, mb_y);
|
||||||
|
dst[0] += 16;
|
||||||
|
dst[1] += 8;
|
||||||
|
dst[2] += 8;
|
||||||
|
|
||||||
|
update_pos(td, mb_y, (s->mb_width+3) + mb_x);
|
||||||
}
|
}
|
||||||
s->mv_min.y -= 64;
|
}
|
||||||
s->mv_max.y -= 64;
|
|
||||||
|
static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
|
||||||
|
int jobnr, int threadnr)
|
||||||
|
{
|
||||||
|
VP8Context *s = avctx->priv_data;
|
||||||
|
VP8ThreadData *td = &s->thread_data[jobnr];
|
||||||
|
VP8ThreadData *next_td = NULL, *prev_td = NULL;
|
||||||
|
AVFrame *curframe = s->curframe;
|
||||||
|
int mb_y, num_jobs = s->num_jobs;
|
||||||
|
td->thread_nr = threadnr;
|
||||||
|
for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
|
||||||
|
if (mb_y >= s->mb_height) break;
|
||||||
|
td->thread_mb_pos = mb_y<<16;
|
||||||
|
vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
|
||||||
|
if (s->deblock_filter)
|
||||||
|
vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);
|
||||||
|
update_pos(td, mb_y, INT_MAX & 0xFFFF);
|
||||||
|
|
||||||
|
s->mv_min.y -= 64;
|
||||||
|
s->mv_max.y -= 64;
|
||||||
|
|
||||||
|
if (avctx->active_thread_type == FF_THREAD_FRAME)
|
||||||
|
ff_thread_report_progress(curframe, mb_y, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
|
static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
|
||||||
AVPacket *avpkt)
|
AVPacket *avpkt)
|
||||||
{
|
{
|
||||||
VP8Context *s = avctx->priv_data;
|
VP8Context *s = avctx->priv_data;
|
||||||
int ret, mb_y, i, referenced;
|
int ret, i, referenced, num_jobs;
|
||||||
enum AVDiscard skip_thresh;
|
enum AVDiscard skip_thresh;
|
||||||
AVFrame *av_uninit(curframe), *prev_frame;
|
AVFrame *av_uninit(curframe), *prev_frame;
|
||||||
|
|
||||||
@ -1754,13 +1947,16 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
|
|||||||
s->linesize = curframe->linesize[0];
|
s->linesize = curframe->linesize[0];
|
||||||
s->uvlinesize = curframe->linesize[1];
|
s->uvlinesize = curframe->linesize[1];
|
||||||
|
|
||||||
if (!s->edge_emu_buffer)
|
if (!s->thread_data[0].edge_emu_buffer)
|
||||||
s->edge_emu_buffer = av_malloc(21*s->linesize);
|
for (i = 0; i < MAX_THREADS; i++)
|
||||||
|
s->thread_data[i].edge_emu_buffer = av_malloc(21*s->linesize);
|
||||||
|
|
||||||
memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
|
memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
|
||||||
|
|
||||||
/* Zero macroblock structures for top/top-left prediction from outside the frame. */
|
/* Zero macroblock structures for top/top-left prediction from outside the frame. */
|
||||||
memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
|
if (!s->mb_layout)
|
||||||
|
memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
|
||||||
|
if (!s->mb_layout && s->keyframe)
|
||||||
|
memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
|
||||||
|
|
||||||
// top edge of 127 for intra prediction
|
// top edge of 127 for intra prediction
|
||||||
if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
|
if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
|
||||||
@ -1768,20 +1964,30 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
|
|||||||
memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
|
memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
|
||||||
}
|
}
|
||||||
memset(s->ref_count, 0, sizeof(s->ref_count));
|
memset(s->ref_count, 0, sizeof(s->ref_count));
|
||||||
if (s->keyframe)
|
|
||||||
memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
|
|
||||||
|
|
||||||
s->mv_min.y = -MARGIN;
|
|
||||||
s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
|
|
||||||
|
|
||||||
for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
|
// Make sure the previous frame has read its segmentation map,
|
||||||
if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
|
// if we re-use the same map.
|
||||||
ff_thread_await_progress(prev_frame, mb_y, 0);
|
if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
|
||||||
|
ff_thread_await_progress(prev_frame, 1, 0);
|
||||||
|
|
||||||
vp8_decode_mb_row(avctx, curframe, prev_frame, mb_y);
|
if (s->mb_layout == 1)
|
||||||
|
vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
|
||||||
|
|
||||||
ff_thread_report_progress(curframe, mb_y, 0);
|
if (avctx->active_thread_type == FF_THREAD_FRAME)
|
||||||
|
num_jobs = 1;
|
||||||
|
else
|
||||||
|
num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
|
||||||
|
s->num_jobs = num_jobs;
|
||||||
|
s->curframe = curframe;
|
||||||
|
s->prev_frame = prev_frame;
|
||||||
|
s->mv_min.y = -MARGIN;
|
||||||
|
s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
|
||||||
|
for (i = 0; i < MAX_THREADS; i++) {
|
||||||
|
s->thread_data[i].thread_mb_pos = 0;
|
||||||
|
s->thread_data[i].wait_mb_pos = INT_MAX;
|
||||||
}
|
}
|
||||||
|
avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);
|
||||||
|
|
||||||
ff_thread_report_progress(curframe, INT_MAX, 0);
|
ff_thread_report_progress(curframe, INT_MAX, 0);
|
||||||
memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
|
memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
|
||||||
@ -1870,7 +2076,7 @@ AVCodec ff_vp8_decoder = {
|
|||||||
.init = vp8_decode_init,
|
.init = vp8_decode_init,
|
||||||
.close = vp8_decode_free,
|
.close = vp8_decode_free,
|
||||||
.decode = vp8_decode_frame,
|
.decode = vp8_decode_frame,
|
||||||
.capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
|
.capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
|
||||||
.flush = vp8_decode_flush,
|
.flush = vp8_decode_flush,
|
||||||
.long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
|
.long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
|
||||||
.init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
|
.init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
* Copyright (C) 2010 David Conrad
|
* Copyright (C) 2010 David Conrad
|
||||||
* Copyright (C) 2010 Ronald S. Bultje
|
* Copyright (C) 2010 Ronald S. Bultje
|
||||||
* Copyright (C) 2010 Jason Garrett-Glaser
|
* Copyright (C) 2010 Jason Garrett-Glaser
|
||||||
|
* Copyright (C) 2012 Daniel Kang
|
||||||
*
|
*
|
||||||
* This file is part of Libav.
|
* This file is part of Libav.
|
||||||
*
|
*
|
||||||
@ -88,10 +89,40 @@ typedef struct {
|
|||||||
} VP8Macroblock;
|
} VP8Macroblock;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
pthread_mutex_t lock;
|
||||||
|
pthread_cond_t cond;
|
||||||
|
int thread_nr;
|
||||||
|
int thread_mb_pos; // (mb_y << 16) | (mb_x & 0xFFFF)
|
||||||
|
int wait_mb_pos; // What the current thread is waiting on.
|
||||||
|
uint8_t *edge_emu_buffer;
|
||||||
|
/**
|
||||||
|
* For coeff decode, we need to know whether the above block had non-zero
|
||||||
|
* coefficients. This means for each macroblock, we need data for 4 luma
|
||||||
|
* blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9
|
||||||
|
* per macroblock. We keep the last row in top_nnz.
|
||||||
|
*/
|
||||||
|
DECLARE_ALIGNED(8, uint8_t, left_nnz)[9];
|
||||||
|
/**
|
||||||
|
* This is the index plus one of the last non-zero coeff
|
||||||
|
* for each of the blocks in the current macroblock.
|
||||||
|
* So, 0 -> no coeffs
|
||||||
|
* 1 -> dc-only (special transform)
|
||||||
|
* 2+-> full transform
|
||||||
|
*/
|
||||||
|
DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4];
|
||||||
|
DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16];
|
||||||
|
DECLARE_ALIGNED(16, DCTELEM, block_dc)[16];
|
||||||
|
VP8FilterStrength *filter_strength;
|
||||||
|
} VP8ThreadData;
|
||||||
|
|
||||||
|
#define MAX_THREADS 8
|
||||||
|
typedef struct {
|
||||||
|
VP8ThreadData *thread_data;
|
||||||
AVCodecContext *avctx;
|
AVCodecContext *avctx;
|
||||||
AVFrame *framep[4];
|
AVFrame *framep[4];
|
||||||
AVFrame *next_framep[4];
|
AVFrame *next_framep[4];
|
||||||
uint8_t *edge_emu_buffer;
|
AVFrame *curframe;
|
||||||
|
AVFrame *prev_frame;
|
||||||
|
|
||||||
uint16_t mb_width; /* number of horizontal MB */
|
uint16_t mb_width; /* number of horizontal MB */
|
||||||
uint16_t mb_height; /* number of vertical MB */
|
uint16_t mb_height; /* number of vertical MB */
|
||||||
@ -128,7 +159,6 @@ typedef struct {
|
|||||||
} filter;
|
} filter;
|
||||||
|
|
||||||
VP8Macroblock *macroblocks;
|
VP8Macroblock *macroblocks;
|
||||||
VP8FilterStrength *filter_strength;
|
|
||||||
|
|
||||||
uint8_t *intra4x4_pred_mode_top;
|
uint8_t *intra4x4_pred_mode_top;
|
||||||
uint8_t intra4x4_pred_mode_left[4];
|
uint8_t intra4x4_pred_mode_left[4];
|
||||||
@ -169,32 +199,10 @@ typedef struct {
|
|||||||
int8_t ref[4];
|
int8_t ref[4];
|
||||||
} lf_delta;
|
} lf_delta;
|
||||||
|
|
||||||
/**
|
|
||||||
* Cache of the top row needed for intra prediction
|
|
||||||
* 16 for luma, 8 for each chroma plane
|
|
||||||
*/
|
|
||||||
uint8_t (*top_border)[16+8+8];
|
uint8_t (*top_border)[16+8+8];
|
||||||
|
|
||||||
/**
|
|
||||||
* For coeff decode, we need to know whether the above block had non-zero
|
|
||||||
* coefficients. This means for each macroblock, we need data for 4 luma
|
|
||||||
* blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9
|
|
||||||
* per macroblock. We keep the last row in top_nnz.
|
|
||||||
*/
|
|
||||||
uint8_t (*top_nnz)[9];
|
uint8_t (*top_nnz)[9];
|
||||||
DECLARE_ALIGNED(8, uint8_t, left_nnz)[9];
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This is the index plus one of the last non-zero coeff
|
|
||||||
* for each of the blocks in the current macroblock.
|
|
||||||
* So, 0 -> no coeffs
|
|
||||||
* 1 -> dc-only (special transform)
|
|
||||||
* 2+-> full transform
|
|
||||||
*/
|
|
||||||
DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4];
|
|
||||||
VP56RangeCoder c; ///< header context, includes mb modes and motion vectors
|
VP56RangeCoder c; ///< header context, includes mb modes and motion vectors
|
||||||
DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16];
|
|
||||||
DECLARE_ALIGNED(16, DCTELEM, block_dc)[16];
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* These are all of the updatable probabilities for binary decisions.
|
* These are all of the updatable probabilities for binary decisions.
|
||||||
@ -247,6 +255,13 @@ typedef struct {
|
|||||||
uint8_t *segmentation_maps[5];
|
uint8_t *segmentation_maps[5];
|
||||||
int num_maps_to_be_freed;
|
int num_maps_to_be_freed;
|
||||||
int maps_are_invalid;
|
int maps_are_invalid;
|
||||||
|
int num_jobs;
|
||||||
|
/**
|
||||||
|
* This describes the macroblock memory layout.
|
||||||
|
* 0 -> Only width+height*2+1 macroblocks allocated (frame/single thread).
|
||||||
|
* 1 -> Macroblocks for entire frame alloced (sliced thread).
|
||||||
|
*/
|
||||||
|
int mb_layout;
|
||||||
} VP8Context;
|
} VP8Context;
|
||||||
|
|
||||||
#endif /* AVCODEC_VP8_H */
|
#endif /* AVCODEC_VP8_H */
|
||||||
|
Loading…
Reference in New Issue
Block a user