1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-01-24 13:56:33 +02:00

vp8: make mv_min/max thread-local if using partition threading.

Fixes tsan warnings like this in fate-vp8-test-vector-007:

WARNING: ThreadSanitizer: data race (pid=65909)
  Write of size 4 at 0x7d8c0000e088 by thread T1:
    #0 vp8_decode_mb_row_sliced vp8.c:2519 (ffmpeg:x86_64+0x100995ede)
[..]
  Previous write of size 4 at 0x7d8c0000e088 by thread T2:
    #0 vp8_decode_mb_row_sliced vp8.c:2519 (ffmpeg:x86_64+0x100995ede)
This commit is contained in:
Ronald S. Bultje 2017-04-05 16:19:55 -04:00
parent 9a54c6f243
commit fed92adbb3
2 changed files with 40 additions and 32 deletions

View File

@ -772,7 +772,7 @@ static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_si
} }
static av_always_inline static av_always_inline
void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src) void clamp_mv(VP8mvbounds *s, VP56mv *dst, const VP56mv *src)
{ {
dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX), dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
av_clip(s->mv_max.x, INT16_MIN, INT16_MAX)); av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
@ -1031,7 +1031,7 @@ void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
} }
static av_always_inline static av_always_inline
void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb, void vp8_decode_mvs(VP8Context *s, VP8mvbounds *mv_bounds, VP8Macroblock *mb,
int mb_x, int mb_y, int layout) int mb_x, int mb_y, int layout)
{ {
VP8Macroblock *mb_edge[3] = { 0 /* top */, VP8Macroblock *mb_edge[3] = { 0 /* top */,
@ -1102,7 +1102,7 @@ void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) { if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) { if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
/* Choose the best mv out of 0,0 and the nearest mv */ /* Choose the best mv out of 0,0 and the nearest mv */
clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]); clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) + cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
(mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 + (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
(mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT); (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
@ -1116,11 +1116,11 @@ void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
mb->bmv[0] = mb->mv; mb->bmv[0] = mb->mv;
} }
} else { } else {
clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]); clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAR]);
mb->bmv[0] = mb->mv; mb->bmv[0] = mb->mv;
} }
} else { } else {
clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]); clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAREST]);
mb->bmv[0] = mb->mv; mb->bmv[0] = mb->mv;
} }
} else { } else {
@ -1166,7 +1166,8 @@ void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
} }
static av_always_inline static av_always_inline
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, void decode_mb_mode(VP8Context *s, VP8mvbounds *mv_bounds,
VP8Macroblock *mb, int mb_x, int mb_y,
uint8_t *segment, uint8_t *ref, int layout, int is_vp7) uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
{ {
VP56RangeCoder *c = &s->c; VP56RangeCoder *c = &s->c;
@ -1230,7 +1231,7 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
if (is_vp7) if (is_vp7)
vp7_decode_mvs(s, mb, mb_x, mb_y, layout); vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
else else
vp8_decode_mvs(s, mb, mb_x, mb_y, layout); vp8_decode_mvs(s, mv_bounds, mb, mb_x, mb_y, layout);
} else { } else {
// intra MB, 16.1 // intra MB, 16.1
mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16); mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
@ -2205,8 +2206,8 @@ void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
VP8Context *s = avctx->priv_data; VP8Context *s = avctx->priv_data;
int mb_x, mb_y; int mb_x, mb_y;
s->mv_min.y = -MARGIN; s->mv_bounds.mv_min.y = -MARGIN;
s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN; s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
for (mb_y = 0; mb_y < s->mb_height; mb_y++) { for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
VP8Macroblock *mb = s->macroblocks_base + VP8Macroblock *mb = s->macroblocks_base +
((s->mb_width + 1) * (mb_y + 1) + 1); ((s->mb_width + 1) * (mb_y + 1) + 1);
@ -2214,20 +2215,20 @@ void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101); AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
s->mv_min.x = -MARGIN; s->mv_bounds.mv_min.x = -MARGIN;
s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN; s->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) { for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
if (mb_y == 0) if (mb_y == 0)
AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top, AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
DC_PRED * 0x01010101); DC_PRED * 0x01010101);
decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy, decode_mb_mode(s, &s->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
prev_frame && prev_frame->seg_map ? prev_frame && prev_frame->seg_map ?
prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7); prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
s->mv_min.x -= 64; s->mv_bounds.mv_min.x -= 64;
s->mv_max.x -= 64; s->mv_bounds.mv_max.x -= 64;
} }
s->mv_min.y -= 64; s->mv_bounds.mv_min.y -= 64;
s->mv_max.y -= 64; s->mv_bounds.mv_max.y -= 64;
} }
} }
@ -2325,8 +2326,8 @@ static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void
if (!is_vp7 || mb_y == 0) if (!is_vp7 || mb_y == 0)
memset(td->left_nnz, 0, sizeof(td->left_nnz)); memset(td->left_nnz, 0, sizeof(td->left_nnz));
s->mv_min.x = -MARGIN; td->mv_bounds.mv_min.x = -MARGIN;
s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN; td->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) { for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
if (c->end <= c->buffer && c->bits >= 0) if (c->end <= c->buffer && c->bits >= 0)
@ -2350,7 +2351,7 @@ static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void
dst[2] - dst[1], 2); dst[2] - dst[1], 2);
if (!s->mb_layout) if (!s->mb_layout)
decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy, decode_mb_mode(s, &td->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
prev_frame && prev_frame->seg_map ? prev_frame && prev_frame->seg_map ?
prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7); prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
@ -2397,8 +2398,8 @@ static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void
dst[0] += 16; dst[0] += 16;
dst[1] += 8; dst[1] += 8;
dst[2] += 8; dst[2] += 8;
s->mv_min.x -= 64; td->mv_bounds.mv_min.x -= 64;
s->mv_max.x -= 64; td->mv_bounds.mv_max.x -= 64;
if (mb_x == s->mb_width + 1) { if (mb_x == s->mb_width + 1) {
update_pos(td, mb_y, s->mb_width + 3); update_pos(td, mb_y, s->mb_width + 3);
@ -2504,6 +2505,8 @@ int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
int ret; int ret;
td->thread_nr = threadnr; td->thread_nr = threadnr;
td->mv_bounds.mv_min.y = -MARGIN - 64 * threadnr;
td->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN - 64 * threadnr;
for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) { for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
atomic_store(&td->thread_mb_pos, mb_y << 16); atomic_store(&td->thread_mb_pos, mb_y << 16);
ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr); ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
@ -2515,8 +2518,8 @@ int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
s->filter_mb_row(avctx, tdata, jobnr, threadnr); s->filter_mb_row(avctx, tdata, jobnr, threadnr);
update_pos(td, mb_y, INT_MAX & 0xFFFF); update_pos(td, mb_y, INT_MAX & 0xFFFF);
s->mv_min.y -= 64; td->mv_bounds.mv_min.y -= 64 * num_jobs;
s->mv_max.y -= 64; td->mv_bounds.mv_max.y -= 64 * num_jobs;
if (avctx->active_thread_type == FF_THREAD_FRAME) if (avctx->active_thread_type == FF_THREAD_FRAME)
ff_thread_report_progress(&curframe->tf, mb_y, 0); ff_thread_report_progress(&curframe->tf, mb_y, 0);
@ -2662,8 +2665,8 @@ int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
s->num_jobs = num_jobs; s->num_jobs = num_jobs;
s->curframe = curframe; s->curframe = curframe;
s->prev_frame = prev_frame; s->prev_frame = prev_frame;
s->mv_min.y = -MARGIN; s->mv_bounds.mv_min.y = -MARGIN;
s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN; s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
for (i = 0; i < MAX_THREADS; i++) { for (i = 0; i < MAX_THREADS; i++) {
VP8ThreadData *td = &s->thread_data[i]; VP8ThreadData *td = &s->thread_data[i];
atomic_init(&td->thread_mb_pos, 0); atomic_init(&td->thread_mb_pos, 0);

View File

@ -93,6 +93,16 @@ typedef struct VP8Macroblock {
VP56mv bmv[16]; VP56mv bmv[16];
} VP8Macroblock; } VP8Macroblock;
/**
 * Two-component vector whose x/y members are plain int.
 * Used as the element type of the mv_min / mv_max members of VP8mvbounds.
 */
typedef struct VP8intmv {
int x;
int y;
} VP8intmv;
/**
 * Lower and upper limits that clamp_mv() clips a motion vector against.
 * clamp_mv() itself clips these limits to [INT16_MIN, INT16_MAX] before
 * applying them, so the stored values may lie outside that range.
 * One instance is kept in VP8Context and one in each VP8ThreadData, so
 * the per-thread row decoder can advance its own copy.
 */
typedef struct VP8mvbounds {
VP8intmv mv_min; /* lower limit, one value per axis */
VP8intmv mv_max; /* upper limit, one value per axis */
} VP8mvbounds;
typedef struct VP8ThreadData { typedef struct VP8ThreadData {
DECLARE_ALIGNED(16, int16_t, block)[6][4][16]; DECLARE_ALIGNED(16, int16_t, block)[6][4][16];
DECLARE_ALIGNED(16, int16_t, block_dc)[16]; DECLARE_ALIGNED(16, int16_t, block_dc)[16];
@ -122,6 +132,7 @@ typedef struct VP8ThreadData {
#define EDGE_EMU_LINESIZE 32 #define EDGE_EMU_LINESIZE 32
DECLARE_ALIGNED(16, uint8_t, edge_emu_buffer)[21 * EDGE_EMU_LINESIZE]; DECLARE_ALIGNED(16, uint8_t, edge_emu_buffer)[21 * EDGE_EMU_LINESIZE];
VP8FilterStrength *filter_strength; VP8FilterStrength *filter_strength;
VP8mvbounds mv_bounds;
} VP8ThreadData; } VP8ThreadData;
typedef struct VP8Frame { typedef struct VP8Frame {
@ -129,11 +140,6 @@ typedef struct VP8Frame {
AVBufferRef *seg_map; AVBufferRef *seg_map;
} VP8Frame; } VP8Frame;
typedef struct VP8intmv {
int x;
int y;
} VP8intmv;
#define MAX_THREADS 8 #define MAX_THREADS 8
typedef struct VP8Context { typedef struct VP8Context {
VP8ThreadData *thread_data; VP8ThreadData *thread_data;
@ -152,8 +158,7 @@ typedef struct VP8Context {
uint8_t deblock_filter; uint8_t deblock_filter;
uint8_t mbskip_enabled; uint8_t mbskip_enabled;
uint8_t profile; uint8_t profile;
VP8intmv mv_min; VP8mvbounds mv_bounds;
VP8intmv mv_max;
int8_t sign_bias[4]; ///< one state [0, 1] per ref frame type int8_t sign_bias[4]; ///< one state [0, 1] per ref frame type
int ref_count[3]; int ref_count[3];