mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-24 13:56:33 +02:00
vp8: make mv_min/max thread-local if using partition threading.
Fixes tsan warnings like this in fate-vp8-test-vector-007:

WARNING: ThreadSanitizer: data race (pid=65909)
  Write of size 4 at 0x7d8c0000e088 by thread T1:
    #0 vp8_decode_mb_row_sliced vp8.c:2519 (ffmpeg:x86_64+0x100995ede)
  [..]
  Previous write of size 4 at 0x7d8c0000e088 by thread T2:
    #0 vp8_decode_mb_row_sliced vp8.c:2519 (ffmpeg:x86_64+0x100995ede)
This commit is contained in:
parent
9a54c6f243
commit
fed92adbb3
@ -772,7 +772,7 @@ static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_si
|
|||||||
}
|
}
|
||||||
|
|
||||||
static av_always_inline
|
static av_always_inline
|
||||||
void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
|
void clamp_mv(VP8mvbounds *s, VP56mv *dst, const VP56mv *src)
|
||||||
{
|
{
|
||||||
dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
|
dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
|
||||||
av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
|
av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
|
||||||
@ -1031,7 +1031,7 @@ void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static av_always_inline
|
static av_always_inline
|
||||||
void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
|
void vp8_decode_mvs(VP8Context *s, VP8mvbounds *mv_bounds, VP8Macroblock *mb,
|
||||||
int mb_x, int mb_y, int layout)
|
int mb_x, int mb_y, int layout)
|
||||||
{
|
{
|
||||||
VP8Macroblock *mb_edge[3] = { 0 /* top */,
|
VP8Macroblock *mb_edge[3] = { 0 /* top */,
|
||||||
@ -1102,7 +1102,7 @@ void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
|
|||||||
if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
|
if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
|
||||||
if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
|
if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
|
||||||
/* Choose the best mv out of 0,0 and the nearest mv */
|
/* Choose the best mv out of 0,0 and the nearest mv */
|
||||||
clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
|
clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
|
||||||
cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
|
cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
|
||||||
(mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
|
(mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
|
||||||
(mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
|
(mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
|
||||||
@ -1116,11 +1116,11 @@ void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
|
|||||||
mb->bmv[0] = mb->mv;
|
mb->bmv[0] = mb->mv;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
|
clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAR]);
|
||||||
mb->bmv[0] = mb->mv;
|
mb->bmv[0] = mb->mv;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
|
clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAREST]);
|
||||||
mb->bmv[0] = mb->mv;
|
mb->bmv[0] = mb->mv;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -1166,7 +1166,8 @@ void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static av_always_inline
|
static av_always_inline
|
||||||
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
|
void decode_mb_mode(VP8Context *s, VP8mvbounds *mv_bounds,
|
||||||
|
VP8Macroblock *mb, int mb_x, int mb_y,
|
||||||
uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
|
uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
|
||||||
{
|
{
|
||||||
VP56RangeCoder *c = &s->c;
|
VP56RangeCoder *c = &s->c;
|
||||||
@ -1230,7 +1231,7 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
|
|||||||
if (is_vp7)
|
if (is_vp7)
|
||||||
vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
|
vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
|
||||||
else
|
else
|
||||||
vp8_decode_mvs(s, mb, mb_x, mb_y, layout);
|
vp8_decode_mvs(s, mv_bounds, mb, mb_x, mb_y, layout);
|
||||||
} else {
|
} else {
|
||||||
// intra MB, 16.1
|
// intra MB, 16.1
|
||||||
mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
|
mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
|
||||||
@ -2205,8 +2206,8 @@ void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
|
|||||||
VP8Context *s = avctx->priv_data;
|
VP8Context *s = avctx->priv_data;
|
||||||
int mb_x, mb_y;
|
int mb_x, mb_y;
|
||||||
|
|
||||||
s->mv_min.y = -MARGIN;
|
s->mv_bounds.mv_min.y = -MARGIN;
|
||||||
s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
|
s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
|
||||||
for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
|
for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
|
||||||
VP8Macroblock *mb = s->macroblocks_base +
|
VP8Macroblock *mb = s->macroblocks_base +
|
||||||
((s->mb_width + 1) * (mb_y + 1) + 1);
|
((s->mb_width + 1) * (mb_y + 1) + 1);
|
||||||
@ -2214,20 +2215,20 @@ void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
|
|||||||
|
|
||||||
AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
|
AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
|
||||||
|
|
||||||
s->mv_min.x = -MARGIN;
|
s->mv_bounds.mv_min.x = -MARGIN;
|
||||||
s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
|
s->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
|
||||||
for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
|
for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
|
||||||
if (mb_y == 0)
|
if (mb_y == 0)
|
||||||
AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
|
AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
|
||||||
DC_PRED * 0x01010101);
|
DC_PRED * 0x01010101);
|
||||||
decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
|
decode_mb_mode(s, &s->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
|
||||||
prev_frame && prev_frame->seg_map ?
|
prev_frame && prev_frame->seg_map ?
|
||||||
prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
|
prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
|
||||||
s->mv_min.x -= 64;
|
s->mv_bounds.mv_min.x -= 64;
|
||||||
s->mv_max.x -= 64;
|
s->mv_bounds.mv_max.x -= 64;
|
||||||
}
|
}
|
||||||
s->mv_min.y -= 64;
|
s->mv_bounds.mv_min.y -= 64;
|
||||||
s->mv_max.y -= 64;
|
s->mv_bounds.mv_max.y -= 64;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2325,8 +2326,8 @@ static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void
|
|||||||
if (!is_vp7 || mb_y == 0)
|
if (!is_vp7 || mb_y == 0)
|
||||||
memset(td->left_nnz, 0, sizeof(td->left_nnz));
|
memset(td->left_nnz, 0, sizeof(td->left_nnz));
|
||||||
|
|
||||||
s->mv_min.x = -MARGIN;
|
td->mv_bounds.mv_min.x = -MARGIN;
|
||||||
s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
|
td->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
|
||||||
|
|
||||||
for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
|
for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
|
||||||
if (c->end <= c->buffer && c->bits >= 0)
|
if (c->end <= c->buffer && c->bits >= 0)
|
||||||
@ -2350,7 +2351,7 @@ static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void
|
|||||||
dst[2] - dst[1], 2);
|
dst[2] - dst[1], 2);
|
||||||
|
|
||||||
if (!s->mb_layout)
|
if (!s->mb_layout)
|
||||||
decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
|
decode_mb_mode(s, &td->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
|
||||||
prev_frame && prev_frame->seg_map ?
|
prev_frame && prev_frame->seg_map ?
|
||||||
prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
|
prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
|
||||||
|
|
||||||
@ -2397,8 +2398,8 @@ static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void
|
|||||||
dst[0] += 16;
|
dst[0] += 16;
|
||||||
dst[1] += 8;
|
dst[1] += 8;
|
||||||
dst[2] += 8;
|
dst[2] += 8;
|
||||||
s->mv_min.x -= 64;
|
td->mv_bounds.mv_min.x -= 64;
|
||||||
s->mv_max.x -= 64;
|
td->mv_bounds.mv_max.x -= 64;
|
||||||
|
|
||||||
if (mb_x == s->mb_width + 1) {
|
if (mb_x == s->mb_width + 1) {
|
||||||
update_pos(td, mb_y, s->mb_width + 3);
|
update_pos(td, mb_y, s->mb_width + 3);
|
||||||
@ -2504,6 +2505,8 @@ int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
|
|||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
td->thread_nr = threadnr;
|
td->thread_nr = threadnr;
|
||||||
|
td->mv_bounds.mv_min.y = -MARGIN - 64 * threadnr;
|
||||||
|
td->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN - 64 * threadnr;
|
||||||
for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
|
for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
|
||||||
atomic_store(&td->thread_mb_pos, mb_y << 16);
|
atomic_store(&td->thread_mb_pos, mb_y << 16);
|
||||||
ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
|
ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
|
||||||
@ -2515,8 +2518,8 @@ int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
|
|||||||
s->filter_mb_row(avctx, tdata, jobnr, threadnr);
|
s->filter_mb_row(avctx, tdata, jobnr, threadnr);
|
||||||
update_pos(td, mb_y, INT_MAX & 0xFFFF);
|
update_pos(td, mb_y, INT_MAX & 0xFFFF);
|
||||||
|
|
||||||
s->mv_min.y -= 64;
|
td->mv_bounds.mv_min.y -= 64 * num_jobs;
|
||||||
s->mv_max.y -= 64;
|
td->mv_bounds.mv_max.y -= 64 * num_jobs;
|
||||||
|
|
||||||
if (avctx->active_thread_type == FF_THREAD_FRAME)
|
if (avctx->active_thread_type == FF_THREAD_FRAME)
|
||||||
ff_thread_report_progress(&curframe->tf, mb_y, 0);
|
ff_thread_report_progress(&curframe->tf, mb_y, 0);
|
||||||
@ -2662,8 +2665,8 @@ int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
|
|||||||
s->num_jobs = num_jobs;
|
s->num_jobs = num_jobs;
|
||||||
s->curframe = curframe;
|
s->curframe = curframe;
|
||||||
s->prev_frame = prev_frame;
|
s->prev_frame = prev_frame;
|
||||||
s->mv_min.y = -MARGIN;
|
s->mv_bounds.mv_min.y = -MARGIN;
|
||||||
s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
|
s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
|
||||||
for (i = 0; i < MAX_THREADS; i++) {
|
for (i = 0; i < MAX_THREADS; i++) {
|
||||||
VP8ThreadData *td = &s->thread_data[i];
|
VP8ThreadData *td = &s->thread_data[i];
|
||||||
atomic_init(&td->thread_mb_pos, 0);
|
atomic_init(&td->thread_mb_pos, 0);
|
||||||
|
@ -93,6 +93,16 @@ typedef struct VP8Macroblock {
|
|||||||
VP56mv bmv[16];
|
VP56mv bmv[16];
|
||||||
} VP8Macroblock;
|
} VP8Macroblock;
|
||||||
|
|
||||||
|
typedef struct VP8intmv {
|
||||||
|
int x;
|
||||||
|
int y;
|
||||||
|
} VP8intmv;
|
||||||
|
|
||||||
|
typedef struct VP8mvbounds {
|
||||||
|
VP8intmv mv_min;
|
||||||
|
VP8intmv mv_max;
|
||||||
|
} VP8mvbounds;
|
||||||
|
|
||||||
typedef struct VP8ThreadData {
|
typedef struct VP8ThreadData {
|
||||||
DECLARE_ALIGNED(16, int16_t, block)[6][4][16];
|
DECLARE_ALIGNED(16, int16_t, block)[6][4][16];
|
||||||
DECLARE_ALIGNED(16, int16_t, block_dc)[16];
|
DECLARE_ALIGNED(16, int16_t, block_dc)[16];
|
||||||
@ -122,6 +132,7 @@ typedef struct VP8ThreadData {
|
|||||||
#define EDGE_EMU_LINESIZE 32
|
#define EDGE_EMU_LINESIZE 32
|
||||||
DECLARE_ALIGNED(16, uint8_t, edge_emu_buffer)[21 * EDGE_EMU_LINESIZE];
|
DECLARE_ALIGNED(16, uint8_t, edge_emu_buffer)[21 * EDGE_EMU_LINESIZE];
|
||||||
VP8FilterStrength *filter_strength;
|
VP8FilterStrength *filter_strength;
|
||||||
|
VP8mvbounds mv_bounds;
|
||||||
} VP8ThreadData;
|
} VP8ThreadData;
|
||||||
|
|
||||||
typedef struct VP8Frame {
|
typedef struct VP8Frame {
|
||||||
@ -129,11 +140,6 @@ typedef struct VP8Frame {
|
|||||||
AVBufferRef *seg_map;
|
AVBufferRef *seg_map;
|
||||||
} VP8Frame;
|
} VP8Frame;
|
||||||
|
|
||||||
typedef struct VP8intmv {
|
|
||||||
int x;
|
|
||||||
int y;
|
|
||||||
} VP8intmv;
|
|
||||||
|
|
||||||
#define MAX_THREADS 8
|
#define MAX_THREADS 8
|
||||||
typedef struct VP8Context {
|
typedef struct VP8Context {
|
||||||
VP8ThreadData *thread_data;
|
VP8ThreadData *thread_data;
|
||||||
@ -152,8 +158,7 @@ typedef struct VP8Context {
|
|||||||
uint8_t deblock_filter;
|
uint8_t deblock_filter;
|
||||||
uint8_t mbskip_enabled;
|
uint8_t mbskip_enabled;
|
||||||
uint8_t profile;
|
uint8_t profile;
|
||||||
VP8intmv mv_min;
|
VP8mvbounds mv_bounds;
|
||||||
VP8intmv mv_max;
|
|
||||||
|
|
||||||
int8_t sign_bias[4]; ///< one state [0, 1] per ref frame type
|
int8_t sign_bias[4]; ///< one state [0, 1] per ref frame type
|
||||||
int ref_count[3];
|
int ref_count[3];
|
||||||
|
Loading…
x
Reference in New Issue
Block a user