You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-08-04 22:03:09 +02:00
ffv1/vulkan: redo context count tracking and quant_table_idx management
This commit also makes it possible for the encoder to choose a different quantization table on a per-slice basis, and adds the same capability to the decoder. In addition, it fully fixes decoding of files that were encoded with context=1.
This commit is contained in:
@ -49,9 +49,9 @@ typedef struct FFv1VkRCTParameters {
|
||||
} FFv1VkRCTParameters;
|
||||
|
||||
typedef struct FFv1VkResetParameters {
|
||||
uint32_t context_count[MAX_QUANT_TABLES];
|
||||
VkDeviceAddress slice_state;
|
||||
uint32_t plane_state_size;
|
||||
uint32_t context_count;
|
||||
uint8_t codec_planes;
|
||||
uint8_t key_frame;
|
||||
uint8_t version;
|
||||
|
@ -542,10 +542,12 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
|
||||
pd_reset = (FFv1VkResetParameters) {
|
||||
.slice_state = slice_data_buf->address + f->slice_count*256,
|
||||
.plane_state_size = plane_state_size,
|
||||
.context_count = context_count,
|
||||
.codec_planes = f->plane_count,
|
||||
.key_frame = f->key_frame,
|
||||
};
|
||||
for (int i = 0; i < f->quant_table_count; i++)
|
||||
pd_reset.context_count[i] = f->context_count[i];
|
||||
|
||||
ff_vk_shader_update_push_const(&fv->s, exec, &fv->reset,
|
||||
VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
0, sizeof(pd_reset), &pd_reset);
|
||||
@ -1071,9 +1073,9 @@ static int init_reset_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
|
||||
GLSLD(ff_source_common_comp);
|
||||
|
||||
GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
|
||||
GLSLF(1, uint context_count[%i]; ,MAX_QUANT_TABLES);
|
||||
GLSLC(1, u8buf slice_state; );
|
||||
GLSLC(1, uint plane_state_size; );
|
||||
GLSLC(1, uint context_count; );
|
||||
GLSLC(1, uint8_t codec_planes; );
|
||||
GLSLC(1, uint8_t key_frame; );
|
||||
GLSLC(1, uint8_t version; );
|
||||
|
@ -32,8 +32,7 @@ struct SliceContext {
|
||||
ivec2 slice_dim;
|
||||
ivec2 slice_pos;
|
||||
ivec2 slice_rct_coef;
|
||||
u8vec4 quant_table_idx;
|
||||
uint context_count;
|
||||
u8vec3 quant_table_idx;
|
||||
|
||||
uint hdr_len; // only used for golomb
|
||||
|
||||
|
@ -51,8 +51,8 @@ ivec2 get_pred(ivec2 sp, ivec2 off, int p, int sw, uint8_t quant_table_idx)
|
||||
(quant_table[quant_table_idx][4][127] != 0)) {
|
||||
TYPE cur2 = TYPE(0);
|
||||
if (off.x > 0) {
|
||||
const ivec2 yoff_border2 = off.x == 1 ? ivec2(1, -1) : ivec2(0, 0);
|
||||
cur2 = TYPE(imageLoad(dec[p], sp + LADDR(off + ivec2(-2, 0) + yoff_border2))[0]);
|
||||
const ivec2 yoff_border2 = off.x == 1 ? ivec2(-1, -1) : ivec2(-2, 0);
|
||||
cur2 = TYPE(imageLoad(dec[p], sp + LADDR(off + yoff_border2))[0]);
|
||||
}
|
||||
base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK];
|
||||
|
||||
@ -156,7 +156,7 @@ void decode_line_pcm(inout SliceContext sc, ivec2 sp, int w, int y, int p, int b
|
||||
|
||||
void decode_line(inout SliceContext sc, ivec2 sp, int w,
|
||||
int y, int p, int bits, uint64_t state,
|
||||
const int run_index)
|
||||
uint8_t quant_table_idx, const int run_index)
|
||||
{
|
||||
#ifndef RGB
|
||||
if (p > 0 && p < 3) {
|
||||
@ -167,7 +167,7 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w,
|
||||
|
||||
for (int x = 0; x < w; x++) {
|
||||
ivec2 pr = get_pred(sp, ivec2(x, y), p, w,
|
||||
sc.quant_table_idx[p]);
|
||||
quant_table_idx);
|
||||
|
||||
int diff = get_isymbol(sc.c, state + CONTEXT_SIZE*abs(pr[0]));
|
||||
if (pr[0] < 0)
|
||||
@ -182,7 +182,7 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w,
|
||||
|
||||
void decode_line(inout SliceContext sc, ivec2 sp, int w,
|
||||
int y, int p, int bits, uint64_t state,
|
||||
inout int run_index)
|
||||
uint8_t quant_table_idx, inout int run_index)
|
||||
{
|
||||
#ifndef RGB
|
||||
if (p > 0 && p < 3) {
|
||||
@ -198,7 +198,7 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w,
|
||||
ivec2 pos = sp + ivec2(x, y);
|
||||
int diff;
|
||||
ivec2 pr = get_pred(sp, ivec2(x, y), p, w,
|
||||
sc.quant_table_idx[p]);
|
||||
quant_table_idx);
|
||||
|
||||
VlcState sb = VlcState(state + VLC_STATE_SIZE*abs(pr[0]));
|
||||
|
||||
@ -325,6 +325,7 @@ void decode_slice(inout SliceContext sc, const uint slice_idx)
|
||||
/* Arithmetic coding */
|
||||
#endif
|
||||
{
|
||||
u8vec4 quant_table_idx = sc.quant_table_idx.xyyz;
|
||||
u64vec4 slice_state_off = (uint64_t(slice_state) +
|
||||
slice_idx*plane_state_size*codec_planes) +
|
||||
plane_state_size*uvec4(0, 1, 1, 2);
|
||||
@ -337,13 +338,13 @@ void decode_slice(inout SliceContext sc, const uint slice_idx)
|
||||
|
||||
for (int y = 0; y < h; y++)
|
||||
decode_line(sc, sp, w, y, p, bits,
|
||||
slice_state_off[p], run_index);
|
||||
slice_state_off[p], quant_table_idx[p], run_index);
|
||||
}
|
||||
#else
|
||||
for (int y = 0; y < sc.slice_dim.y; y++) {
|
||||
for (int p = 0; p < color_planes; p++)
|
||||
decode_line(sc, sp, w, y, p, bits,
|
||||
slice_state_off[p], run_index);
|
||||
slice_state_off[p], quant_table_idx[p], run_index);
|
||||
|
||||
writeout_rgb(sc, sp, w, y, true);
|
||||
}
|
||||
|
@ -76,7 +76,6 @@ bool decode_slice_header(inout SliceContext sc, uint64_t state)
|
||||
if (idx >= quant_table_count)
|
||||
return true;
|
||||
sc.quant_table_idx[i] = uint8_t(idx);
|
||||
sc.context_count = context_count[idx];
|
||||
}
|
||||
|
||||
get_usymbol(sc.c, state);
|
||||
|
@ -38,6 +38,7 @@ void init_slice(out SliceContext sc, const uint slice_idx)
|
||||
sc.slice_rct_coef = ivec2(1, 1);
|
||||
sc.slice_coding_mode = int(force_pcm == 1);
|
||||
sc.slice_reset_contexts = sc.slice_coding_mode == 1;
|
||||
sc.quant_table_idx = u8vec3(context_model);
|
||||
|
||||
rac_init(sc.c,
|
||||
OFFBUF(u8buf, out_data, slice_idx * slice_size_max),
|
||||
@ -84,7 +85,7 @@ void write_slice_header(inout SliceContext sc, uint64_t state)
|
||||
put_symbol_unsigned(sc.c, state, 0);
|
||||
|
||||
for (int i = 0; i < codec_planes; i++)
|
||||
put_symbol_unsigned(sc.c, state, context_model);
|
||||
put_symbol_unsigned(sc.c, state, sc.quant_table_idx[i]);
|
||||
|
||||
put_symbol_unsigned(sc.c, state, pic_mode);
|
||||
put_symbol_unsigned(sc.c, state, sar.x);
|
||||
|
@ -28,14 +28,15 @@ void main(void)
|
||||
slice_ctx[slice_idx].slice_reset_contexts == false)
|
||||
return;
|
||||
|
||||
const uint8_t qidx = slice_ctx[slice_idx].quant_table_idx[gl_WorkGroupID.z];
|
||||
uint contexts = context_count[qidx];
|
||||
uint64_t slice_state_off = uint64_t(slice_state) +
|
||||
slice_idx*plane_state_size*codec_planes;
|
||||
|
||||
#ifdef GOLOMB
|
||||
uint64_t start = slice_state_off +
|
||||
(gl_WorkGroupID.z*context_count +
|
||||
gl_LocalInvocationID.x)*VLC_STATE_SIZE;
|
||||
for (uint x = gl_LocalInvocationID.x; x < context_count; x += gl_WorkGroupSize.x) {
|
||||
(gl_WorkGroupID.z*(plane_state_size/VLC_STATE_SIZE) + gl_LocalInvocationID.x)*VLC_STATE_SIZE;
|
||||
for (uint x = gl_LocalInvocationID.x; x < contexts; x += gl_WorkGroupSize.x) {
|
||||
VlcState sb = VlcState(start);
|
||||
sb.drift = int16_t(0);
|
||||
sb.error_sum = uint16_t(4);
|
||||
@ -45,9 +46,9 @@ void main(void)
|
||||
}
|
||||
#else
|
||||
uint64_t start = slice_state_off +
|
||||
(gl_WorkGroupID.z*context_count)*CONTEXT_SIZE +
|
||||
gl_WorkGroupID.z*plane_state_size +
|
||||
(gl_LocalInvocationID.x << 2 /* dwords */); /* Bytes */
|
||||
uint count_total = context_count*(CONTEXT_SIZE /* bytes */ >> 2 /* dwords */);
|
||||
uint count_total = contexts*(CONTEXT_SIZE /* bytes */ >> 2 /* dwords */);
|
||||
for (uint x = gl_LocalInvocationID.x; x < count_total; x += gl_WorkGroupSize.x) {
|
||||
u32buf(start).v = 0x80808080;
|
||||
start += gl_WorkGroupSize.x*(CONTEXT_SIZE >> 3 /* 1/8th of context */);
|
||||
|
@ -49,7 +49,6 @@ typedef struct FFv1VulkanDecodePicture {
|
||||
uint32_t plane_state_size;
|
||||
uint32_t slice_state_size;
|
||||
uint32_t slice_data_size;
|
||||
uint32_t max_context_count;
|
||||
|
||||
AVBufferRef *slice_offset_buf;
|
||||
uint32_t *slice_offset;
|
||||
@ -77,8 +76,6 @@ typedef struct FFv1VulkanDecodeContext {
|
||||
} FFv1VulkanDecodeContext;
|
||||
|
||||
typedef struct FFv1VkParameters {
|
||||
uint32_t context_count[MAX_QUANT_TABLES];
|
||||
|
||||
VkDeviceAddress slice_data;
|
||||
VkDeviceAddress slice_state;
|
||||
VkDeviceAddress scratch_data;
|
||||
@ -111,8 +108,6 @@ typedef struct FFv1VkParameters {
|
||||
static void add_push_data(FFVulkanShader *shd)
|
||||
{
|
||||
GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
|
||||
GLSLF(1, uint context_count[%i]; ,MAX_QUANT_TABLES);
|
||||
GLSLC(0, );
|
||||
GLSLC(1, u8buf slice_data; );
|
||||
GLSLC(1, u8buf slice_state; );
|
||||
GLSLC(1, u8buf scratch_data; );
|
||||
@ -162,13 +157,15 @@ static int vk_ffv1_start_frame(AVCodecContext *avctx,
|
||||
AVHWFramesContext *hwfc = (AVHWFramesContext *)avctx->hw_frames_ctx->data;
|
||||
enum AVPixelFormat sw_format = hwfc->sw_format;
|
||||
|
||||
int max_contexts;
|
||||
int is_rgb = !(f->colorspace == 0 && sw_format != AV_PIX_FMT_YA8) &&
|
||||
!(sw_format == AV_PIX_FMT_YA8);
|
||||
|
||||
fp->slice_num = 0;
|
||||
|
||||
max_contexts = 0;
|
||||
for (int i = 0; i < f->quant_table_count; i++)
|
||||
fp->max_context_count = FFMAX(f->context_count[i], fp->max_context_count);
|
||||
max_contexts = FFMAX(f->context_count[i], max_contexts);
|
||||
|
||||
/* Allocate slice buffer data */
|
||||
if (f->ac == AC_GOLOMB_RICE)
|
||||
@ -176,7 +173,7 @@ static int vk_ffv1_start_frame(AVCodecContext *avctx,
|
||||
else
|
||||
fp->plane_state_size = CONTEXT_SIZE;
|
||||
|
||||
fp->plane_state_size *= fp->max_context_count;
|
||||
fp->plane_state_size *= max_contexts;
|
||||
fp->slice_state_size = fp->plane_state_size*f->plane_count;
|
||||
|
||||
fp->slice_data_size = 256; /* Overestimation for the SliceContext struct */
|
||||
@ -430,8 +427,6 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
|
||||
|
||||
ff_vk_exec_bind_shader(&ctx->s, exec, &fv->setup);
|
||||
pd = (FFv1VkParameters) {
|
||||
/* context_count */
|
||||
|
||||
.slice_data = slices_buf->address,
|
||||
.slice_state = slice_state->address + f->slice_count*fp->slice_data_size,
|
||||
.scratch_data = tmp_data->address,
|
||||
@ -471,9 +466,6 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
|
||||
else
|
||||
ff_vk_set_perm(sw_format, pd.fmt_lut, 0);
|
||||
|
||||
for (int i = 0; i < MAX_QUANT_TABLES; i++)
|
||||
pd.context_count[i] = f->context_count[i];
|
||||
|
||||
ff_vk_shader_update_push_const(&ctx->s, exec, &fv->setup,
|
||||
VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
0, sizeof(pd), &pd);
|
||||
@ -505,12 +497,14 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
|
||||
pd_reset = (FFv1VkResetParameters) {
|
||||
.slice_state = slice_state->address + f->slice_count*fp->slice_data_size,
|
||||
.plane_state_size = fp->plane_state_size,
|
||||
.context_count = fp->max_context_count,
|
||||
.codec_planes = f->plane_count,
|
||||
.key_frame = f->picture.f->flags & AV_FRAME_FLAG_KEY,
|
||||
.version = f->version,
|
||||
.micro_version = f->micro_version,
|
||||
};
|
||||
for (int i = 0; i < f->quant_table_count; i++)
|
||||
pd_reset.context_count[i] = f->context_count[i];
|
||||
|
||||
ff_vk_shader_update_push_const(&ctx->s, exec, reset_shader,
|
||||
VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
0, sizeof(pd_reset), &pd_reset);
|
||||
@ -763,9 +757,9 @@ static int init_reset_shader(FFV1Context *f, FFVulkanContext *s,
|
||||
GLSLD(ff_source_common_comp);
|
||||
|
||||
GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
|
||||
GLSLF(1, uint context_count[%i]; ,MAX_QUANT_TABLES);
|
||||
GLSLC(1, u8buf slice_state; );
|
||||
GLSLC(1, uint plane_state_size; );
|
||||
GLSLC(1, uint context_count; );
|
||||
GLSLC(1, uint8_t codec_planes; );
|
||||
GLSLC(1, uint8_t key_frame; );
|
||||
GLSLC(1, uint8_t version; );
|
||||
|
Reference in New Issue
Block a user