1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-08-04 22:03:09 +02:00

ffv1/vulkan: redo context count tracking and quant_table_idx management

This commit also makes it possible for the encoder to choose a different
quantization table on a per-slice basis, as well as adding this capability
to the decoder.

Also, this commit fully fixes decoding of context=1 encoded files.
This commit is contained in:
Lynne
2025-04-05 05:04:29 +00:00
parent 66b8c92df2
commit 77f777d925
8 changed files with 31 additions and 34 deletions

View File

@ -49,9 +49,9 @@ typedef struct FFv1VkRCTParameters {
} FFv1VkRCTParameters;
typedef struct FFv1VkResetParameters {
uint32_t context_count[MAX_QUANT_TABLES];
VkDeviceAddress slice_state;
uint32_t plane_state_size;
uint32_t context_count;
uint8_t codec_planes;
uint8_t key_frame;
uint8_t version;

View File

@ -542,10 +542,12 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
pd_reset = (FFv1VkResetParameters) {
.slice_state = slice_data_buf->address + f->slice_count*256,
.plane_state_size = plane_state_size,
.context_count = context_count,
.codec_planes = f->plane_count,
.key_frame = f->key_frame,
};
for (int i = 0; i < f->quant_table_count; i++)
pd_reset.context_count[i] = f->context_count[i];
ff_vk_shader_update_push_const(&fv->s, exec, &fv->reset,
VK_SHADER_STAGE_COMPUTE_BIT,
0, sizeof(pd_reset), &pd_reset);
@ -1071,9 +1073,9 @@ static int init_reset_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
GLSLD(ff_source_common_comp);
GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
GLSLF(1, uint context_count[%i]; ,MAX_QUANT_TABLES);
GLSLC(1, u8buf slice_state; );
GLSLC(1, uint plane_state_size; );
GLSLC(1, uint context_count; );
GLSLC(1, uint8_t codec_planes; );
GLSLC(1, uint8_t key_frame; );
GLSLC(1, uint8_t version; );

View File

@ -32,8 +32,7 @@ struct SliceContext {
ivec2 slice_dim;
ivec2 slice_pos;
ivec2 slice_rct_coef;
u8vec4 quant_table_idx;
uint context_count;
u8vec3 quant_table_idx;
uint hdr_len; // only used for golomb

View File

@ -51,8 +51,8 @@ ivec2 get_pred(ivec2 sp, ivec2 off, int p, int sw, uint8_t quant_table_idx)
(quant_table[quant_table_idx][4][127] != 0)) {
TYPE cur2 = TYPE(0);
if (off.x > 0) {
const ivec2 yoff_border2 = off.x == 1 ? ivec2(1, -1) : ivec2(0, 0);
cur2 = TYPE(imageLoad(dec[p], sp + LADDR(off + ivec2(-2, 0) + yoff_border2))[0]);
const ivec2 yoff_border2 = off.x == 1 ? ivec2(-1, -1) : ivec2(-2, 0);
cur2 = TYPE(imageLoad(dec[p], sp + LADDR(off + yoff_border2))[0]);
}
base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK];
@ -156,7 +156,7 @@ void decode_line_pcm(inout SliceContext sc, ivec2 sp, int w, int y, int p, int b
void decode_line(inout SliceContext sc, ivec2 sp, int w,
int y, int p, int bits, uint64_t state,
const int run_index)
uint8_t quant_table_idx, const int run_index)
{
#ifndef RGB
if (p > 0 && p < 3) {
@ -167,7 +167,7 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w,
for (int x = 0; x < w; x++) {
ivec2 pr = get_pred(sp, ivec2(x, y), p, w,
sc.quant_table_idx[p]);
quant_table_idx);
int diff = get_isymbol(sc.c, state + CONTEXT_SIZE*abs(pr[0]));
if (pr[0] < 0)
@ -182,7 +182,7 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w,
void decode_line(inout SliceContext sc, ivec2 sp, int w,
int y, int p, int bits, uint64_t state,
inout int run_index)
uint8_t quant_table_idx, inout int run_index)
{
#ifndef RGB
if (p > 0 && p < 3) {
@ -198,7 +198,7 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w,
ivec2 pos = sp + ivec2(x, y);
int diff;
ivec2 pr = get_pred(sp, ivec2(x, y), p, w,
sc.quant_table_idx[p]);
quant_table_idx);
VlcState sb = VlcState(state + VLC_STATE_SIZE*abs(pr[0]));
@ -325,6 +325,7 @@ void decode_slice(inout SliceContext sc, const uint slice_idx)
/* Arithmetic coding */
#endif
{
u8vec4 quant_table_idx = sc.quant_table_idx.xyyz;
u64vec4 slice_state_off = (uint64_t(slice_state) +
slice_idx*plane_state_size*codec_planes) +
plane_state_size*uvec4(0, 1, 1, 2);
@ -337,13 +338,13 @@ void decode_slice(inout SliceContext sc, const uint slice_idx)
for (int y = 0; y < h; y++)
decode_line(sc, sp, w, y, p, bits,
slice_state_off[p], run_index);
slice_state_off[p], quant_table_idx[p], run_index);
}
#else
for (int y = 0; y < sc.slice_dim.y; y++) {
for (int p = 0; p < color_planes; p++)
decode_line(sc, sp, w, y, p, bits,
slice_state_off[p], run_index);
slice_state_off[p], quant_table_idx[p], run_index);
writeout_rgb(sc, sp, w, y, true);
}

View File

@ -76,7 +76,6 @@ bool decode_slice_header(inout SliceContext sc, uint64_t state)
if (idx >= quant_table_count)
return true;
sc.quant_table_idx[i] = uint8_t(idx);
sc.context_count = context_count[idx];
}
get_usymbol(sc.c, state);

View File

@ -38,6 +38,7 @@ void init_slice(out SliceContext sc, const uint slice_idx)
sc.slice_rct_coef = ivec2(1, 1);
sc.slice_coding_mode = int(force_pcm == 1);
sc.slice_reset_contexts = sc.slice_coding_mode == 1;
sc.quant_table_idx = u8vec3(context_model);
rac_init(sc.c,
OFFBUF(u8buf, out_data, slice_idx * slice_size_max),
@ -84,7 +85,7 @@ void write_slice_header(inout SliceContext sc, uint64_t state)
put_symbol_unsigned(sc.c, state, 0);
for (int i = 0; i < codec_planes; i++)
put_symbol_unsigned(sc.c, state, context_model);
put_symbol_unsigned(sc.c, state, sc.quant_table_idx[i]);
put_symbol_unsigned(sc.c, state, pic_mode);
put_symbol_unsigned(sc.c, state, sar.x);

View File

@ -28,14 +28,15 @@ void main(void)
slice_ctx[slice_idx].slice_reset_contexts == false)
return;
const uint8_t qidx = slice_ctx[slice_idx].quant_table_idx[gl_WorkGroupID.z];
uint contexts = context_count[qidx];
uint64_t slice_state_off = uint64_t(slice_state) +
slice_idx*plane_state_size*codec_planes;
#ifdef GOLOMB
uint64_t start = slice_state_off +
(gl_WorkGroupID.z*context_count +
gl_LocalInvocationID.x)*VLC_STATE_SIZE;
for (uint x = gl_LocalInvocationID.x; x < context_count; x += gl_WorkGroupSize.x) {
(gl_WorkGroupID.z*(plane_state_size/VLC_STATE_SIZE) + gl_LocalInvocationID.x)*VLC_STATE_SIZE;
for (uint x = gl_LocalInvocationID.x; x < contexts; x += gl_WorkGroupSize.x) {
VlcState sb = VlcState(start);
sb.drift = int16_t(0);
sb.error_sum = uint16_t(4);
@ -45,9 +46,9 @@ void main(void)
}
#else
uint64_t start = slice_state_off +
(gl_WorkGroupID.z*context_count)*CONTEXT_SIZE +
gl_WorkGroupID.z*plane_state_size +
(gl_LocalInvocationID.x << 2 /* dwords */); /* Bytes */
uint count_total = context_count*(CONTEXT_SIZE /* bytes */ >> 2 /* dwords */);
uint count_total = contexts*(CONTEXT_SIZE /* bytes */ >> 2 /* dwords */);
for (uint x = gl_LocalInvocationID.x; x < count_total; x += gl_WorkGroupSize.x) {
u32buf(start).v = 0x80808080;
start += gl_WorkGroupSize.x*(CONTEXT_SIZE >> 3 /* 1/8th of context */);

View File

@ -49,7 +49,6 @@ typedef struct FFv1VulkanDecodePicture {
uint32_t plane_state_size;
uint32_t slice_state_size;
uint32_t slice_data_size;
uint32_t max_context_count;
AVBufferRef *slice_offset_buf;
uint32_t *slice_offset;
@ -77,8 +76,6 @@ typedef struct FFv1VulkanDecodeContext {
} FFv1VulkanDecodeContext;
typedef struct FFv1VkParameters {
uint32_t context_count[MAX_QUANT_TABLES];
VkDeviceAddress slice_data;
VkDeviceAddress slice_state;
VkDeviceAddress scratch_data;
@ -111,8 +108,6 @@ typedef struct FFv1VkParameters {
static void add_push_data(FFVulkanShader *shd)
{
GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
GLSLF(1, uint context_count[%i]; ,MAX_QUANT_TABLES);
GLSLC(0, );
GLSLC(1, u8buf slice_data; );
GLSLC(1, u8buf slice_state; );
GLSLC(1, u8buf scratch_data; );
@ -162,13 +157,15 @@ static int vk_ffv1_start_frame(AVCodecContext *avctx,
AVHWFramesContext *hwfc = (AVHWFramesContext *)avctx->hw_frames_ctx->data;
enum AVPixelFormat sw_format = hwfc->sw_format;
int max_contexts;
int is_rgb = !(f->colorspace == 0 && sw_format != AV_PIX_FMT_YA8) &&
!(sw_format == AV_PIX_FMT_YA8);
fp->slice_num = 0;
max_contexts = 0;
for (int i = 0; i < f->quant_table_count; i++)
fp->max_context_count = FFMAX(f->context_count[i], fp->max_context_count);
max_contexts = FFMAX(f->context_count[i], max_contexts);
/* Allocate slice buffer data */
if (f->ac == AC_GOLOMB_RICE)
@ -176,7 +173,7 @@ static int vk_ffv1_start_frame(AVCodecContext *avctx,
else
fp->plane_state_size = CONTEXT_SIZE;
fp->plane_state_size *= fp->max_context_count;
fp->plane_state_size *= max_contexts;
fp->slice_state_size = fp->plane_state_size*f->plane_count;
fp->slice_data_size = 256; /* Overestimation for the SliceContext struct */
@ -430,8 +427,6 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
ff_vk_exec_bind_shader(&ctx->s, exec, &fv->setup);
pd = (FFv1VkParameters) {
/* context_count */
.slice_data = slices_buf->address,
.slice_state = slice_state->address + f->slice_count*fp->slice_data_size,
.scratch_data = tmp_data->address,
@ -471,9 +466,6 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
else
ff_vk_set_perm(sw_format, pd.fmt_lut, 0);
for (int i = 0; i < MAX_QUANT_TABLES; i++)
pd.context_count[i] = f->context_count[i];
ff_vk_shader_update_push_const(&ctx->s, exec, &fv->setup,
VK_SHADER_STAGE_COMPUTE_BIT,
0, sizeof(pd), &pd);
@ -505,12 +497,14 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
pd_reset = (FFv1VkResetParameters) {
.slice_state = slice_state->address + f->slice_count*fp->slice_data_size,
.plane_state_size = fp->plane_state_size,
.context_count = fp->max_context_count,
.codec_planes = f->plane_count,
.key_frame = f->picture.f->flags & AV_FRAME_FLAG_KEY,
.version = f->version,
.micro_version = f->micro_version,
};
for (int i = 0; i < f->quant_table_count; i++)
pd_reset.context_count[i] = f->context_count[i];
ff_vk_shader_update_push_const(&ctx->s, exec, reset_shader,
VK_SHADER_STAGE_COMPUTE_BIT,
0, sizeof(pd_reset), &pd_reset);
@ -763,9 +757,9 @@ static int init_reset_shader(FFV1Context *f, FFVulkanContext *s,
GLSLD(ff_source_common_comp);
GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
GLSLF(1, uint context_count[%i]; ,MAX_QUANT_TABLES);
GLSLC(1, u8buf slice_state; );
GLSLC(1, uint plane_state_size; );
GLSLC(1, uint context_count; );
GLSLC(1, uint8_t codec_planes; );
GLSLC(1, uint8_t key_frame; );
GLSLC(1, uint8_t version; );