You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-08-04 22:03:09 +02:00
ffv1enc_vulkan: get rid of temporary data for the setup shader
This commit is contained in:
@ -88,9 +88,6 @@ typedef struct VulkanEncodeFFv1Context {
|
|||||||
AVBufferPool *out_data_pool;
|
AVBufferPool *out_data_pool;
|
||||||
AVBufferPool *pkt_data_pool;
|
AVBufferPool *pkt_data_pool;
|
||||||
|
|
||||||
/* Temporary data buffer */
|
|
||||||
AVBufferPool *tmp_data_pool;
|
|
||||||
|
|
||||||
/* Slice results buffer */
|
/* Slice results buffer */
|
||||||
AVBufferPool *results_data_pool;
|
AVBufferPool *results_data_pool;
|
||||||
|
|
||||||
@ -303,11 +300,6 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
|
|||||||
|
|
||||||
AVFrame *intermediate_frame = NULL;
|
AVFrame *intermediate_frame = NULL;
|
||||||
|
|
||||||
/* Temporary data */
|
|
||||||
size_t tmp_data_size;
|
|
||||||
AVBufferRef *tmp_data_ref;
|
|
||||||
FFVkBuffer *tmp_data_buf;
|
|
||||||
|
|
||||||
/* Slice data */
|
/* Slice data */
|
||||||
AVBufferRef *slice_data_ref;
|
AVBufferRef *slice_data_ref;
|
||||||
FFVkBuffer *slice_data_buf;
|
FFVkBuffer *slice_data_buf;
|
||||||
@ -352,17 +344,6 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
|
|||||||
|
|
||||||
f->slice_count = f->max_slice_count;
|
f->slice_count = f->max_slice_count;
|
||||||
|
|
||||||
/* Allocate temporary data buffer */
|
|
||||||
tmp_data_size = f->slice_count*CONTEXT_SIZE;
|
|
||||||
RET(ff_vk_get_pooled_buffer(&fv->s, &fv->tmp_data_pool,
|
|
||||||
&tmp_data_ref,
|
|
||||||
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
|
||||||
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
|
|
||||||
NULL, tmp_data_size,
|
|
||||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT));
|
|
||||||
tmp_data_buf = (FFVkBuffer *)tmp_data_ref->data;
|
|
||||||
ff_vk_exec_add_dep_buf(&fv->s, exec, &tmp_data_ref, 1, 0);
|
|
||||||
|
|
||||||
/* Allocate slice buffer data */
|
/* Allocate slice buffer data */
|
||||||
if (f->ac == AC_GOLOMB_RICE)
|
if (f->ac == AC_GOLOMB_RICE)
|
||||||
plane_state_size = 8;
|
plane_state_size = 8;
|
||||||
@ -481,7 +462,6 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
|
|||||||
ff_vk_exec_bind_shader(&fv->s, exec, &fv->setup);
|
ff_vk_exec_bind_shader(&fv->s, exec, &fv->setup);
|
||||||
pd = (FFv1VkParameters) {
|
pd = (FFv1VkParameters) {
|
||||||
.slice_state = slice_data_buf->address + f->slice_count*256,
|
.slice_state = slice_data_buf->address + f->slice_count*256,
|
||||||
.scratch_data = tmp_data_buf->address,
|
|
||||||
.out_data = out_data_buf->address,
|
.out_data = out_data_buf->address,
|
||||||
.bits_per_raw_sample = f->bits_per_raw_sample,
|
.bits_per_raw_sample = f->bits_per_raw_sample,
|
||||||
.sar[0] = pict->sample_aspect_ratio.num,
|
.sar[0] = pict->sample_aspect_ratio.num,
|
||||||
@ -1698,7 +1678,6 @@ static av_cold int vulkan_encode_ffv1_close(AVCodecContext *avctx)
|
|||||||
|
|
||||||
av_buffer_pool_uninit(&fv->out_data_pool);
|
av_buffer_pool_uninit(&fv->out_data_pool);
|
||||||
av_buffer_pool_uninit(&fv->pkt_data_pool);
|
av_buffer_pool_uninit(&fv->pkt_data_pool);
|
||||||
av_buffer_pool_uninit(&fv->tmp_data_pool);
|
|
||||||
|
|
||||||
av_buffer_unref(&fv->keyframe_slice_data_ref);
|
av_buffer_unref(&fv->keyframe_slice_data_ref);
|
||||||
av_buffer_pool_uninit(&fv->slice_data_pool);
|
av_buffer_pool_uninit(&fv->slice_data_pool);
|
||||||
|
@ -20,6 +20,8 @@
|
|||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
uint8_t state[CONTEXT_SIZE];
|
||||||
|
|
||||||
void init_slice(out SliceContext sc, const uint slice_idx)
|
void init_slice(out SliceContext sc, const uint slice_idx)
|
||||||
{
|
{
|
||||||
/* Set coordinates */
|
/* Set coordinates */
|
||||||
@ -45,67 +47,54 @@ void init_slice(out SliceContext sc, const uint slice_idx)
|
|||||||
slice_size_max);
|
slice_size_max);
|
||||||
}
|
}
|
||||||
|
|
||||||
void put_rac_full(inout RangeCoder c, uint64_t state, bool bit)
|
void put_usymbol(inout RangeCoder c, uint v)
|
||||||
{
|
|
||||||
put_rac_norenorm(c, state, bit);
|
|
||||||
if (c.range < 0x100)
|
|
||||||
renorm_encoder_full(c);
|
|
||||||
}
|
|
||||||
|
|
||||||
void put_symbol_unsigned(inout RangeCoder c, uint64_t state, uint v)
|
|
||||||
{
|
{
|
||||||
bool is_nil = (v == 0);
|
bool is_nil = (v == 0);
|
||||||
put_rac_full(c, state, is_nil);
|
put_rac(c, state[0], is_nil);
|
||||||
if (is_nil)
|
if (is_nil)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
const int e = findMSB(v);
|
const int e = findMSB(v);
|
||||||
|
|
||||||
state += 1;
|
|
||||||
for (int i = 0; i < e; i++)
|
for (int i = 0; i < e; i++)
|
||||||
put_rac_full(c, state + min(i, 9), true);
|
put_rac(c, state[1 + min(i, 9)], true);
|
||||||
put_rac_full(c, state + min(e, 9), false);
|
put_rac(c, state[1 + min(e, 9)], false);
|
||||||
|
|
||||||
state += 21;
|
|
||||||
for (int i = e - 1; i >= 0; i--)
|
for (int i = e - 1; i >= 0; i--)
|
||||||
put_rac_full(c, state + min(i, 9), bool(bitfieldExtract(v, i, 1)));
|
put_rac(c, state[22 + min(i, 9)], bool(bitfieldExtract(v, i, 1)));
|
||||||
}
|
}
|
||||||
|
|
||||||
void write_slice_header(inout SliceContext sc, uint64_t state)
|
void write_slice_header(inout SliceContext sc)
|
||||||
{
|
{
|
||||||
u8buf sb = u8buf(state);
|
|
||||||
|
|
||||||
[[unroll]]
|
[[unroll]]
|
||||||
for (int i = 0; i < CONTEXT_SIZE; i++)
|
for (int i = 0; i < CONTEXT_SIZE; i++)
|
||||||
sb[i].v = uint8_t(128);
|
state[i] = uint8_t(128);
|
||||||
|
|
||||||
put_symbol_unsigned(sc.c, state, gl_WorkGroupID.x);
|
put_usymbol(sc.c, gl_WorkGroupID.x);
|
||||||
put_symbol_unsigned(sc.c, state, gl_WorkGroupID.y);
|
put_usymbol(sc.c, gl_WorkGroupID.y);
|
||||||
put_symbol_unsigned(sc.c, state, 0);
|
put_usymbol(sc.c, 0);
|
||||||
put_symbol_unsigned(sc.c, state, 0);
|
put_usymbol(sc.c, 0);
|
||||||
|
|
||||||
for (int i = 0; i < codec_planes; i++)
|
for (int i = 0; i < codec_planes; i++)
|
||||||
put_symbol_unsigned(sc.c, state, sc.quant_table_idx[i]);
|
put_usymbol(sc.c, sc.quant_table_idx[i]);
|
||||||
|
|
||||||
put_symbol_unsigned(sc.c, state, pic_mode);
|
put_usymbol(sc.c, pic_mode);
|
||||||
put_symbol_unsigned(sc.c, state, sar.x);
|
put_usymbol(sc.c, sar.x);
|
||||||
put_symbol_unsigned(sc.c, state, sar.y);
|
put_usymbol(sc.c, sar.y);
|
||||||
|
|
||||||
if (version >= 4) {
|
if (version >= 4) {
|
||||||
put_rac_full(sc.c, state, sc.slice_reset_contexts);
|
put_rac(sc.c, state[0], sc.slice_reset_contexts);
|
||||||
put_symbol_unsigned(sc.c, state, sc.slice_coding_mode);
|
put_usymbol(sc.c, sc.slice_coding_mode);
|
||||||
if (sc.slice_coding_mode != 1 && colorspace == 1) {
|
if (sc.slice_coding_mode != 1 && colorspace == 1) {
|
||||||
put_symbol_unsigned(sc.c, state, sc.slice_rct_coef.y);
|
put_usymbol(sc.c, sc.slice_rct_coef.y);
|
||||||
put_symbol_unsigned(sc.c, state, sc.slice_rct_coef.x);
|
put_usymbol(sc.c, sc.slice_rct_coef.x);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void write_frame_header(inout SliceContext sc, uint64_t state)
|
void write_frame_header(inout SliceContext sc)
|
||||||
{
|
{
|
||||||
u8buf sb = u8buf(state);
|
put_rac_equi(sc.c, bool(key_frame));
|
||||||
sb.v = uint8_t(128);
|
|
||||||
put_rac_full(sc.c, state, bool(key_frame));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef GOLOMB
|
#ifdef GOLOMB
|
||||||
@ -122,16 +111,12 @@ void main(void)
|
|||||||
{
|
{
|
||||||
const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
|
const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
|
||||||
|
|
||||||
/* Write slice data */
|
|
||||||
uint64_t scratch_state = uint64_t(scratch_data) + slice_idx*CONTEXT_SIZE;
|
|
||||||
u8buf sb = u8buf(scratch_state);
|
|
||||||
|
|
||||||
init_slice(slice_ctx[slice_idx], slice_idx);
|
init_slice(slice_ctx[slice_idx], slice_idx);
|
||||||
|
|
||||||
if (slice_idx == 0)
|
if (slice_idx == 0)
|
||||||
write_frame_header(slice_ctx[slice_idx], scratch_state);
|
write_frame_header(slice_ctx[slice_idx]);
|
||||||
|
|
||||||
write_slice_header(slice_ctx[slice_idx], scratch_state);
|
write_slice_header(slice_ctx[slice_idx]);
|
||||||
|
|
||||||
#ifdef GOLOMB
|
#ifdef GOLOMB
|
||||||
init_golomb(slice_ctx[slice_idx]);
|
init_golomb(slice_ctx[slice_idx]);
|
||||||
|
@ -91,15 +91,13 @@ void renorm_encoder(inout RangeCoder c)
|
|||||||
bs[i].v = fill;
|
bs[i].v = fill;
|
||||||
}
|
}
|
||||||
|
|
||||||
void put_rac_norenorm(inout RangeCoder c, uint64_t state, bool bit)
|
void put_rac_direct(inout RangeCoder c, uint8_t state, bool bit)
|
||||||
{
|
{
|
||||||
u8buf sb = u8buf(state);
|
int range1 = uint16_t((c.range * state) >> 8);
|
||||||
uint val = uint(sb.v);
|
|
||||||
int range1 = uint16_t((c.range * val) >> 8);
|
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
if (val == 0)
|
if (state == 0)
|
||||||
debugPrintfEXT("Error: state is zero (addr: 0x%lx)", uint64_t(sb));
|
debugPrintfEXT("Error: state is zero");
|
||||||
if (range1 >= c.range)
|
if (range1 >= c.range)
|
||||||
debugPrintfEXT("Error: range1 >= c.range");
|
debugPrintfEXT("Error: range1 >= c.range");
|
||||||
if (range1 <= 0)
|
if (range1 <= 0)
|
||||||
@ -113,13 +111,21 @@ void put_rac_norenorm(inout RangeCoder c, uint64_t state, bool bit)
|
|||||||
} else {
|
} else {
|
||||||
c.range = diff;
|
c.range = diff;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
sb.v = zero_one_state[(uint(bit) << 8) + val];
|
void put_rac_norenorm(inout RangeCoder c, uint64_t state, bool bit)
|
||||||
|
{
|
||||||
|
put_rac_direct(c, u8buf(state).v, bit);
|
||||||
|
|
||||||
#ifdef DEBUG
|
u8buf(state).v = zero_one_state[(uint(bit) << 8) + u8buf(state).v];
|
||||||
if (sb.v == 0)
|
}
|
||||||
debugPrintfEXT("Error: inserted zero state from tab %i idx %i", bit, val);
|
|
||||||
#endif
|
void put_rac(inout RangeCoder c, inout uint8_t state, bool bit)
|
||||||
|
{
|
||||||
|
put_rac_direct(c, state, bit);
|
||||||
|
if (c.range < 0x100)
|
||||||
|
renorm_encoder_full(c);
|
||||||
|
state = zero_one_state[(uint(bit) << 8) + state];
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Equiprobable bit */
|
/* Equiprobable bit */
|
||||||
|
Reference in New Issue
Block a user