You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-08-04 22:03:09 +02:00
vulkan_ffv1: remove need for scratch data during setup
This saves on some VRAM, but mainly allows for a more unified path.
This commit is contained in:
@ -20,13 +20,15 @@
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
uint get_usymbol(inout RangeCoder c, uint64_t state)
|
||||
uint8_t setup_state[CONTEXT_SIZE];
|
||||
|
||||
uint get_usymbol(inout RangeCoder c)
|
||||
{
|
||||
if (get_rac(c, state + 0))
|
||||
if (get_rac_direct(c, setup_state[0]))
|
||||
return 0;
|
||||
|
||||
int e = 0;
|
||||
while (get_rac(c, state + 1 + min(e, 9))) { // 1..10
|
||||
while (get_rac_direct(c, setup_state[1 + min(e, 9)])) { // 1..10
|
||||
e++;
|
||||
if (e > 31) {
|
||||
corrupt = true;
|
||||
@ -35,24 +37,24 @@ uint get_usymbol(inout RangeCoder c, uint64_t state)
|
||||
}
|
||||
|
||||
uint a = 1;
|
||||
for (int i = e - 1; i >= 0; i--)
|
||||
a += a + uint(get_rac(c, state + 22 + min(i, 9))); // 22..31
|
||||
for (int i = e - 1; i >= 0; i--) {
|
||||
a <<= 1;
|
||||
a |= uint(get_rac_direct(c, setup_state[22 + min(i, 9)])); // 22..31
|
||||
}
|
||||
|
||||
return a;
|
||||
}
|
||||
|
||||
bool decode_slice_header(inout SliceContext sc, uint64_t state)
|
||||
bool decode_slice_header(inout SliceContext sc)
|
||||
{
|
||||
u8buf sb = u8buf(state);
|
||||
|
||||
[[unroll]]
|
||||
for (int i = 0; i < CONTEXT_SIZE; i++)
|
||||
sb[i].v = uint8_t(128);
|
||||
setup_state[i] = uint8_t(128);
|
||||
|
||||
uint sx = get_usymbol(sc.c, state);
|
||||
uint sy = get_usymbol(sc.c, state);
|
||||
uint sw = get_usymbol(sc.c, state) + 1;
|
||||
uint sh = get_usymbol(sc.c, state) + 1;
|
||||
uint sx = get_usymbol(sc.c);
|
||||
uint sy = get_usymbol(sc.c);
|
||||
uint sw = get_usymbol(sc.c) + 1;
|
||||
uint sh = get_usymbol(sc.c) + 1;
|
||||
|
||||
if (sx < 0 || sy < 0 || sw <= 0 || sh <= 0 ||
|
||||
sx > (gl_NumWorkGroups.x - sw) || sy > (gl_NumWorkGroups.y - sh) ||
|
||||
@ -72,22 +74,22 @@ bool decode_slice_header(inout SliceContext sc, uint64_t state)
|
||||
sc.slice_coding_mode = int(0);
|
||||
|
||||
for (uint i = 0; i < codec_planes; i++) {
|
||||
uint idx = get_usymbol(sc.c, state);
|
||||
uint idx = get_usymbol(sc.c);
|
||||
if (idx >= quant_table_count)
|
||||
return true;
|
||||
sc.quant_table_idx[i] = uint8_t(idx);
|
||||
}
|
||||
|
||||
get_usymbol(sc.c, state);
|
||||
get_usymbol(sc.c, state);
|
||||
get_usymbol(sc.c, state);
|
||||
get_usymbol(sc.c);
|
||||
get_usymbol(sc.c);
|
||||
get_usymbol(sc.c);
|
||||
|
||||
if (version >= 4) {
|
||||
sc.slice_reset_contexts = get_rac(sc.c, state);
|
||||
sc.slice_coding_mode = get_usymbol(sc.c, state);
|
||||
sc.slice_reset_contexts = get_rac_direct(sc.c, setup_state[0]);
|
||||
sc.slice_coding_mode = get_usymbol(sc.c);
|
||||
if (sc.slice_coding_mode != 1 && colorspace == 1) {
|
||||
sc.slice_rct_coef.x = int(get_usymbol(sc.c, state));
|
||||
sc.slice_rct_coef.y = int(get_usymbol(sc.c, state));
|
||||
sc.slice_rct_coef.x = int(get_usymbol(sc.c));
|
||||
sc.slice_rct_coef.y = int(get_usymbol(sc.c));
|
||||
if (sc.slice_rct_coef.x + sc.slice_rct_coef.y > 4)
|
||||
return true;
|
||||
}
|
||||
@ -96,11 +98,11 @@ bool decode_slice_header(inout SliceContext sc, uint64_t state)
|
||||
return false;
|
||||
}
|
||||
|
||||
void golomb_init(inout SliceContext sc, uint64_t state)
|
||||
void golomb_init(inout SliceContext sc)
|
||||
{
|
||||
if (version == 3 && micro_version > 1 || version > 3) {
|
||||
u8buf(state).v = uint8_t(129);
|
||||
get_rac(sc.c, state);
|
||||
setup_state[0] = uint8_t(129);
|
||||
get_rac_direct(sc.c, setup_state[0]);
|
||||
}
|
||||
|
||||
uint64_t ac_byte_count = sc.c.bytestream - sc.c.bytestream_start - 1;
|
||||
@ -111,7 +113,6 @@ void golomb_init(inout SliceContext sc, uint64_t state)
|
||||
void main(void)
|
||||
{
|
||||
const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
|
||||
uint64_t scratch_state = uint64_t(scratch_data) + slice_idx*CONTEXT_SIZE;
|
||||
|
||||
u8buf bs = u8buf(slice_data + slice_offsets[2*slice_idx + 0]);
|
||||
uint32_t slice_size = slice_offsets[2*slice_idx + 1];
|
||||
@ -122,10 +123,10 @@ void main(void)
|
||||
if (slice_idx == (gl_NumWorkGroups.x*gl_NumWorkGroups.y - 1))
|
||||
get_rac_equi(slice_ctx[slice_idx].c);
|
||||
|
||||
decode_slice_header(slice_ctx[slice_idx], scratch_state);
|
||||
decode_slice_header(slice_ctx[slice_idx]);
|
||||
|
||||
if (golomb == 1)
|
||||
golomb_init(slice_ctx[slice_idx], scratch_state);
|
||||
golomb_init(slice_ctx[slice_idx]);
|
||||
|
||||
if (ec != 0 && check_crc != 0) {
|
||||
uint32_t crc = crcref;
|
||||
|
@ -226,25 +226,31 @@ void refill(inout RangeCoder c)
|
||||
}
|
||||
}
|
||||
|
||||
bool get_rac(inout RangeCoder c, uint64_t state)
|
||||
bool get_rac_direct(inout RangeCoder c, inout uint8_t state)
|
||||
{
|
||||
u8buf sb = u8buf(state);
|
||||
int val = int(sb.v);
|
||||
int range1 = -int(c.range * val >> 8);
|
||||
int ranged = c.range + range1;
|
||||
int range1 = c.range * state >> 8;
|
||||
int ranged = c.range - range1;
|
||||
|
||||
bool bit = c.low >= ranged;
|
||||
sb.v = zero_one_state[val + (bit ? 256 : 0)];
|
||||
|
||||
c.low = c.low - (bit ? ranged : 0);
|
||||
c.range = (bit ? 0 : ranged) - (bit ? range1 : 0);
|
||||
c.low -= bit ? ranged : 0;
|
||||
c.range = (bit ? 0 : ranged) + (bit ? range1 : 0);
|
||||
|
||||
if (c.range < 0x100)
|
||||
refill(c);
|
||||
|
||||
state = zero_one_state[state + (bit ? 256 : 0)];
|
||||
return bit;
|
||||
}
|
||||
|
||||
bool get_rac(inout RangeCoder c, uint64_t state)
|
||||
{
|
||||
u8buf sb = u8buf(state);
|
||||
uint8_t val = sb.v;
|
||||
bool ret = get_rac_direct(c, val);
|
||||
sb.v = val;
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool get_rac_equi(inout RangeCoder c)
|
||||
{
|
||||
int range1 = c.range >> 1;
|
||||
|
@ -43,8 +43,6 @@ const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc = {
|
||||
typedef struct FFv1VulkanDecodePicture {
|
||||
FFVulkanDecodePicture vp;
|
||||
|
||||
AVBufferRef *tmp_data;
|
||||
|
||||
AVBufferRef *slice_state;
|
||||
uint32_t plane_state_size;
|
||||
uint32_t slice_state_size;
|
||||
@ -70,7 +68,6 @@ typedef struct FFv1VulkanDecodeContext {
|
||||
FFVkBuffer crc_tab_buf;
|
||||
|
||||
AVBufferPool *slice_state_pool;
|
||||
AVBufferPool *tmp_data_pool;
|
||||
AVBufferPool *slice_offset_pool;
|
||||
AVBufferPool *slice_status_pool;
|
||||
} FFv1VulkanDecodeContext;
|
||||
@ -78,7 +75,6 @@ typedef struct FFv1VulkanDecodeContext {
|
||||
typedef struct FFv1VkParameters {
|
||||
VkDeviceAddress slice_data;
|
||||
VkDeviceAddress slice_state;
|
||||
VkDeviceAddress scratch_data;
|
||||
|
||||
int fmt_lut[4];
|
||||
uint32_t img_size[2];
|
||||
@ -111,7 +107,6 @@ static void add_push_data(FFVulkanShader *shd)
|
||||
GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
|
||||
GLSLC(1, u8buf slice_data; );
|
||||
GLSLC(1, u8buf slice_state; );
|
||||
GLSLC(1, u8buf scratch_data; );
|
||||
GLSLC(0, );
|
||||
GLSLC(1, ivec4 fmt_lut; );
|
||||
GLSLC(1, uvec2 img_size; );
|
||||
@ -208,16 +203,6 @@ static int vk_ffv1_start_frame(AVCodecContext *avctx,
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
|
||||
/* Allocate temporary data buffer */
|
||||
err = ff_vk_get_pooled_buffer(&ctx->s, &fv->tmp_data_pool,
|
||||
&fp->tmp_data,
|
||||
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
|
||||
NULL, f->slice_count*CONTEXT_SIZE,
|
||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
/* Allocate slice offsets buffer */
|
||||
err = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_offset_pool,
|
||||
&fp->slice_offset_buf,
|
||||
@ -327,7 +312,6 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
|
||||
FFVkBuffer *slice_offset = (FFVkBuffer *)fp->slice_offset_buf->data;
|
||||
FFVkBuffer *slice_status = (FFVkBuffer *)fp->slice_status_buf->data;
|
||||
|
||||
FFVkBuffer *tmp_data = (FFVkBuffer *)fp->tmp_data->data;
|
||||
VkImageView rct_image_views[AV_NUM_DATA_POINTERS];
|
||||
|
||||
AVFrame *decode_dst = is_rgb ? vp->dpb_frame : f->picture.f;
|
||||
@ -380,8 +364,6 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
|
||||
vp->slices_buf = NULL;
|
||||
RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->slice_offset_buf, 1, 0));
|
||||
fp->slice_offset_buf = NULL;
|
||||
RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->tmp_data, 1, 0));
|
||||
fp->tmp_data = NULL;
|
||||
|
||||
/* Entry barrier for the slice state */
|
||||
buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
|
||||
@ -431,7 +413,6 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
|
||||
pd = (FFv1VkParameters) {
|
||||
.slice_data = slices_buf->address,
|
||||
.slice_state = slice_state->address + f->slice_count*fp->slice_data_size,
|
||||
.scratch_data = tmp_data->address,
|
||||
|
||||
.img_size[0] = f->picture.f->width,
|
||||
.img_size[1] = f->picture.f->height,
|
||||
@ -990,7 +971,6 @@ static void vk_decode_ffv1_uninit(FFVulkanDecodeShared *ctx)
|
||||
ff_vk_free_buf(&ctx->s, &fv->rangecoder_static_buf);
|
||||
ff_vk_free_buf(&ctx->s, &fv->crc_tab_buf);
|
||||
|
||||
av_buffer_pool_uninit(&fv->tmp_data_pool);
|
||||
av_buffer_pool_uninit(&fv->slice_state_pool);
|
||||
av_buffer_pool_uninit(&fv->slice_offset_pool);
|
||||
av_buffer_pool_uninit(&fv->slice_status_pool);
|
||||
@ -1148,7 +1128,6 @@ static void vk_ffv1_free_frame_priv(AVRefStructOpaque _hwctx, void *data)
|
||||
av_buffer_unref(&fp->slice_state);
|
||||
av_buffer_unref(&fp->slice_offset_buf);
|
||||
av_buffer_unref(&fp->slice_status_buf);
|
||||
av_buffer_unref(&fp->tmp_data);
|
||||
}
|
||||
|
||||
const FFHWAccel ff_ffv1_vulkan_hwaccel = {
|
||||
|
Reference in New Issue
Block a user