1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-01-24 13:56:33 +02:00

vulkan: use push descriptors where possible

Push descriptors are in theory slightly faster, but come with
limitations for which we have to check.

Either way, they're not difficult to implement, so even though
no one should be using peasant-tier descriptors, do it anyway.
This commit is contained in:
Lynne 2024-09-22 13:43:33 +02:00
parent 8a7af4aa49
commit bc36fe6f1f
No known key found for this signature in database
GPG Key ID: A2FEA5F03F034464
6 changed files with 97 additions and 49 deletions

View File

@ -687,14 +687,16 @@ static int denoise_pass(NLMeansVulkanContext *s, FFVkExecContext *exec,
VkBufferMemoryBarrier2 buf_bar[8];
int nb_buf_bar = 0;
DenoisePushData pd = {
{ ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] },
};
/* Denoise pass pipeline */
ff_vk_exec_bind_pipeline(vkctx, exec, &s->pl_denoise);
/* Push data */
ff_vk_update_push_exec(vkctx, exec, &s->pl_denoise, VK_SHADER_STAGE_COMPUTE_BIT,
0, sizeof(DenoisePushData), &(DenoisePushData) {
{ ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] },
});
0, sizeof(pd), &pd);
buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
@ -970,6 +972,10 @@ static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
offsets_dispatched,
};
/* Push data */
ff_vk_update_push_exec(vkctx, exec, &s->pl_weights, VK_SHADER_STAGE_COMPUTE_BIT,
0, sizeof(pd), &pd);
if (offsets_dispatched) {
nb_buf_bar = 0;
buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
@ -995,10 +1001,6 @@ static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
integral_vk->access = buf_bar[1].dstAccessMask;
}
/* Push data */
ff_vk_update_push_exec(vkctx, exec, &s->pl_weights, VK_SHADER_STAGE_COMPUTE_BIT,
0, sizeof(pd), &pd);
wg_invoc = FFMIN((s->nb_offsets - offsets_dispatched)/TYPE_ELEMS, s->opts.t);
wg_invoc = FFMIN(wg_invoc, vkctx->props.properties.limits.maxComputeWorkGroupCount[2]);

View File

@ -422,7 +422,7 @@ static const VulkanOptExtension optional_instance_exts[] = {
static const VulkanOptExtension optional_device_exts[] = {
/* Misc or required by other extensions */
{ VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME, FF_VK_EXT_NO_FLAG },
{ VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_NO_FLAG },
{ VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_PUSH_DESCRIPTOR },
{ VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, FF_VK_EXT_DESCRIPTOR_BUFFER, },
{ VK_EXT_PHYSICAL_DEVICE_DRM_EXTENSION_NAME, FF_VK_EXT_DEVICE_DRM },
{ VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME, FF_VK_EXT_ATOMIC_FLOAT },

View File

@ -1520,12 +1520,8 @@ int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl,
FFVulkanDescriptorSetBinding *desc, int nb,
int singular, int print_to_shader_only)
{
VkResult ret;
int has_sampler = 0;
FFVulkanFunctions *vk = &s->vkfn;
FFVulkanDescriptorSet *set;
VkDescriptorSetLayout *layout;
VkDescriptorSetLayoutCreateInfo desc_create_layout;
if (print_to_shader_only)
goto print;
@ -1537,14 +1533,7 @@ int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl,
return AVERROR(ENOMEM);
pl->desc_set = set;
layout = av_realloc_array(pl->desc_layout, sizeof(*pl->desc_layout),
pl->nb_descriptor_sets + 1);
if (!layout)
return AVERROR(ENOMEM);
pl->desc_layout = layout;
set = &set[pl->nb_descriptor_sets];
layout = &layout[pl->nb_descriptor_sets];
memset(set, 0, sizeof(*set));
set->binding = av_calloc(nb, sizeof(*set->binding));
@ -1557,14 +1546,6 @@ int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl,
return AVERROR(ENOMEM);
}
desc_create_layout = (VkDescriptorSetLayoutCreateInfo) {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.bindingCount = nb,
.pBindings = set->binding,
.flags = (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) ?
VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT : 0x0,
};
for (int i = 0; i < nb; i++) {
set->binding[i].binding = i;
set->binding[i].descriptorType = desc[i].type;
@ -1582,22 +1563,7 @@ int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl,
if (has_sampler)
set->usage |= VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT;
ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
s->hwctx->alloc, layout);
if (ret != VK_SUCCESS) {
av_log(s, AV_LOG_ERROR, "Unable to init descriptor set layout: %s",
ff_vk_ret2str(ret));
return AVERROR_EXTERNAL;
}
if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) {
vk->GetDescriptorSetLayoutSizeEXT(s->hwctx->act_dev, *layout, &set->layout_size);
set->aligned_size = FFALIGN(set->layout_size, s->desc_buf_props.descriptorBufferOffsetAlignment);
for (int i = 0; i < nb; i++)
vk->GetDescriptorSetLayoutBindingOffsetEXT(s->hwctx->act_dev, *layout,
i, &set->binding_offset[i]);
} else {
if (!(s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER)) {
for (int i = 0; i < nb; i++) {
int j;
VkDescriptorPoolSize *desc_pool_size;
@ -1606,8 +1572,8 @@ int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl,
break;
if (j >= pl->nb_desc_pool_size) {
desc_pool_size = av_realloc_array(pl->desc_pool_size,
sizeof(*desc_pool_size),
pl->nb_desc_pool_size + 1);
sizeof(*desc_pool_size),
pl->nb_desc_pool_size + 1);
if (!desc_pool_size)
return AVERROR(ENOMEM);
@ -1703,7 +1669,7 @@ int ff_vk_exec_pipeline_register(FFVulkanContext *s, FFVkExecPool *pool,
pl->bound_buffer_indices[i] = i;
}
} else {
} else if (!pl->use_push) {
VkResult ret;
FFVulkanFunctions *vk = &s->vkfn;
VkDescriptorSetLayout *tmp_layouts;
@ -1796,8 +1762,16 @@ static inline void update_set_pool_write(FFVulkanContext *s,
vk->UpdateDescriptorSets(s->hwctx->act_dev, 1, write_info, 0, NULL);
}
} else {
write_info->dstSet = pl->desc_sets[e->idx*pl->nb_descriptor_sets + set];
vk->UpdateDescriptorSets(s->hwctx->act_dev, 1, write_info, 0, NULL);
if (pl->use_push) {
vk->CmdPushDescriptorSetKHR(e->buf,
pl->bind_point,
pl->pipeline_layout,
set, 1,
write_info);
} else {
write_info->dstSet = pl->desc_sets[e->idx*pl->nb_descriptor_sets + set];
vk->UpdateDescriptorSets(s->hwctx->act_dev, 1, write_info, 0, NULL);
}
}
}
@ -1954,6 +1928,70 @@ void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e,
stage, offset, size, src);
}
/**
 * Create one VkDescriptorSetLayout for every descriptor set registered on the
 * pipeline, and decide whether the pipeline uses push descriptors.
 *
 * Push descriptors are selected only when descriptor buffers are not in use,
 * the FF_VK_EXT_PUSH_DESCRIPTOR flag is set, the pipeline has exactly one
 * descriptor set, and no descriptor set is marked as singular.
 *
 * When descriptor buffers are in use, the layout size, its aligned size and
 * the offset of every binding are queried and stored in the descriptor set.
 *
 * @param s  Vulkan context providing the extension mask, function table
 *           and device
 * @param pl pipeline whose desc_set[] and nb_descriptor_sets are already set;
 *           desc_layout and (possibly) use_push are written
 * @return 0 on success, AVERROR(ENOMEM) if the layout array cannot be
 *         allocated, AVERROR_EXTERNAL if creating a layout fails
 */
static int init_descriptors(FFVulkanContext *s, FFVulkanPipeline *pl)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;

    pl->desc_layout = av_malloc_array(pl->nb_descriptor_sets,
                                      sizeof(*pl->desc_layout));
    if (!pl->desc_layout)
        return AVERROR(ENOMEM);

    /* av_malloc_array() does not clear the memory. Zero the handles so that,
     * if layout creation fails partway through, the entries which were never
     * created are not left indeterminate for whoever frees the pipeline.
     * NOTE(review): assumes an all-zero handle equals VK_NULL_HANDLE - confirm
     * against the pipeline cleanup code. */
    memset(pl->desc_layout, 0,
           pl->nb_descriptor_sets * sizeof(*pl->desc_layout));

    if (!(s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER)) {
        int has_singular = 0;

        for (int i = 0; i < pl->nb_descriptor_sets; i++) {
            if (pl->desc_set[i].singular) {
                has_singular = 1;
                break;
            }
        }

        /* Only a single, non-singular descriptor set is pushed */
        pl->use_push = (s->extensions & FF_VK_EXT_PUSH_DESCRIPTOR) &&
                       (pl->nb_descriptor_sets == 1) &&
                       !has_singular;
    }

    for (int i = 0; i < pl->nb_descriptor_sets; i++) {
        FFVulkanDescriptorSet *set = &pl->desc_set[i];
        VkDescriptorSetLayoutCreateInfo desc_layout_create = {
            .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
            .bindingCount = set->nb_bindings,
            .pBindings = set->binding,
            /* Descriptor buffers take precedence over push descriptors */
            .flags = (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) ?
                     VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT :
                     (pl->use_push) ?
                     VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR :
                     0x0,
        };

        ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev,
                                            &desc_layout_create,
                                            s->hwctx->alloc,
                                            &pl->desc_layout[i]);
        if (ret != VK_SUCCESS) {
            /* av_log() does not append a newline by itself */
            av_log(s, AV_LOG_ERROR, "Unable to create descriptor set layout: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }

        if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) {
            vk->GetDescriptorSetLayoutSizeEXT(s->hwctx->act_dev, pl->desc_layout[i],
                                              &set->layout_size);

            set->aligned_size = FFALIGN(set->layout_size,
                                        s->desc_buf_props.descriptorBufferOffsetAlignment);

            for (int j = 0; j < set->nb_bindings; j++)
                vk->GetDescriptorSetLayoutBindingOffsetEXT(s->hwctx->act_dev,
                                                           pl->desc_layout[i],
                                                           j,
                                                           &set->binding_offset[j]);
        }
    }

    return 0;
}
static int init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl)
{
VkResult ret;
@ -1989,6 +2027,10 @@ int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl,
VkComputePipelineCreateInfo pipeline_create_info;
err = init_descriptors(s, pl);
if (err < 0)
return err;
err = init_pipeline_layout(s, pl);
if (err < 0)
return err;
@ -2038,7 +2080,7 @@ void ff_vk_exec_bind_pipeline(FFVulkanContext *s, FFVkExecContext *e,
vk->CmdSetDescriptorBufferOffsetsEXT(e->buf, pl->bind_point, pl->pipeline_layout,
0, pl->nb_descriptor_sets,
pl->bound_buffer_indices, offsets);
} else {
} else if (!pl->use_push) {
vk->CmdBindDescriptorSets(e->buf, pl->bind_point, pl->pipeline_layout,
0, pl->nb_descriptor_sets,
&pl->desc_sets[e->idx*pl->nb_descriptor_sets],

View File

@ -226,6 +226,7 @@ typedef struct FFVulkanPipeline {
int nb_descriptor_sets;
/* Descriptor pool */
int use_push;
VkDescriptorSet *desc_sets;
VkDescriptorPool desc_pool;
VkDescriptorPoolSize *desc_pool_size;

View File

@ -48,6 +48,7 @@ typedef enum FFVulkanExtensions {
FF_VK_EXT_COOP_MATRIX = 1ULL << 16, /* VK_KHR_cooperative_matrix */
FF_VK_EXT_OPTICAL_FLOW = 1ULL << 17, /* VK_NV_optical_flow */
FF_VK_EXT_SHADER_OBJECT = 1ULL << 18, /* VK_EXT_shader_object */
FF_VK_EXT_PUSH_DESCRIPTOR = 1ULL << 19, /* VK_KHR_push_descriptor */
FF_VK_EXT_VIDEO_MAINTENANCE_1 = 1ULL << 27, /* VK_KHR_video_maintenance1 */
FF_VK_EXT_VIDEO_ENCODE_QUEUE = 1ULL << 28, /* VK_KHR_video_encode_queue */
@ -179,6 +180,7 @@ typedef enum FFVulkanExtensions {
\
/* Descriptors */ \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, UpdateDescriptorSets) \
MACRO(1, 1, FF_VK_EXT_PUSH_DESCRIPTOR, CmdPushDescriptorSetKHR) \
\
/* Queries */ \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateQueryPool) \

View File

@ -65,6 +65,7 @@ static inline uint64_t ff_vk_extensions_to_mask(const char * const *extensions,
{ VK_KHR_VIDEO_ENCODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_H265 },
{ VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H265 },
{ VK_KHR_VIDEO_DECODE_AV1_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_AV1 },
{ VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_PUSH_DESCRIPTOR },
};
FFVulkanExtensions mask = 0x0;