mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-24 13:56:33 +02:00
vulkan: use push descriptors where possible
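Push descriptors are, in theory, slightly faster, but they come with limitations that we have to check for: they are only used when descriptor buffers are not in use, the push descriptor extension is enabled, the pipeline has exactly one descriptor set, and no set is singular. Either way, they're not difficult to implement, so even though no one should be using peasant-tier descriptors, do it anyway.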
This commit is contained in:
parent
8a7af4aa49
commit
bc36fe6f1f
@ -687,14 +687,16 @@ static int denoise_pass(NLMeansVulkanContext *s, FFVkExecContext *exec,
|
||||
VkBufferMemoryBarrier2 buf_bar[8];
|
||||
int nb_buf_bar = 0;
|
||||
|
||||
DenoisePushData pd = {
|
||||
{ ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] },
|
||||
};
|
||||
|
||||
/* Denoise pass pipeline */
|
||||
ff_vk_exec_bind_pipeline(vkctx, exec, &s->pl_denoise);
|
||||
|
||||
/* Push data */
|
||||
ff_vk_update_push_exec(vkctx, exec, &s->pl_denoise, VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
0, sizeof(DenoisePushData), &(DenoisePushData) {
|
||||
{ ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] },
|
||||
});
|
||||
0, sizeof(pd), &pd);
|
||||
|
||||
buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
|
||||
@ -970,6 +972,10 @@ static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
|
||||
offsets_dispatched,
|
||||
};
|
||||
|
||||
/* Push data */
|
||||
ff_vk_update_push_exec(vkctx, exec, &s->pl_weights, VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
0, sizeof(pd), &pd);
|
||||
|
||||
if (offsets_dispatched) {
|
||||
nb_buf_bar = 0;
|
||||
buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
|
||||
@ -995,10 +1001,6 @@ static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
|
||||
integral_vk->access = buf_bar[1].dstAccessMask;
|
||||
}
|
||||
|
||||
/* Push data */
|
||||
ff_vk_update_push_exec(vkctx, exec, &s->pl_weights, VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
0, sizeof(pd), &pd);
|
||||
|
||||
wg_invoc = FFMIN((s->nb_offsets - offsets_dispatched)/TYPE_ELEMS, s->opts.t);
|
||||
wg_invoc = FFMIN(wg_invoc, vkctx->props.properties.limits.maxComputeWorkGroupCount[2]);
|
||||
|
||||
|
@ -422,7 +422,7 @@ static const VulkanOptExtension optional_instance_exts[] = {
|
||||
static const VulkanOptExtension optional_device_exts[] = {
|
||||
/* Misc or required by other extensions */
|
||||
{ VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME, FF_VK_EXT_NO_FLAG },
|
||||
{ VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_NO_FLAG },
|
||||
{ VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_PUSH_DESCRIPTOR },
|
||||
{ VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, FF_VK_EXT_DESCRIPTOR_BUFFER, },
|
||||
{ VK_EXT_PHYSICAL_DEVICE_DRM_EXTENSION_NAME, FF_VK_EXT_DEVICE_DRM },
|
||||
{ VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME, FF_VK_EXT_ATOMIC_FLOAT },
|
||||
|
@ -1520,12 +1520,8 @@ int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl,
|
||||
FFVulkanDescriptorSetBinding *desc, int nb,
|
||||
int singular, int print_to_shader_only)
|
||||
{
|
||||
VkResult ret;
|
||||
int has_sampler = 0;
|
||||
FFVulkanFunctions *vk = &s->vkfn;
|
||||
FFVulkanDescriptorSet *set;
|
||||
VkDescriptorSetLayout *layout;
|
||||
VkDescriptorSetLayoutCreateInfo desc_create_layout;
|
||||
|
||||
if (print_to_shader_only)
|
||||
goto print;
|
||||
@ -1537,14 +1533,7 @@ int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl,
|
||||
return AVERROR(ENOMEM);
|
||||
pl->desc_set = set;
|
||||
|
||||
layout = av_realloc_array(pl->desc_layout, sizeof(*pl->desc_layout),
|
||||
pl->nb_descriptor_sets + 1);
|
||||
if (!layout)
|
||||
return AVERROR(ENOMEM);
|
||||
pl->desc_layout = layout;
|
||||
|
||||
set = &set[pl->nb_descriptor_sets];
|
||||
layout = &layout[pl->nb_descriptor_sets];
|
||||
memset(set, 0, sizeof(*set));
|
||||
|
||||
set->binding = av_calloc(nb, sizeof(*set->binding));
|
||||
@ -1557,14 +1546,6 @@ int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl,
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
|
||||
desc_create_layout = (VkDescriptorSetLayoutCreateInfo) {
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
|
||||
.bindingCount = nb,
|
||||
.pBindings = set->binding,
|
||||
.flags = (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) ?
|
||||
VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT : 0x0,
|
||||
};
|
||||
|
||||
for (int i = 0; i < nb; i++) {
|
||||
set->binding[i].binding = i;
|
||||
set->binding[i].descriptorType = desc[i].type;
|
||||
@ -1582,22 +1563,7 @@ int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl,
|
||||
if (has_sampler)
|
||||
set->usage |= VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT;
|
||||
|
||||
ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
|
||||
s->hwctx->alloc, layout);
|
||||
if (ret != VK_SUCCESS) {
|
||||
av_log(s, AV_LOG_ERROR, "Unable to init descriptor set layout: %s",
|
||||
ff_vk_ret2str(ret));
|
||||
return AVERROR_EXTERNAL;
|
||||
}
|
||||
|
||||
if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) {
|
||||
vk->GetDescriptorSetLayoutSizeEXT(s->hwctx->act_dev, *layout, &set->layout_size);
|
||||
set->aligned_size = FFALIGN(set->layout_size, s->desc_buf_props.descriptorBufferOffsetAlignment);
|
||||
|
||||
for (int i = 0; i < nb; i++)
|
||||
vk->GetDescriptorSetLayoutBindingOffsetEXT(s->hwctx->act_dev, *layout,
|
||||
i, &set->binding_offset[i]);
|
||||
} else {
|
||||
if (!(s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER)) {
|
||||
for (int i = 0; i < nb; i++) {
|
||||
int j;
|
||||
VkDescriptorPoolSize *desc_pool_size;
|
||||
@ -1606,8 +1572,8 @@ int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl,
|
||||
break;
|
||||
if (j >= pl->nb_desc_pool_size) {
|
||||
desc_pool_size = av_realloc_array(pl->desc_pool_size,
|
||||
sizeof(*desc_pool_size),
|
||||
pl->nb_desc_pool_size + 1);
|
||||
sizeof(*desc_pool_size),
|
||||
pl->nb_desc_pool_size + 1);
|
||||
if (!desc_pool_size)
|
||||
return AVERROR(ENOMEM);
|
||||
|
||||
@ -1703,7 +1669,7 @@ int ff_vk_exec_pipeline_register(FFVulkanContext *s, FFVkExecPool *pool,
|
||||
|
||||
pl->bound_buffer_indices[i] = i;
|
||||
}
|
||||
} else {
|
||||
} else if (!pl->use_push) {
|
||||
VkResult ret;
|
||||
FFVulkanFunctions *vk = &s->vkfn;
|
||||
VkDescriptorSetLayout *tmp_layouts;
|
||||
@ -1796,8 +1762,16 @@ static inline void update_set_pool_write(FFVulkanContext *s,
|
||||
vk->UpdateDescriptorSets(s->hwctx->act_dev, 1, write_info, 0, NULL);
|
||||
}
|
||||
} else {
|
||||
write_info->dstSet = pl->desc_sets[e->idx*pl->nb_descriptor_sets + set];
|
||||
vk->UpdateDescriptorSets(s->hwctx->act_dev, 1, write_info, 0, NULL);
|
||||
if (pl->use_push) {
|
||||
vk->CmdPushDescriptorSetKHR(e->buf,
|
||||
pl->bind_point,
|
||||
pl->pipeline_layout,
|
||||
set, 1,
|
||||
write_info);
|
||||
} else {
|
||||
write_info->dstSet = pl->desc_sets[e->idx*pl->nb_descriptor_sets + set];
|
||||
vk->UpdateDescriptorSets(s->hwctx->act_dev, 1, write_info, 0, NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1954,6 +1928,70 @@ void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e,
|
||||
stage, offset, size, src);
|
||||
}
|
||||
|
||||
/**
 * Create one VkDescriptorSetLayout per descriptor set of a pipeline and
 * decide whether the pipeline will use push descriptors.
 *
 * Push descriptors (pl->use_push) are selected only when all of the
 * following hold:
 *  - descriptor buffers (FF_VK_EXT_DESCRIPTOR_BUFFER) are not in use,
 *  - FF_VK_EXT_PUSH_DESCRIPTOR is enabled,
 *  - the pipeline has exactly one descriptor set,
 *  - no descriptor set is marked as singular.
 *
 * When descriptor buffers are in use, the layout size, its aligned size and
 * the offset of every binding are additionally queried and stored in the
 * corresponding FFVulkanDescriptorSet.
 *
 * @param s  Vulkan context providing the extension mask, device and
 *           function table
 * @param pl pipeline whose desc_set[] entries (bindings, nb_bindings,
 *           singular) are read; pl->desc_layout and pl->use_push are written
 * @return 0 on success, AVERROR(ENOMEM) if the layout array cannot be
 *         allocated, AVERROR_EXTERNAL if a layout cannot be created
 */
static int init_descriptors(FFVulkanContext *s, FFVulkanPipeline *pl)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    const int use_desc_buf = !!(s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER);

    /* Zero-initialized so that entries which were never created hold a null
     * handle instead of indeterminate values if creation fails part-way.
     * NOTE(review): confirm the pipeline teardown path skips null handles. */
    pl->desc_layout = av_calloc(pl->nb_descriptor_sets,
                                sizeof(*pl->desc_layout));
    if (!pl->desc_layout)
        return AVERROR(ENOMEM);

    if (!use_desc_buf) {
        int has_singular = 0;
        for (int i = 0; i < pl->nb_descriptor_sets; i++) {
            if (pl->desc_set[i].singular) {
                has_singular = 1;
                break;
            }
        }

        /* Push descriptors are only used for the simple case of a single,
         * non-singular descriptor set. */
        pl->use_push = (s->extensions & FF_VK_EXT_PUSH_DESCRIPTOR) &&
                       (pl->nb_descriptor_sets == 1) &&
                       !has_singular;
    }

    for (int i = 0; i < pl->nb_descriptor_sets; i++) {
        FFVulkanDescriptorSet *set = &pl->desc_set[i];
        VkDescriptorSetLayoutCreateInfo desc_layout_create = {
            .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
            .bindingCount = set->nb_bindings,
            .pBindings = set->binding,
            /* Descriptor buffers take precedence over push descriptors */
            .flags = use_desc_buf ?
                     VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT :
                     (pl->use_push) ?
                     VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR :
                     0x0,
        };

        ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev,
                                            &desc_layout_create,
                                            s->hwctx->alloc,
                                            &pl->desc_layout[i]);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Unable to create descriptor set layout: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }

        if (use_desc_buf) {
            /* Record where this set and each of its bindings live inside
             * a descriptor buffer. */
            vk->GetDescriptorSetLayoutSizeEXT(s->hwctx->act_dev, pl->desc_layout[i],
                                              &set->layout_size);

            set->aligned_size = FFALIGN(set->layout_size,
                                        s->desc_buf_props.descriptorBufferOffsetAlignment);

            for (int j = 0; j < set->nb_bindings; j++)
                vk->GetDescriptorSetLayoutBindingOffsetEXT(s->hwctx->act_dev,
                                                           pl->desc_layout[i],
                                                           j,
                                                           &set->binding_offset[j]);
        }
    }

    return 0;
}
|
||||
|
||||
static int init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl)
|
||||
{
|
||||
VkResult ret;
|
||||
@ -1989,6 +2027,10 @@ int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl,
|
||||
|
||||
VkComputePipelineCreateInfo pipeline_create_info;
|
||||
|
||||
err = init_descriptors(s, pl);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
err = init_pipeline_layout(s, pl);
|
||||
if (err < 0)
|
||||
return err;
|
||||
@ -2038,7 +2080,7 @@ void ff_vk_exec_bind_pipeline(FFVulkanContext *s, FFVkExecContext *e,
|
||||
vk->CmdSetDescriptorBufferOffsetsEXT(e->buf, pl->bind_point, pl->pipeline_layout,
|
||||
0, pl->nb_descriptor_sets,
|
||||
pl->bound_buffer_indices, offsets);
|
||||
} else {
|
||||
} else if (!pl->use_push) {
|
||||
vk->CmdBindDescriptorSets(e->buf, pl->bind_point, pl->pipeline_layout,
|
||||
0, pl->nb_descriptor_sets,
|
||||
&pl->desc_sets[e->idx*pl->nb_descriptor_sets],
|
||||
|
@ -226,6 +226,7 @@ typedef struct FFVulkanPipeline {
|
||||
int nb_descriptor_sets;
|
||||
|
||||
/* Descriptor pool */
|
||||
int use_push;
|
||||
VkDescriptorSet *desc_sets;
|
||||
VkDescriptorPool desc_pool;
|
||||
VkDescriptorPoolSize *desc_pool_size;
|
||||
|
@ -48,6 +48,7 @@ typedef enum FFVulkanExtensions {
|
||||
FF_VK_EXT_COOP_MATRIX = 1ULL << 16, /* VK_KHR_cooperative_matrix */
|
||||
FF_VK_EXT_OPTICAL_FLOW = 1ULL << 17, /* VK_NV_optical_flow */
|
||||
FF_VK_EXT_SHADER_OBJECT = 1ULL << 18, /* VK_EXT_shader_object */
|
||||
FF_VK_EXT_PUSH_DESCRIPTOR = 1ULL << 19, /* VK_KHR_push_descriptor */
|
||||
|
||||
FF_VK_EXT_VIDEO_MAINTENANCE_1 = 1ULL << 27, /* VK_KHR_video_maintenance1 */
|
||||
FF_VK_EXT_VIDEO_ENCODE_QUEUE = 1ULL << 28, /* VK_KHR_video_encode_queue */
|
||||
@ -179,6 +180,7 @@ typedef enum FFVulkanExtensions {
|
||||
\
|
||||
/* Descriptors */ \
|
||||
MACRO(1, 1, FF_VK_EXT_NO_FLAG, UpdateDescriptorSets) \
|
||||
MACRO(1, 1, FF_VK_EXT_PUSH_DESCRIPTOR, CmdPushDescriptorSetKHR) \
|
||||
\
|
||||
/* Queries */ \
|
||||
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateQueryPool) \
|
||||
|
@ -65,6 +65,7 @@ static inline uint64_t ff_vk_extensions_to_mask(const char * const *extensions,
|
||||
{ VK_KHR_VIDEO_ENCODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_H265 },
|
||||
{ VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H265 },
|
||||
{ VK_KHR_VIDEO_DECODE_AV1_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_AV1 },
|
||||
{ VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_PUSH_DESCRIPTOR },
|
||||
};
|
||||
|
||||
FFVulkanExtensions mask = 0x0;
|
||||
|
Loading…
x
Reference in New Issue
Block a user