1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2024-12-07 11:13:41 +02:00
FFmpeg/libavutil/vulkan.c

1756 lines
60 KiB
C

/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "avassert.h"
#include "vulkan.h"
#include "vulkan_loader.h"
#if CONFIG_LIBGLSLANG
#include "vulkan_glslang.c"
#elif CONFIG_LIBSHADERC
#include "vulkan_shaderc.c"
#endif
/* Generates a static create_<shortname>() helper.
 *
 * The helper heap-allocates one zeroed element and appends a pointer to it to
 * a growable array of element pointers kept in the parent context. Because
 * the array stores pointers only, growing it does not move elements created
 * earlier, so their addresses stay valid.
 *
 *   ctx       - type of the parent context
 *   type      - type of the element to create
 *   shortname - suffix of the generated function name
 *   array     - parent member holding the array of element pointers
 *   num       - parent member holding the number of stored elements
 *
 * The generated function returns the new element, or NULL when either
 * allocation fails (the parent context is not modified in that case). */
#define FN_CREATING(ctx, type, shortname, array, num)                          \
static av_always_inline type *create_ ##shortname(ctx *dctx)                   \
{                                                                              \
    type **array;                                                              \
    type *sctx = av_mallocz(sizeof(*sctx));                                    \
                                                                               \
    if (sctx) {                                                                \
        array = av_realloc_array(dctx->array, sizeof(*dctx->array),            \
                                 dctx->num + 1);                               \
        if (array) {                                                           \
            dctx->array            = array;                                    \
            dctx->array[dctx->num] = sctx;                                     \
            dctx->num++;                                                       \
        } else {                                                               \
            av_free(sctx);                                                     \
            sctx = NULL;                                                       \
        }                                                                      \
    }                                                                          \
                                                                               \
    return sctx;                                                               \
}
/* Component mapping in which all four channels (r, g, b, a) use the identity
 * swizzle. */
const VkComponentMapping ff_comp_identity_map = {
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
};
/* Maps a VkResult code to the spelling of its enumerator, for use in log
 * messages. Any code not listed below yields "Unknown error". The returned
 * string is a literal and must not be freed. */
const char *ff_vk_ret2str(VkResult res)
{
#define RESULT_NAME(code) case code: return #code
    switch (res) {
    /* Core success / status codes */
    RESULT_NAME(VK_SUCCESS);
    RESULT_NAME(VK_NOT_READY);
    RESULT_NAME(VK_TIMEOUT);
    RESULT_NAME(VK_EVENT_SET);
    RESULT_NAME(VK_EVENT_RESET);
    RESULT_NAME(VK_INCOMPLETE);
    /* Core error codes */
    RESULT_NAME(VK_ERROR_OUT_OF_HOST_MEMORY);
    RESULT_NAME(VK_ERROR_OUT_OF_DEVICE_MEMORY);
    RESULT_NAME(VK_ERROR_INITIALIZATION_FAILED);
    RESULT_NAME(VK_ERROR_DEVICE_LOST);
    RESULT_NAME(VK_ERROR_MEMORY_MAP_FAILED);
    RESULT_NAME(VK_ERROR_LAYER_NOT_PRESENT);
    RESULT_NAME(VK_ERROR_EXTENSION_NOT_PRESENT);
    RESULT_NAME(VK_ERROR_FEATURE_NOT_PRESENT);
    RESULT_NAME(VK_ERROR_INCOMPATIBLE_DRIVER);
    RESULT_NAME(VK_ERROR_TOO_MANY_OBJECTS);
    RESULT_NAME(VK_ERROR_FORMAT_NOT_SUPPORTED);
    RESULT_NAME(VK_ERROR_FRAGMENTED_POOL);
    RESULT_NAME(VK_ERROR_UNKNOWN);
    RESULT_NAME(VK_ERROR_OUT_OF_POOL_MEMORY);
    RESULT_NAME(VK_ERROR_INVALID_EXTERNAL_HANDLE);
    RESULT_NAME(VK_ERROR_FRAGMENTATION);
    RESULT_NAME(VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS);
    RESULT_NAME(VK_PIPELINE_COMPILE_REQUIRED);
    /* Extension codes */
    RESULT_NAME(VK_ERROR_SURFACE_LOST_KHR);
    RESULT_NAME(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
    RESULT_NAME(VK_SUBOPTIMAL_KHR);
    RESULT_NAME(VK_ERROR_OUT_OF_DATE_KHR);
    RESULT_NAME(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
    RESULT_NAME(VK_ERROR_VALIDATION_FAILED_EXT);
    RESULT_NAME(VK_ERROR_INVALID_SHADER_NV);
    RESULT_NAME(VK_ERROR_VIDEO_PICTURE_LAYOUT_NOT_SUPPORTED_KHR);
    RESULT_NAME(VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR);
    RESULT_NAME(VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR);
    RESULT_NAME(VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR);
    RESULT_NAME(VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR);
    RESULT_NAME(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT);
    RESULT_NAME(VK_ERROR_NOT_PERMITTED_KHR);
    RESULT_NAME(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT);
    RESULT_NAME(VK_THREAD_IDLE_KHR);
    RESULT_NAME(VK_THREAD_DONE_KHR);
    RESULT_NAME(VK_OPERATION_DEFERRED_KHR);
    RESULT_NAME(VK_OPERATION_NOT_DEFERRED_KHR);
    default:
        break;
    }
#undef RESULT_NAME

    return "Unknown error";
}
/* Queries the physical device and caches its properties in the context.
 *
 * Fills s->props (chained with s->driver_props) and s->mprops on every call.
 * The per-queue-family arrays s->qf_props, s->video_props and s->query_props
 * are allocated and filled only when s->qf_props is still NULL; each
 * qf_props[i] is chained to video_props[i], which is chained to
 * query_props[i].
 *
 * Returns 0 on success or AVERROR(ENOMEM) if an array cannot be allocated;
 * in the failure case none of the three arrays is left allocated. */
int ff_vk_load_props(FFVulkanContext *s)
{
    uint32_t qc = 0;
    FFVulkanFunctions *vk = &s->vkfn;

    s->driver_props = (VkPhysicalDeviceDriverProperties) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES,
    };
    s->props = (VkPhysicalDeviceProperties2) {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
        .pNext = &s->driver_props,
    };

    vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props);
    vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops);

    /* With a NULL array this call only reports the family count in qc */
    vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &qc, s->qf_props);

    /* Queue family properties were already loaded by an earlier call */
    if (s->qf_props)
        return 0;

    s->qf_props = av_calloc(qc, sizeof(*s->qf_props));
    if (!s->qf_props)
        return AVERROR(ENOMEM);

    s->query_props = av_calloc(qc, sizeof(*s->query_props));
    if (!s->query_props) {
        /* Check the array that was just allocated, not qf_props again */
        av_freep(&s->qf_props);
        return AVERROR(ENOMEM);
    }

    s->video_props = av_calloc(qc, sizeof(*s->video_props));
    if (!s->video_props) {
        av_freep(&s->qf_props);
        av_freep(&s->query_props);
        return AVERROR(ENOMEM);
    }

    /* Build the pNext chain qf_props -> video_props -> query_props */
    for (uint32_t i = 0; i < qc; i++) {
        s->query_props[i] = (VkQueueFamilyQueryResultStatusPropertiesKHR) {
            .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_QUERY_RESULT_STATUS_PROPERTIES_KHR,
        };
        s->video_props[i] = (VkQueueFamilyVideoPropertiesKHR) {
            .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR,
            .pNext = &s->query_props[i],
        };
        s->qf_props[i] = (VkQueueFamilyProperties2) {
            .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2,
            .pNext = &s->video_props[i],
        };
    }

    vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &qc, s->qf_props);

    return 0;
}
void ff_vk_qf_fill(FFVulkanContext *s)
{
s->nb_qfs = 0;
/* Simply fills in all unique queues into s->qfs */
if (s->hwctx->queue_family_index >= 0)
s->qfs[s->nb_qfs++] = s->hwctx->queue_family_index;
if (!s->nb_qfs || s->qfs[0] != s->hwctx->queue_family_tx_index)
s->qfs[s->nb_qfs++] = s->hwctx->queue_family_tx_index;
if (!s->nb_qfs || (s->qfs[0] != s->hwctx->queue_family_comp_index &&
s->qfs[1] != s->hwctx->queue_family_comp_index))
s->qfs[s->nb_qfs++] = s->hwctx->queue_family_comp_index;
if (s->hwctx->queue_family_decode_index >= 0 &&
(s->qfs[0] != s->hwctx->queue_family_decode_index &&
s->qfs[1] != s->hwctx->queue_family_decode_index &&
s->qfs[2] != s->hwctx->queue_family_decode_index))
s->qfs[s->nb_qfs++] = s->hwctx->queue_family_decode_index;
if (s->hwctx->queue_family_encode_index >= 0 &&
(s->qfs[0] != s->hwctx->queue_family_encode_index &&
s->qfs[1] != s->hwctx->queue_family_encode_index &&
s->qfs[2] != s->hwctx->queue_family_encode_index &&
s->qfs[3] != s->hwctx->queue_family_encode_index))
s->qfs[s->nb_qfs++] = s->hwctx->queue_family_encode_index;
}
/* Looks up the queue family index configured in the device context for the
 * given queue capability.
 *
 * dev_family must be exactly one of the graphics, compute, transfer, video
 * encode or video decode bits; any other value trips av_assert0().
 * If nb is non-NULL it receives the number of queues of that family.
 * Returns the queue family index as stored in the device context. */
int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb)
{
    const AVVulkanDeviceContext *hw = s->hwctx;
    int family, count;

    switch (dev_family) {
    case VK_QUEUE_GRAPHICS_BIT:
        family = hw->queue_family_index;
        count  = hw->nb_graphics_queues;
        break;
    case VK_QUEUE_COMPUTE_BIT:
        family = hw->queue_family_comp_index;
        count  = hw->nb_comp_queues;
        break;
    case VK_QUEUE_TRANSFER_BIT:
        family = hw->queue_family_tx_index;
        count  = hw->nb_tx_queues;
        break;
    case VK_QUEUE_VIDEO_ENCODE_BIT_KHR:
        family = hw->queue_family_encode_index;
        count  = hw->nb_encode_queues;
        break;
    case VK_QUEUE_VIDEO_DECODE_BIT_KHR:
        family = hw->queue_family_decode_index;
        count  = hw->nb_decode_queues;
        break;
    default:
        av_assert0(0); /* Should never happen */
    }

    if (nb)
        *nb = count;

    return family;
}
/* Initializes a queue family context for the given queue capability.
 *
 * Stores the family index and the number of queues the device provides
 * (qf->actual_queues). qf->nb_queues is set to nb_queues, or to
 * qf->actual_queues when nb_queues is 0.
 * Returns the queue family index. */
int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
                  VkQueueFlagBits dev_family, int nb_queues)
{
    qf->queue_family = ff_vk_qf_get_index(s, dev_family, &qf->actual_queues);
    qf->nb_queues    = nb_queues ? nb_queues : qf->actual_queues;

    return qf->queue_family;
}
/* Advances qf->cur_queue to the next queue, wrapping around after
 * qf->nb_queues, and returns the new current queue index. */
int ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf)
{
    const int next = (qf->cur_queue + 1) % qf->nb_queues;

    qf->cur_queue = next;

    return qf->cur_queue;
}
/* Allocates device memory satisfying the given requirements.
 *
 * s               - context providing device handles and cached properties
 * req             - memory requirements; req->size is rounded up to
 *                   minMemoryMapAlignment when host-visible memory is asked for
 * req_flags       - property flags the memory type must include, or
 *                   UINT32_MAX to accept any type allowed by req
 * alloc_extension - pNext chain for VkMemoryAllocateInfo (may be NULL)
 * mem_flags       - if non-NULL, the property flags of the chosen memory type
 *                   are ORed into *mem_flags
 * mem             - receives the allocated memory handle
 *
 * Returns 0 on success, AVERROR(EINVAL) if no memory type matches, or
 * AVERROR(ENOMEM) if the allocation itself fails. */
int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
                    VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
                    VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
{
    VkResult ret;
    int index = -1;
    FFVulkanFunctions *vk = &s->vkfn;

    VkMemoryAllocateInfo alloc_info = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext = alloc_extension,
    };

    /* Align if we need to */
    if ((req_flags != UINT32_MAX) && req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
        req->size = FFALIGN(req->size, s->props.properties.limits.minMemoryMapAlignment);

    alloc_info.allocationSize = req->size;

    /* The vulkan spec requires memory types to be sorted in the "optimal"
     * order, so the first matching type we find will be the best/fastest one */
    for (uint32_t i = 0; i < s->mprops.memoryTypeCount; i++) {
        /* The memory type must be supported by the requirements (bitfield).
         * The shift is done on an unsigned value: a signed 1 << 31 would be
         * undefined behaviour. */
        if (!(req->memoryTypeBits & (1U << i)))
            continue;

        /* The memory type flags must include our properties */
        if ((req_flags != UINT32_MAX) &&
            ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags))
            continue;

        /* Found a suitable memory type */
        index = i;
        break;
    }

    if (index < 0) {
        av_log(s->device, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
               req_flags);
        return AVERROR(EINVAL);
    }

    alloc_info.memoryTypeIndex = index;

    ret = vk->AllocateMemory(s->hwctx->act_dev, &alloc_info,
                             s->hwctx->alloc, mem);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR(ENOMEM);
    }

    if (mem_flags)
        *mem_flags |= s->mprops.memoryTypes[index].propertyFlags;

    return 0;
}
/* Creates a buffer, allocates memory for it and binds the two together.
 *
 * s           - context providing device handles
 * buf         - receives the buffer handle, memory handle, memory property
 *               flags (ORed in by ff_vk_alloc_mem) and the requested size
 * size        - requested buffer size in bytes
 * pNext       - pNext chain for VkBufferCreateInfo (may be NULL)
 * alloc_pNext - pNext chain for the memory allocation (may be NULL)
 * usage       - buffer usage flags
 * flags       - required memory property flags
 *
 * The buffer is created with the device context's allocation callbacks, the
 * same ones ff_vk_free_buf() passes when destroying it.
 *
 * Returns 0 on success, AVERROR_EXTERNAL if buffer creation or binding
 * fails, or the error from ff_vk_alloc_mem(). On failure, handles already
 * stored in buf are not released by this function. */
int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
                     void *pNext, void *alloc_pNext,
                     VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
{
    int err;
    VkResult ret;
    int use_ded_mem;
    FFVulkanFunctions *vk = &s->vkfn;

    VkBufferCreateInfo buf_spawn = {
        .sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .pNext       = pNext,
        .usage       = usage,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
        .size        = size, /* Gets FFALIGNED during alloc if host visible
                                but should be ok */
    };

    VkBufferMemoryRequirementsInfo2 req_desc = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
    };
    VkMemoryDedicatedAllocateInfo ded_alloc = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
        .pNext = alloc_pNext,
    };
    VkMemoryDedicatedRequirements ded_req = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
    };
    VkMemoryRequirements2 req = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
        .pNext = &ded_req,
    };

    /* Use the same allocator the buffer will later be destroyed with */
    ret = vk->CreateBuffer(s->hwctx->act_dev, &buf_spawn, s->hwctx->alloc,
                           &buf->buf);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Failed to create buffer: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    req_desc.buffer = buf->buf;

    vk->GetBufferMemoryRequirements2(s->hwctx->act_dev, &req_desc, &req);

    /* In case the implementation prefers/requires dedicated allocation */
    use_ded_mem = ded_req.prefersDedicatedAllocation |
                  ded_req.requiresDedicatedAllocation;
    if (use_ded_mem)
        ded_alloc.buffer = buf->buf;

    /* Without dedicated allocation, skip ded_alloc and pass the caller's
     * chain straight through */
    err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags,
                          use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
                          &buf->flags, &buf->mem);
    if (err)
        return err;

    ret = vk->BindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    buf->size = size;

    return 0;
}
/* Maps the memory of nb_buffers buffers into host address space.
 *
 * s          - context; s->scratch is used as temporary storage
 * buf        - array of nb_buffers buffers to map
 * mem        - receives one host pointer per buffer
 * nb_buffers - number of entries in buf and mem
 * invalidate - if non-zero, the mapped ranges of all buffers whose memory is
 *              not host-coherent are invalidated after mapping
 *
 * Returns 0 on success, AVERROR_EXTERNAL if mapping or invalidation fails,
 * or AVERROR(ENOMEM) if the scratch list cannot be grown. Buffers mapped
 * before a failure are left mapped. */
int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[],
                      int nb_buffers, int invalidate)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    VkMappedMemoryRange *inval_list = NULL;
    int inval_count = 0;

    for (int i = 0; i < nb_buffers; i++) {
        ret = vk->MapMemory(s->hwctx->act_dev, buf[i].mem, 0,
                            VK_WHOLE_SIZE, 0, (void **)&mem[i]);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    if (!invalidate)
        return 0;

    for (int i = 0; i < nb_buffers; i++) {
        const VkMappedMemoryRange ival_buf = {
            .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
            .memory = buf[i].mem,
            .size   = VK_WHOLE_SIZE,
        };

        /* Host-coherent memory needs no explicit invalidation */
        if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
            continue;

        inval_list = av_fast_realloc(s->scratch, &s->scratch_size,
                                     (++inval_count)*sizeof(*inval_list));
        if (!inval_list)
            return AVERROR(ENOMEM);

        /* The block may have moved: keep the context pointing at the live
         * allocation so later users do not touch freed memory */
        s->scratch = (void *)inval_list;

        inval_list[inval_count - 1] = ival_buf;
    }

    if (inval_count) {
        ret = vk->InvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count,
                                               inval_list);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    return 0;
}
/* Unmaps the memory of nb_buffers buffers, optionally flushing first.
 *
 * s          - context; s->scratch is used as temporary storage
 * buf        - array of nb_buffers buffers to unmap
 * nb_buffers - number of entries in buf
 * flush      - if non-zero, the mapped ranges of all buffers whose memory is
 *              not host-coherent are flushed before unmapping
 *
 * Returns 0 on success, or AVERROR_EXTERNAL if the flush fails (the buffers
 * are still unmapped in that case). Returns AVERROR(ENOMEM) without
 * unmapping anything if the scratch list cannot be grown. */
int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers,
                        int flush)
{
    int err = 0;
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    VkMappedMemoryRange *flush_list = NULL;
    int flush_count = 0;

    if (flush) {
        for (int i = 0; i < nb_buffers; i++) {
            const VkMappedMemoryRange flush_buf = {
                .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
                .memory = buf[i].mem,
                .size   = VK_WHOLE_SIZE,
            };

            /* Host-coherent memory needs no explicit flush */
            if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
                continue;

            flush_list = av_fast_realloc(s->scratch, &s->scratch_size,
                                         (++flush_count)*sizeof(*flush_list));
            if (!flush_list)
                return AVERROR(ENOMEM);

            /* The block may have moved: keep the context pointing at the
             * live allocation so later users do not touch freed memory */
            s->scratch = (void *)flush_list;

            flush_list[flush_count - 1] = flush_buf;
        }
    }

    if (flush_count) {
        ret = vk->FlushMappedMemoryRanges(s->hwctx->act_dev, flush_count,
                                          flush_list);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Failed to flush memory: %s\n",
                   ff_vk_ret2str(ret));
            err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
        }
    }

    for (int i = 0; i < nb_buffers; i++)
        vk->UnmapMemory(s->hwctx->act_dev, buf[i].mem);

    return err;
}
/* Destroys the buffer handle and frees the memory handle held in buf, each
 * only if it is not VK_NULL_HANDLE. Does nothing when buf is NULL or the
 * context has no device context. The handles in buf are not reset. */
void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf)
{
    FFVulkanFunctions *vk = &s->vkfn;

    if (buf && s->hwctx) {
        if (buf->buf != VK_NULL_HANDLE)
            vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc);

        if (buf->mem != VK_NULL_HANDLE)
            vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
    }
}
/* Creates a 2D image of the given size and a timeline semaphore for it, and
 * initializes the matching bookkeeping in the frame.
 *
 * s            - context providing device handles, extension flags and the
 *                queue family list used for sharing
 * f            - frame to fill; slot 0 of img, sem, queue_family, layout,
 *                access and sem_value is written, as are flags and tiling
 * idx          - not used by the code visible here (slot 0 is always used)
 * width/height - image extent in pixels
 * fmt, tiling, usage, flags - forwarded to VkImageCreateInfo
 * create_pnext - pNext chain for VkImageCreateInfo (may be NULL)
 * mem, alloc_pnext - not used by the code visible here; no memory is
 *                allocated or bound by this function
 *
 * The semaphore is created as exportable when the corresponding external
 * semaphore extension is flagged in s->extensions.
 *
 * Returns 0 on success, AVERROR(EINVAL) if image creation fails, or
 * AVERROR_EXTERNAL if semaphore creation fails (the image is destroyed
 * again in that case). */
int ff_vk_image_create(FFVulkanContext *s, AVVkFrame *f, int idx,
                       int width, int height, VkFormat fmt, VkImageTiling tiling,
                       VkImageUsageFlagBits usage, VkImageCreateFlags flags,
                       void *create_pnext, VkDeviceMemory *mem, void *alloc_pnext)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    AVVulkanDeviceContext *hwctx = s->hwctx;

    VkExportSemaphoreCreateInfo ext_sem_info = {
        .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
#ifdef _WIN32
        .handleTypes = IsWindows8OrGreater()
            ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
            : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
#else
        .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
#endif
    };

    VkSemaphoreTypeCreateInfo sem_type_info = {
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
#ifdef _WIN32
        .pNext = s->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM ? &ext_sem_info : NULL,
#else
        .pNext = s->extensions & FF_VK_EXT_EXTERNAL_FD_SEM ? &ext_sem_info : NULL,
#endif
        .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
        .initialValue  = 0,
    };

    VkSemaphoreCreateInfo sem_spawn = {
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
        .pNext = &sem_type_info,
    };

    /* Create the image */
    VkImageCreateInfo create_info = {
        .sType                 = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
        .pNext                 = create_pnext,
        .imageType             = VK_IMAGE_TYPE_2D,
        .format                = fmt,
        .extent.width          = width,  /* was left at 0 before */
        .extent.height         = height, /* was left at 0 before */
        .extent.depth          = 1,
        .mipLevels             = 1,
        .arrayLayers           = 1,
        .flags                 = flags,
        .tiling                = tiling,
        .initialLayout         = VK_IMAGE_LAYOUT_UNDEFINED,
        .usage                 = usage,
        .samples               = VK_SAMPLE_COUNT_1_BIT,
        .pQueueFamilyIndices   = s->qfs,
        .queueFamilyIndexCount = s->nb_qfs,
        .sharingMode           = s->nb_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
                                                 VK_SHARING_MODE_EXCLUSIVE,
    };

    ret = vk->CreateImage(hwctx->act_dev, &create_info,
                          hwctx->alloc, &f->img[0]);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Image creation failure: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR(EINVAL);
    }

    /* Create semaphore */
    ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn,
                              hwctx->alloc, &f->sem[0]);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
               ff_vk_ret2str(ret));
        /* Do not leak the image created above; clear the handle so a
         * caller-side cleanup cannot destroy it a second time */
        vk->DestroyImage(hwctx->act_dev, f->img[0], hwctx->alloc);
        f->img[0] = VK_NULL_HANDLE;
        return AVERROR_EXTERNAL;
    }

    f->queue_family[0] = s->nb_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : s->qfs[0];
    f->layout[0]       = create_info.initialLayout;
    f->access[0]       = 0x0;
    f->sem_value[0]    = 0;

    f->flags  = 0x0;
    f->tiling = tiling;

    return 0;
}
/* Appends a push constant range to the pipeline.
 *
 * pl     - pipeline whose push_consts array is extended by one entry
 * offset - byte offset of the range
 * size   - byte size of the range
 * stage  - shader stage(s) that access the range
 *
 * Returns 0 on success or AVERROR(ENOMEM) if the array cannot be grown; in
 * that case the existing ranges and their count are left untouched. */
int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size,
                            VkShaderStageFlagBits stage)
{
    VkPushConstantRange *pc;
    VkPushConstantRange *grown;

    /* Grow through a temporary so the old array is not lost on failure */
    grown = av_realloc_array(pl->push_consts, sizeof(*pl->push_consts),
                             pl->push_consts_num + 1);
    if (!grown)
        return AVERROR(ENOMEM);
    pl->push_consts = grown;

    pc = &pl->push_consts[pl->push_consts_num++];
    memset(pc, 0, sizeof(*pc));

    pc->stageFlags = stage;
    pc->offset     = offset;
    pc->size       = size;

    return 0;
}
/* Generates create_exec_ctx(), which appends a new zeroed FFVkExecContext to
 * s->exec_ctx. */
FN_CREATING(FFVulkanContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num)

/* Creates an execution context for the given queue family.
 *
 * Allocates one queue slot and one primary command buffer per entry of
 * qf->nb_queues from a freshly created command pool, and fetches the device
 * queue for each slot (slot i uses queue i % qf->actual_queues).
 *
 * On success *ctx receives the new context and 0 is returned. Returns
 * AVERROR(ENOMEM) on allocation failure or AVERROR_EXTERNAL if the pool or
 * command buffers cannot be created. On failure, the partially set up
 * context stays registered in s and is not released here. */
int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx,
                          FFVkQueueFamilyCtx *qf)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    FFVkExecContext *e;

    VkCommandBufferAllocateInfo cbuf_create = {
        .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount = qf->nb_queues,
    };
    VkCommandPoolCreateInfo cqueue_create = {
        .sType              = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
        .flags              = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
        .queueFamilyIndex   = qf->queue_family,
    };

    e = create_exec_ctx(s);
    if (!e)
        return AVERROR(ENOMEM);

    e->qf = qf;

    /* Per-queue bookkeeping and command buffer handles */
    e->queues = av_mallocz(qf->nb_queues * sizeof(*e->queues));
    if (!e->queues)
        return AVERROR(ENOMEM);

    e->bufs = av_mallocz(qf->nb_queues * sizeof(*e->bufs));
    if (!e->bufs)
        return AVERROR(ENOMEM);

    /* Create command pool */
    ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create,
                                s->hwctx->alloc, &e->pool);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    /* Allocate command buffer */
    cbuf_create.commandPool = e->pool;
    ret = vk->AllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, e->bufs);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    for (int slot = 0; slot < qf->nb_queues; slot++)
        vk->GetDeviceQueue(s->hwctx->act_dev, qf->queue_family,
                           slot % qf->actual_queues, &e->queues[slot].queue);

    *ctx = e;

    return 0;
}
/* Creates a query pool for an execution context, plus the host buffer that
 * query results are read back into.
 *
 * s            - context providing device handles
 * e            - execution context; must not already own a query pool
 * nb_queries   - queries per queue (the pool holds
 *                nb_queries * e->qf->nb_queues queries)
 * type         - query type
 * elem_64bits  - non-zero to size result elements as 8 bytes instead of 4
 * create_pnext - pNext chain for VkQueryPoolCreateInfo (may be NULL)
 *
 * Returns 0 on success, AVERROR(EINVAL) if a pool already exists,
 * AVERROR(ENOMEM) if the result buffer cannot be allocated, or
 * AVERROR_EXTERNAL if pool creation fails (the result buffer is released
 * again in that case). */
int ff_vk_create_exec_ctx_query_pool(FFVulkanContext *s, FFVkExecContext *e,
                                     int nb_queries, VkQueryType type,
                                     int elem_64bits, void *create_pnext)
{
    VkResult ret;
    size_t qd_size;
    int nb_results = nb_queries;
    int nb_statuses = 0 /* Once RADV has support, = nb_queries */;
    int status_stride = 2;
    int result_elem_size = elem_64bits ? 8 : 4;
    FFVulkanFunctions *vk = &s->vkfn;
    VkQueryPoolCreateInfo query_pool_info = {
        .sType      = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
        .pNext      = create_pnext,
        .queryType  = type,
        .queryCount = nb_queries*e->qf->nb_queues,
    };

    if (e->query.pool)
        return AVERROR(EINVAL);

    /* Video encode queries produce two results per query */
    if (type == VK_QUERY_TYPE_VIDEO_ENCODE_BITSTREAM_BUFFER_RANGE_KHR) {
        status_stride = 3; /* skip,skip,result,skip,skip,result */
        nb_results *= 2;
    } else if (type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR) {
        /* Status-only queries carry no result values */
        status_stride = 1;
        nb_results *= 0;
    }

    /* Bytes needed per queue: results followed by statuses */
    qd_size = nb_results*result_elem_size + nb_statuses*result_elem_size;

    e->query.data = av_mallocz(e->qf->nb_queues*qd_size);
    if (!e->query.data)
        return AVERROR(ENOMEM);

    ret = vk->CreateQueryPool(s->hwctx->act_dev, &query_pool_info,
                              s->hwctx->alloc, &e->query.pool);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to create query pool: %s\n",
               ff_vk_ret2str(ret));
        /* Without a pool a later call would overwrite (and leak) this */
        av_freep(&e->query.data);
        return AVERROR_EXTERNAL;
    }

    e->query.data_per_queue = qd_size;
    e->query.nb_queries     = nb_queries;
    e->query.nb_results     = nb_results;
    e->query.nb_statuses    = nb_statuses;
    e->query.elem_64bits    = elem_64bits;
    e->query.status_stride  = status_stride;

    return 0;
}
/* Reads back query results for the current queue of an execution context.
 *
 * s         - context providing device handles
 * e         - execution context owning the query pool
 * query_idx - first query to read; e->query.nb_queries queries are fetched
 * data      - optional; receives a pointer to the current queue's slice of
 *             the result buffer, or NULL if nothing was submitted on the
 *             current queue yet
 * status    - optional; receives the status value folded from the
 *             per-query status words (0 when no statuses are recorded).
 *             Not written when nothing was submitted yet.
 *
 * Returns 0 on success (including the nothing-submitted case) or
 * AVERROR_EXTERNAL if the results cannot be fetched. */
int ff_vk_get_exec_ctx_query_results(FFVulkanContext *s, FFVkExecContext *e,
                                     int query_idx, void **data, int64_t *status)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    uint8_t *qd;
    int32_t *res32;
    int64_t *res64;
    int64_t res = 0;
    VkQueryResultFlags qf = 0;
    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];

    if (!q->submitted) {
        /* data is optional, exactly as in the success path below */
        if (data)
            *data = NULL;
        return 0;
    }

    /* Slice of the result buffer belonging to the current queue */
    qd = e->query.data + e->qf->cur_queue*e->query.data_per_queue;

    qf |= e->query.nb_results && e->query.nb_statuses ?
          VK_QUERY_RESULT_WITH_STATUS_BIT_KHR : 0x0;
    qf |= e->query.elem_64bits ? VK_QUERY_RESULT_64_BIT : 0x0;

    /* Start of the status words for 32-bit resp. 64-bit elements */
    res32 = (int32_t *)(qd + e->query.nb_results*4);
    res64 = (int64_t *)(qd + e->query.nb_results*8);

    ret = vk->GetQueryPoolResults(s->hwctx->act_dev, e->query.pool,
                                  query_idx,
                                  e->query.nb_queries,
                                  e->query.data_per_queue, qd,
                                  e->query.elem_64bits ? 8 : 4, qf);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to perform query: %s!\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    /* Fold the status words: a value lower than the current one always
     * wins, a higher one only while the current value is non-negative.
     * NOTE(review): the loops both index by i and advance the pointer by
     * status_stride; currently unreachable since nb_statuses is always 0 -
     * confirm the intended stepping before enabling statuses. */
    if (e->query.nb_statuses && e->query.elem_64bits) {
        for (int i = 0; i < e->query.nb_queries; i++) {
            res = (res64[i] < res) || (res >= 0 && res64[i] > res) ?
                  res64[i] : res;
            res64 += e->query.status_stride;
        }
    } else if (e->query.nb_statuses) {
        for (int i = 0; i < e->query.nb_queries; i++) {
            res = (res32[i] < res) || (res >= 0 && res32[i] > res) ?
                  res32[i] : res;
            res32 += e->query.status_stride;
        }
    }

    if (data)
        *data = qd;
    if (status)
        *status = res;

    return 0;
}
/* Drops everything queued as a dependency of the current queue: unrefs all
 * buffer dependencies, frees all frame dependencies, and resets the wait
 * and signal semaphore counters of the execution context. */
void ff_vk_discard_exec_deps(FFVkExecContext *e)
{
    FFVkQueueCtx *queue = &e->queues[e->qf->cur_queue];
    int i;

    for (i = 0; i < queue->nb_buf_deps; i++)
        av_buffer_unref(&queue->buf_deps[i]);
    queue->nb_buf_deps = 0;

    for (i = 0; i < queue->nb_frame_deps; i++)
        av_frame_free(&queue->frame_deps[i]);
    queue->nb_frame_deps = 0;

    e->sem_wait_cnt = 0;
    e->sem_sig_cnt  = 0;
}
/* Begins recording into the command buffer of the current queue.
 *
 * On first use of a queue its fence is created. On later uses the fence is
 * waited on and reset, unless the queue was marked synchronous (see
 * ff_vk_wait_on_exec_ctx(), which already waited and reset). Dependencies
 * left from the previous run are discarded, and if the context owns a query
 * pool the current queue's range of queries is reset.
 *
 * Returns 0 on success or AVERROR_EXTERNAL if the fence cannot be created
 * or recording cannot be started. */
int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];

    VkCommandBufferBeginInfo cmd_start = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
    };

    if (q->fence) {
        /* Fence exists: make sure the previous submission has finished */
        if (!q->synchronous) {
            vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
            vk->ResetFences(s->hwctx->act_dev, 1, &q->fence);
        }
    } else {
        /* Create the fence and don't wait for it initially */
        VkFenceCreateInfo fence_spawn = {
            .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
        };

        ret = vk->CreateFence(s->hwctx->act_dev, &fence_spawn, s->hwctx->alloc,
                              &q->fence);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Failed to queue frame fence: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    q->synchronous = 0;

    /* Discard queue dependencies */
    ff_vk_discard_exec_deps(e);

    ret = vk->BeginCommandBuffer(e->bufs[e->qf->cur_queue], &cmd_start);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Failed to start command recoding: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    if (!e->query.pool)
        return 0;

    /* Each queue owns a contiguous range of nb_queries queries */
    e->query.idx = e->qf->cur_queue*e->query.nb_queries;
    vk->CmdResetQueryPool(e->bufs[e->qf->cur_queue], e->query.pool,
                          e->query.idx, e->query.nb_queries);

    return 0;
}
/* Returns the command buffer that belongs to the current queue of the
 * execution context. */
VkCommandBuffer ff_vk_get_exec_buf(FFVkExecContext *e)
{
    const int cur = e->qf->cur_queue;

    return e->bufs[cur];
}
/* Adds a frame as a dependency of the current submission.
 *
 * For every plane of the frame's software format, the plane's semaphore is
 * queued both as a wait (at its current value, for stage in_wait_dst_flag)
 * and as a signal (at its current value + 1); the address of the frame's
 * stored semaphore value is remembered so it can be bumped after
 * submission. A clone of the frame is kept in the current queue's
 * dependency list.
 *
 * Returns 0 on success. On allocation failure all dependencies of the
 * current queue are discarded and AVERROR(ENOMEM) is returned. */
int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame,
                       VkPipelineStageFlagBits in_wait_dst_flag)
{
    AVFrame **dst;
    AVVkFrame *f = (AVVkFrame *)frame->data[0];
    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
    AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data;
    int planes = av_pix_fmt_count_planes(fc->sw_format);

/* Makes room for one more element in arr. The result goes through a
 * temporary so that the existing array is not lost (leaked) if growing
 * fails; on failure the dependencies are discarded and ENOMEM returned. */
#define GROW_SEM_ARRAY(arr, alloc, cnt)                                        \
    do {                                                                       \
        void *grown_ = av_fast_realloc((arr), &(alloc),                        \
                                       ((cnt) + 1)*sizeof(*(arr)));            \
        if (!grown_) {                                                         \
            ff_vk_discard_exec_deps(e);                                        \
            return AVERROR(ENOMEM);                                            \
        }                                                                      \
        (arr) = grown_;                                                        \
    } while (0)

    for (int i = 0; i < planes; i++) {
        GROW_SEM_ARRAY(e->sem_wait,        e->sem_wait_alloc,        e->sem_wait_cnt);
        GROW_SEM_ARRAY(e->sem_wait_dst,    e->sem_wait_dst_alloc,    e->sem_wait_cnt);
        GROW_SEM_ARRAY(e->sem_wait_val,    e->sem_wait_val_alloc,    e->sem_wait_cnt);
        GROW_SEM_ARRAY(e->sem_sig,         e->sem_sig_alloc,         e->sem_sig_cnt);
        GROW_SEM_ARRAY(e->sem_sig_val,     e->sem_sig_val_alloc,     e->sem_sig_cnt);
        GROW_SEM_ARRAY(e->sem_sig_val_dst, e->sem_sig_val_dst_alloc, e->sem_sig_cnt);

        /* Wait for the plane's semaphore to reach its current value ... */
        e->sem_wait[e->sem_wait_cnt]     = f->sem[i];
        e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag;
        e->sem_wait_val[e->sem_wait_cnt] = f->sem_value[i];
        e->sem_wait_cnt++;

        /* ... and signal the next value once the submission completes */
        e->sem_sig[e->sem_sig_cnt]         = f->sem[i];
        e->sem_sig_val[e->sem_sig_cnt]     = f->sem_value[i] + 1;
        e->sem_sig_val_dst[e->sem_sig_cnt] = &f->sem_value[i];
        e->sem_sig_cnt++;
    }

#undef GROW_SEM_ARRAY

    dst = av_fast_realloc(q->frame_deps, &q->frame_deps_alloc_size,
                          (q->nb_frame_deps + 1) * sizeof(*dst));
    if (!dst) {
        ff_vk_discard_exec_deps(e);
        return AVERROR(ENOMEM);
    }

    q->frame_deps = dst;
    q->frame_deps[q->nb_frame_deps] = av_frame_clone(frame);
    if (!q->frame_deps[q->nb_frame_deps]) {
        ff_vk_discard_exec_deps(e);
        return AVERROR(ENOMEM);
    }
    q->nb_frame_deps++;

    return 0;
}
/* Ends recording of the current queue's command buffer and submits it.
 *
 * The submission waits on and signals the timeline semaphores collected in
 * the execution context and signals the queue's fence. The queue is locked
 * through the device context's lock_queue/unlock_queue callbacks around the
 * submit call. After a successful submit, every semaphore value registered
 * for signalling is incremented and the queue is marked as submitted.
 *
 * Returns 0 on success or AVERROR_EXTERNAL if ending the command buffer or
 * submitting it fails. */
int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];

    VkTimelineSemaphoreSubmitInfo timeline_info = {
        .sType                     = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
        .waitSemaphoreValueCount   = e->sem_wait_cnt,
        .pWaitSemaphoreValues      = e->sem_wait_val,
        .signalSemaphoreValueCount = e->sem_sig_cnt,
        .pSignalSemaphoreValues    = e->sem_sig_val,
    };

    VkSubmitInfo submit = {
        .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .pNext                = &timeline_info,

        .commandBufferCount   = 1,
        .pCommandBuffers      = &e->bufs[e->qf->cur_queue],

        .waitSemaphoreCount   = e->sem_wait_cnt,
        .pWaitSemaphores      = e->sem_wait,
        .pWaitDstStageMask    = e->sem_wait_dst,

        .signalSemaphoreCount = e->sem_sig_cnt,
        .pSignalSemaphores    = e->sem_sig,
    };

    ret = vk->EndCommandBuffer(e->bufs[e->qf->cur_queue]);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    /* Hold the queue lock only for the duration of the submit call */
    s->hwctx->lock_queue((AVHWDeviceContext *)s->device_ref->data,
                         e->qf->queue_family,
                         e->qf->cur_queue % e->qf->actual_queues);

    ret = vk->QueueSubmit(q->queue, 1, &submit, q->fence);

    s->hwctx->unlock_queue((AVHWDeviceContext *)s->device_ref->data,
                           e->qf->queue_family,
                           e->qf->cur_queue % e->qf->actual_queues);

    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    /* The stored semaphore values now reflect what will be signalled */
    for (int n = 0; n < e->sem_sig_cnt; n++)
        *e->sem_sig_val_dst[n] += 1;

    e->query.idx = e->qf->cur_queue*e->query.nb_queries;
    q->submitted = 1;

    return 0;
}
/* Blocks until the current queue's last submission has finished, resets
 * its fence, and marks the queue synchronous so the next recording does not
 * wait again. Does nothing if nothing was submitted on the current queue. */
void ff_vk_wait_on_exec_ctx(FFVulkanContext *s, FFVkExecContext *e)
{
    FFVulkanFunctions *vk = &s->vkfn;
    FFVkQueueCtx *queue = &e->queues[e->qf->cur_queue];

    if (queue->submitted) {
        vk->WaitForFences(s->hwctx->act_dev, 1, &queue->fence, VK_TRUE, UINT64_MAX);
        vk->ResetFences(s->hwctx->act_dev, 1, &queue->fence);
        queue->synchronous = 1;
    }
}
/* Appends nb_deps buffer references to the current queue's dependency list.
 * The pointers are stored as given (no new references are created here).
 *
 * Returns 0 on success, or immediately when deps is NULL or nb_deps is 0.
 * If the list cannot be grown, or one of the entries is NULL, all
 * dependencies of the current queue are discarded and AVERROR(ENOMEM) is
 * returned. */
int ff_vk_add_dep_exec_ctx(FFVulkanContext *s, FFVkExecContext *e,
                           AVBufferRef **deps, int nb_deps)
{
    FFVkQueueCtx *queue = &e->queues[e->qf->cur_queue];
    AVBufferRef **grown;

    if (!deps || !nb_deps)
        return 0;

    grown = av_fast_realloc(queue->buf_deps, &queue->buf_deps_alloc_size,
                            (queue->nb_buf_deps + nb_deps) * sizeof(*grown));
    if (!grown) {
        ff_vk_discard_exec_deps(e);
        return AVERROR(ENOMEM);
    }
    queue->buf_deps = grown;

    for (int n = 0; n < nb_deps; n++) {
        AVBufferRef *ref = deps[n];

        /* The slot is written before the NULL check */
        queue->buf_deps[queue->nb_buf_deps] = ref;
        if (!ref) {
            ff_vk_discard_exec_deps(e);
            return AVERROR(ENOMEM);
        }
        queue->nb_buf_deps++;
    }

    return 0;
}
/* Generates create_sampler(), which appends a new zeroed FFVkSampler to
 * s->samplers. */
FN_CREATING(FFVulkanContext, FFVkSampler, sampler, samplers, samplers_num)

/* Creates a sampler and registers it in the context.
 *
 * s             - context providing device handles
 * unnorm_coords - non-zero to sample with unnormalized coordinates (the
 *                 mipmap mode is then NEAREST instead of LINEAR)
 * filt          - filter used for both magnification and minification
 *
 * All three address modes clamp to edge. The created handle is replicated
 * into all four entries of sctx->sampler.
 *
 * Returns the sampler context, or NULL if allocation or sampler creation
 * fails. */
FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s,
                                int unnorm_coords, VkFilter filt)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;

    /* Every field is spelled out from its source value: reading other
     * members of sampler_info inside its own initializer is not guaranteed
     * to observe them, since initializer expressions are indeterminately
     * sequenced. */
    VkSamplerCreateInfo sampler_info = {
        .sType                   = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
        .magFilter               = filt,
        .minFilter               = filt,
        .mipmapMode              = unnorm_coords ? VK_SAMPLER_MIPMAP_MODE_NEAREST :
                                                   VK_SAMPLER_MIPMAP_MODE_LINEAR,
        .addressModeU            = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
        .addressModeV            = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
        .addressModeW            = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
        .anisotropyEnable        = VK_FALSE,
        .compareOp               = VK_COMPARE_OP_NEVER,
        .borderColor             = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
        .unnormalizedCoordinates = unnorm_coords,
    };

    FFVkSampler *sctx = create_sampler(s);
    if (!sctx)
        return NULL;

    ret = vk->CreateSampler(s->hwctx->act_dev, &sampler_info,
                            s->hwctx->alloc, &sctx->sampler[0]);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to init sampler: %s\n",
               ff_vk_ret2str(ret));
        return NULL;
    }

    /* The same handle is used for all four slots */
    for (int i = 1; i < 4; i++)
        sctx->sampler[i] = sctx->sampler[0];

    return sctx;
}
/* Returns 1 if pix_fmt is one of the RGB formats listed below, 0 for any
 * other format. */
int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt)
{
    switch (pix_fmt) {
    case AV_PIX_FMT_ABGR:
    case AV_PIX_FMT_BGRA:
    case AV_PIX_FMT_RGBA:
    case AV_PIX_FMT_RGB24:
    case AV_PIX_FMT_BGR24:
    case AV_PIX_FMT_RGB48:
    case AV_PIX_FMT_RGBA64:
    case AV_PIX_FMT_RGB565:
    case AV_PIX_FMT_BGR565:
    case AV_PIX_FMT_BGR0:
    case AV_PIX_FMT_0BGR:
    case AV_PIX_FMT_RGB0:
        return 1;
    default:
        return 0;
    }
}
/* Returns the shader image format string for a pixel format: "rgba16f" when
 * the first component is deeper than 8 bits, "rgba8" otherwise. The pixel
 * format must have a descriptor; the lookup result is not checked. */
const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt)
{
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pixfmt);

    if (desc->comp[0].depth > 8)
        return "rgba16f";

    return "rgba8";
}
/* Payload of the AVBufferRef that owns an image view; the view is destroyed
 * by destroy_imageview() when the buffer is freed. */
typedef struct ImageViewCtx {
VkImageView view;
} ImageViewCtx;
/* AVBuffer free callback: destroys the image view stored in the
 * ImageViewCtx at data and frees the ImageViewCtx itself. opaque is the
 * FFVulkanContext that created the view. */
static void destroy_imageview(void *opaque, uint8_t *data)
{
    FFVulkanContext *vkctx = opaque;
    ImageViewCtx *view_ctx = (ImageViewCtx *)data;

    vkctx->vkfn.DestroyImageView(vkctx->hwctx->act_dev, view_ctx->view,
                                 vkctx->hwctx->alloc);
    av_free(view_ctx);
}
/* Creates a 2D color image view and ties its lifetime to the current queue
 * of an execution context.
 *
 * s   - context providing device handles
 * e   - execution context; the view is added to its buffer dependencies and
 *       is destroyed when that dependency is released
 * v   - receives the image view handle
 * img - image to create the view for
 * fmt - view format
 * map - component mapping
 *
 * Returns 0 on success, AVERROR(ENOMEM) on allocation failure,
 * AVERROR_EXTERNAL if view creation fails, or the error from
 * ff_vk_add_dep_exec_ctx(). Nothing is leaked on failure. */
int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e,
                           VkImageView *v, VkImage img, VkFormat fmt,
                           const VkComponentMapping map)
{
    int err;
    VkResult ret;
    AVBufferRef *buf;
    ImageViewCtx *iv;
    FFVulkanFunctions *vk = &s->vkfn;

    VkImageViewCreateInfo imgview_spawn = {
        .sType      = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
        .pNext      = NULL,
        .image      = img,
        .viewType   = VK_IMAGE_VIEW_TYPE_2D,
        .format     = fmt,
        .components = map,
        .subresourceRange = {
            .aspectMask     = VK_IMAGE_ASPECT_COLOR_BIT,
            .baseMipLevel   = 0,
            .levelCount     = 1,
            .baseArrayLayer = 0,
            .layerCount     = 1,
        },
    };

    iv = av_mallocz(sizeof(*iv));
    if (!iv)
        return AVERROR(ENOMEM);

    ret = vk->CreateImageView(s->hwctx->act_dev, &imgview_spawn,
                              s->hwctx->alloc, &iv->view);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n",
               ff_vk_ret2str(ret));
        /* No view exists yet, so only the wrapper has to be released */
        av_free(iv);
        return AVERROR_EXTERNAL;
    }

    /* From here on the buffer owns both the view and iv */
    buf = av_buffer_create((uint8_t *)iv, sizeof(*iv), destroy_imageview, s, 0);
    if (!buf) {
        destroy_imageview(s, (uint8_t *)iv);
        return AVERROR(ENOMEM);
    }

    /* Add to queue dependencies */
    err = ff_vk_add_dep_exec_ctx(s, e, &buf, 1);
    if (err) {
        av_buffer_unref(&buf);
        return err;
    }

    *v = iv->view;

    return 0;
}
/* Generates create_shader(), which appends a new zeroed FFVkSPIRVShader to
 * pl->shaders. */
FN_CREATING(FFVulkanPipeline, FFVkSPIRVShader, shader, shaders, shaders_num)

/* Creates a shader in the pipeline and starts its source text.
 *
 * pl    - pipeline the shader is registered in
 * name  - shader name; the pointer is stored, not copied
 * stage - shader stage flags stored in the stage create info
 *
 * The source buffer is initialized and the prologue is emitted through the
 * GLSLF/GLSLC macros (defined outside this file section; presumably they
 * append to shd->src).
 *
 * Returns the new shader, or NULL if it cannot be allocated. */
FFVkSPIRVShader *ff_vk_init_shader(FFVulkanPipeline *pl, const char *name,
                                   VkShaderStageFlags stage)
{
    FFVkSPIRVShader *shd = create_shader(pl);

    if (shd) {
        av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED);

        shd->name         = name;
        shd->shader.stage = stage;
        shd->shader.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;

        GLSLF(0, #version %i ,460);
        GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y)) );
        GLSLC(0, );
    }

    return shd;
}
/* Stores the three compute work group sizes in the shader and appends the
 * matching "layout (local_size_x = ...)" declaration to its source. */
void ff_vk_set_compute_shader_sizes(FFVkSPIRVShader *shd, int local_size[3])
{
    for (int axis = 0; axis < 3; axis++)
        shd->local_size[axis] = local_size[axis];

    av_bprintf(&shd->src,
               "layout (local_size_x = %i, "
               "local_size_y = %i, local_size_z = %i) in;\n\n",
               shd->local_size[0], shd->local_size[1], shd->local_size[2]);
}
/* Logs the shader source at priority prio, with each newline-terminated
 * line prefixed by its 1-based line number and a tab. Text after the last
 * newline, if any, is not printed. */
void ff_vk_print_shader(void *ctx, FFVkSPIRVShader *shd, int prio)
{
    int line_no = 0;
    const char *line_start = shd->src.str;
    const char *newline;
    AVBPrint numbered;

    av_bprint_init(&numbered, 0, AV_BPRINT_SIZE_UNLIMITED);

    /* Copy one line at a time, including its terminating newline */
    while ((newline = strchr(line_start, '\n'))) {
        av_bprintf(&numbered, "%i\t", ++line_no);
        av_bprint_append_data(&numbered, line_start, newline - line_start + 1);
        line_start = newline + 1;
    }

    av_log(ctx, prio, "Shader %s: \n%s", shd->name, numbered.str);
    av_bprint_finalize(&numbered, NULL);
}
/* Compiles the shader's source to SPIR-V and creates its shader module.
 *
 * s          - context; the SPIR-V compiler is created on first use and
 *              cached in s->spirv_compiler
 * shd        - shader to compile; its module handle and entry point name
 *              are filled in
 * entrypoint - entry point name; the pointer is stored, not copied
 *
 * Returns 0 on success, AVERROR(ENOSYS) if no compiler backend was built
 * in, AVERROR(ENOMEM) if the compiler cannot be created, the compiler's
 * error code if compilation fails, or AVERROR_EXTERNAL if the shader module
 * cannot be created. */
int ff_vk_compile_shader(FFVulkanContext *s, FFVkSPIRVShader *shd,
                         const char *entrypoint)
{
    int err;
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    VkShaderModuleCreateInfo shader_create;
    uint8_t *spirv;
    size_t spirv_size;
    void *priv;

    shd->shader.pName = entrypoint;

    /* Lazily set up whichever compiler backend was configured */
    if (!s->spirv_compiler) {
#if CONFIG_LIBGLSLANG
        s->spirv_compiler = ff_vk_glslang_init();
#elif CONFIG_LIBSHADERC
        s->spirv_compiler = ff_vk_shaderc_init();
#else
        return AVERROR(ENOSYS);
#endif
        if (!s->spirv_compiler)
            return AVERROR(ENOMEM);
    }

    err = s->spirv_compiler->compile_shader(s->spirv_compiler, s, shd, &spirv,
                                            &spirv_size, entrypoint, &priv);
    if (err < 0)
        return err;

    av_log(s, AV_LOG_VERBOSE, "Shader %s compiled! Size: %zu bytes\n",
           shd->name, spirv_size);

    shader_create = (VkShaderModuleCreateInfo) {
        .sType    = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
        .pNext    = NULL,
        .flags    = 0,
        .codeSize = spirv_size,
        .pCode    = (void *)spirv,
    };

    ret = vk->CreateShaderModule(s->hwctx->act_dev, &shader_create, NULL,
                                 &shd->shader.module);

    /* The compiler output is released whether or not the module was made */
    s->spirv_compiler->free_shader(s->spirv_compiler, &priv);

    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to create shader module: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}
/* Per-descriptor-type properties, indexed by VkDescriptorType. They drive
 * both the update-template stride and the GLSL declaration that is printed
 * for each binding. Flags not listed in an entry are 0. */
static const struct descriptor_props {
    size_t struct_size; /* Size of the opaque which updates the descriptor */
    const char *type;   /* GLSL type keyword, NULL if none is printed */
    int is_uniform;     /* Declaration carries the "uniform" keyword */
    int mem_quali;      /* Can use a memory qualifier */
    int dim_needed;     /* Must indicate dimension */
    int buf_content;    /* Must indicate buffer contents */
} descriptor_props[] = {
    [VK_DESCRIPTOR_TYPE_SAMPLER] = {
        .struct_size = sizeof(VkDescriptorImageInfo),
        .type        = "sampler",
        .is_uniform  = 1,
    },
    [VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE] = {
        .struct_size = sizeof(VkDescriptorImageInfo),
        .type        = "texture",
        .is_uniform  = 1,
        .dim_needed  = 1,
    },
    [VK_DESCRIPTOR_TYPE_STORAGE_IMAGE] = {
        .struct_size = sizeof(VkDescriptorImageInfo),
        .type        = "image",
        .is_uniform  = 1,
        .mem_quali   = 1,
        .dim_needed  = 1,
    },
    [VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT] = {
        .struct_size = sizeof(VkDescriptorImageInfo),
        .type        = "subpassInput",
        .is_uniform  = 1,
    },
    [VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER] = {
        .struct_size = sizeof(VkDescriptorImageInfo),
        .type        = "sampler",
        .is_uniform  = 1,
        .dim_needed  = 1,
    },
    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER] = {
        .struct_size = sizeof(VkDescriptorBufferInfo),
        .type        = NULL,
        .is_uniform  = 1,
        .buf_content = 1,
    },
    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER] = {
        .struct_size = sizeof(VkDescriptorBufferInfo),
        .type        = "buffer",
        .mem_quali   = 1,
        .buf_content = 1,
    },
    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC] = {
        .struct_size = sizeof(VkDescriptorBufferInfo),
        .type        = NULL,
        .is_uniform  = 1,
        .buf_content = 1,
    },
    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC] = {
        .struct_size = sizeof(VkDescriptorBufferInfo),
        .type        = "buffer",
        .mem_quali   = 1,
        .buf_content = 1,
    },
    [VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER] = {
        .struct_size = sizeof(VkBufferView),
        .type        = "samplerBuffer",
        .is_uniform  = 1,
    },
    [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER] = {
        .struct_size = sizeof(VkBufferView),
        .type        = "imageBuffer",
        .is_uniform  = 1,
    },
};
/**
 * Add a descriptor set to a pipeline and declare its bindings in a shader's
 * GLSL source.
 *
 * Unless only_print_to_shader is set, this creates one descriptor set layout
 * per queue of pl->qf, accumulates the descriptor pool sizes and prepares
 * (but does not yet create) one update-template description per queue. The
 * template entry array is shared by all queues of the set and is released
 * once the templates have been created, or when the pipeline is freed.
 *
 * In both modes the GLSL declarations for every binding are appended to
 * shd->src, using set index pl->descriptor_sets_num - 1.
 *
 * @param s    Vulkan context; also the base address that the template entry
 *             offsets of desc[i].updater are computed against
 * @param pl   pipeline receiving the set
 * @param shd  shader whose source receives the declarations
 * @param desc array of num binding descriptions
 * @param num  number of bindings
 * @param only_print_to_shader skip all Vulkan-side setup and only emit GLSL
 * @return 0 on success, AVERROR(ENOMEM) on allocation failure,
 *         AVERROR_EXTERNAL if a layout could not be created. On failure the
 *         pipeline's arrays and set counters are left as they were, apart
 *         from any pool-size accounting already applied.
 */
int ff_vk_add_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
                             FFVkSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc,
                             int num, int only_print_to_shader)
{
    int err = 0;
    int nb_queues = 0;
    int nb_layouts_created = 0;
    void *grown;
    VkResult ret;
    VkDescriptorSetLayout *layout = NULL;
    VkDescriptorUpdateTemplateEntry *des_entries = NULL;
    FFVulkanFunctions *vk = &s->vkfn;

    if (only_print_to_shader)
        goto print;

    nb_queues = pl->qf->nb_queues;

    /* Every array is grown through a temporary: assigning the result of
     * av_realloc_array() straight to the field would, on failure, lose the
     * only pointer to the old array and leave a NULL behind that the
     * pipeline teardown cannot clean up after. */
    grown = av_realloc_array(pl->desc_layout, pl->desc_layout_num + nb_queues,
                             sizeof(*pl->desc_layout));
    if (!grown)
        return AVERROR(ENOMEM);
    pl->desc_layout = grown;

    grown = av_realloc_array(pl->desc_set_initialized, pl->descriptor_sets_num + 1,
                             sizeof(*pl->desc_set_initialized));
    if (!grown)
        return AVERROR(ENOMEM);
    pl->desc_set_initialized = grown;
    pl->desc_set_initialized[pl->descriptor_sets_num] = 0;

    layout = &pl->desc_layout[pl->desc_layout_num];

    { /* Create descriptor set layout descriptions */
        VkDescriptorSetLayoutCreateInfo desc_create_layout = { 0 };
        VkDescriptorSetLayoutBinding *desc_binding;

        desc_binding = av_calloc(num, sizeof(*desc_binding));
        if (!desc_binding)
            return AVERROR(ENOMEM);

        for (int i = 0; i < num; i++) {
            desc_binding[i].binding            = i;
            desc_binding[i].descriptorType     = desc[i].type;
            desc_binding[i].descriptorCount    = FFMAX(desc[i].elems, 1);
            desc_binding[i].stageFlags         = desc[i].stages;
            desc_binding[i].pImmutableSamplers = desc[i].sampler ?
                                                 desc[i].sampler->sampler :
                                                 NULL;
        }

        desc_create_layout.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
        desc_create_layout.pBindings = desc_binding;
        desc_create_layout.bindingCount = num;

        for (int i = 0; i < nb_queues; i++) {
            ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
                                                s->hwctx->alloc, &layout[i]);
            if (ret != VK_SUCCESS) {
                av_log(s, AV_LOG_ERROR, "Unable to init descriptor set "
                       "layout: %s\n", ff_vk_ret2str(ret));
                av_free(desc_binding);
                err = AVERROR_EXTERNAL;
                goto fail;
            }
            nb_layouts_created++;
        }

        av_free(desc_binding);
    }

    { /* Pool each descriptor by type and update pool counts */
        for (int i = 0; i < num; i++) {
            int j;
            for (j = 0; j < pl->pool_size_desc_num; j++)
                if (pl->pool_size_desc[j].type == desc[i].type)
                    break;
            if (j >= pl->pool_size_desc_num) {
                grown = av_realloc_array(pl->pool_size_desc,
                                         pl->pool_size_desc_num + 1,
                                         sizeof(*pl->pool_size_desc));
                if (!grown) {
                    err = AVERROR(ENOMEM);
                    goto fail;
                }
                pl->pool_size_desc = grown;
                /* Only count the new slot once it really exists */
                pl->pool_size_desc_num++;
                memset(&pl->pool_size_desc[j], 0, sizeof(VkDescriptorPoolSize));
            }
            pl->pool_size_desc[j].type             = desc[i].type;
            pl->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 1)*nb_queues;
        }
    }

    { /* Create template creation struct */
        VkDescriptorUpdateTemplateCreateInfo *dt;

        /* Freed after descriptor set initialization */
        des_entries = av_calloc(num, sizeof(*des_entries));
        if (!des_entries) {
            err = AVERROR(ENOMEM);
            goto fail;
        }

        for (int i = 0; i < num; i++) {
            des_entries[i].dstBinding      = i;
            des_entries[i].descriptorType  = desc[i].type;
            des_entries[i].descriptorCount = FFMAX(desc[i].elems, 1);
            des_entries[i].dstArrayElement = 0;
            des_entries[i].offset          = ((uint8_t *)desc[i].updater) - (uint8_t *)s;
            des_entries[i].stride          = descriptor_props[desc[i].type].struct_size;
        }

        grown = av_realloc_array(pl->desc_template_info,
                                 pl->total_descriptor_sets + nb_queues,
                                 sizeof(*pl->desc_template_info));
        if (!grown) {
            err = AVERROR(ENOMEM);
            goto fail;
        }
        pl->desc_template_info = grown;

        dt = &pl->desc_template_info[pl->total_descriptor_sets];
        memset(dt, 0, sizeof(*dt)*nb_queues);

        for (int i = 0; i < nb_queues; i++) {
            dt[i].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO;
            dt[i].templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET;
            dt[i].descriptorSetLayout = layout[i];
            dt[i].pDescriptorUpdateEntries = des_entries;
            dt[i].descriptorUpdateEntryCount = num;
        }
    }

    /* Everything succeeded: make the new set visible to the pipeline */
    pl->descriptor_sets_num++;

    pl->desc_layout_num += nb_queues;
    pl->total_descriptor_sets += nb_queues;

print:
    /* Write shader info */
    for (int i = 0; i < num; i++) {
        const struct descriptor_props *prop = &descriptor_props[desc[i].type];
        GLSLA("layout (set = %i, binding = %i", pl->descriptor_sets_num - 1, i);

        if (desc[i].mem_layout)
            GLSLA(", %s", desc[i].mem_layout);
        GLSLA(")");

        if (prop->is_uniform)
            GLSLA(" uniform");

        if (prop->mem_quali && desc[i].mem_quali)
            GLSLA(" %s", desc[i].mem_quali);

        if (prop->type)
            GLSLA(" %s", prop->type);

        if (prop->dim_needed)
            GLSLA("%iD", desc[i].dimensions);

        GLSLA(" %s", desc[i].name);

        if (prop->buf_content)
            GLSLA(" {\n    %s\n}", desc[i].buf_content);
        else if (desc[i].elems > 0)
            GLSLA("[%i]", desc[i].elems);

        GLSLA(";\n");
    }
    GLSLA("\n");

    return 0;

fail:
    /* The set counters were not advanced, so the pipeline teardown will not
     * see anything created above; release it here instead. */
    av_free(des_entries);
    for (int i = 0; i < nb_layouts_created; i++) {
        vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, layout[i],
                                       s->hwctx->alloc);
        layout[i] = VK_NULL_HANDLE;
    }
    return err;
}
/**
 * Push the current descriptor data (read relative to s) into a descriptor
 * set through its update template.
 *
 * The first update of a given set writes the copies belonging to every
 * queue; later updates only touch the copy of the current queue.
 *
 * @param s      Vulkan context, passed as the template's data pointer
 * @param pl     pipeline owning the descriptor sets
 * @param set_id index of the descriptor set as added to the pipeline
 */
void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
                                 int set_id)
{
    FFVulkanFunctions *vk = &s->vkfn;
    const int first_time = !pl->desc_set_initialized[set_id];
    int first, count;

    if (first_time) {
        first = set_id*pl->qf->nb_queues;
        count = pl->qf->nb_queues;
    } else {
        first = set_id*pl->qf->nb_queues + pl->qf->cur_queue;
        count = 1;
    }

    for (int k = first; k < first + count; k++)
        vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev,
                                            pl->desc_set[k],
                                            pl->desc_template[k],
                                            s);

    if (first_time)
        pl->desc_set_initialized[set_id] = 1;
}
/**
 * Record a push-constant update into the execution context's command buffer
 * for the current queue, using the layout of the currently bound pipeline.
 *
 * @param s      Vulkan context
 * @param e      execution context; a pipeline must already be bound to it
 * @param stage  shader stage(s) the constants are for
 * @param offset byte offset of the range to update
 * @param size   size of the range in bytes
 * @param src    source data
 */
void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e,
                            VkShaderStageFlagBits stage, int offset,
                            size_t size, void *src)
{
    FFVulkanFunctions *vk = &s->vkfn;
    VkCommandBuffer cmd_buf = e->bufs[e->qf->cur_queue];
    VkPipelineLayout pl_layout = e->bound_pl->pipeline_layout;

    vk->CmdPushConstants(cmd_buf, pl_layout, stage, offset, size, src);
}
/**
 * Finalize a pipeline's descriptor setup once all descriptor sets have been
 * added: create the descriptor pool, allocate the descriptor sets, create
 * the pipeline layout and create one update template per descriptor set.
 *
 * The temporary descriptions built while adding sets (pool sizes, push
 * constant ranges, template infos and their entry arrays) are released here
 * as they are consumed.
 *
 * @param s  Vulkan context
 * @param pl pipeline to finalize
 * @return 0 on success, AVERROR(ENOMEM) on allocation failure,
 *         AVERROR_EXTERNAL if a Vulkan call failed. On failure whatever was
 *         already created stays attached to the pipeline and is released
 *         when the pipeline is freed.
 */
int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;

    /* Scratch array, one slot per descriptor set; reused for layouts here */
    pl->desc_staging = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_staging));
    if (!pl->desc_staging)
        return AVERROR(ENOMEM);

    { /* Init descriptor set pool */
        VkDescriptorPoolCreateInfo pool_create_info = {
            .sType         = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
            .poolSizeCount = pl->pool_size_desc_num,
            .pPoolSizes    = pl->pool_size_desc,
            .maxSets       = pl->total_descriptor_sets,
        };

        ret = vk->CreateDescriptorPool(s->hwctx->act_dev, &pool_create_info,
                                       s->hwctx->alloc, &pl->desc_pool);
        av_freep(&pl->pool_size_desc);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Unable to init descriptor set "
                   "pool: %s\n", ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    { /* Allocate descriptor sets */
        VkDescriptorSetAllocateInfo alloc_info = {
            .sType              = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
            .descriptorPool     = pl->desc_pool,
            .descriptorSetCount = pl->total_descriptor_sets,
            .pSetLayouts        = pl->desc_layout,
        };

        pl->desc_set = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_set));
        if (!pl->desc_set)
            return AVERROR(ENOMEM);

        ret = vk->AllocateDescriptorSets(s->hwctx->act_dev, &alloc_info,
                                         pl->desc_set);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    { /* Finally create the pipeline layout */
        VkPipelineLayoutCreateInfo spawn_pipeline_layout = {
            .sType                  = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
            .pSetLayouts            = (VkDescriptorSetLayout *)pl->desc_staging,
            .pushConstantRangeCount = pl->push_consts_num,
            .pPushConstantRanges    = pl->push_consts,
        };

        /* Layouts are stored once per queue; the pipeline layout only needs
         * one per descriptor set, so take the first copy of each. */
        for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues)
            pl->desc_staging[spawn_pipeline_layout.setLayoutCount++] = pl->desc_layout[i];

        ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &spawn_pipeline_layout,
                                       s->hwctx->alloc, &pl->pipeline_layout);
        av_freep(&pl->push_consts);
        pl->push_consts_num = 0;
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    { /* Descriptor template (for tightly packed descriptors) */
        VkDescriptorUpdateTemplateCreateInfo *dt;

        /* Zero-initialized on purpose: if creating a template fails midway,
         * the pipeline teardown tests every slot of this array before
         * destroying it, so slots not yet filled in must read as null
         * handles rather than uninitialized memory. */
        pl->desc_template = av_calloc(pl->total_descriptor_sets,
                                      sizeof(*pl->desc_template));
        if (!pl->desc_template)
            return AVERROR(ENOMEM);

        /* Create update templates for the descriptor sets */
        for (int i = 0; i < pl->total_descriptor_sets; i++) {
            dt = &pl->desc_template_info[i];
            dt->pipelineLayout = pl->pipeline_layout;
            ret = vk->CreateDescriptorUpdateTemplate(s->hwctx->act_dev,
                                                     dt, s->hwctx->alloc,
                                                     &pl->desc_template[i]);
            if (ret != VK_SUCCESS) {
                av_log(s, AV_LOG_ERROR, "Unable to init descriptor "
                       "template: %s\n", ff_vk_ret2str(ret));
                return AVERROR_EXTERNAL;
            }
        }

        /* Free the duplicated memory used for the template entries; the
         * entry array is shared by all queues of a set, hence the stride. */
        for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) {
            dt = &pl->desc_template_info[i];
            av_free((void *)dt->pDescriptorUpdateEntries);
        }

        av_freep(&pl->desc_template_info);
    }

    return 0;
}
/* Instantiates create_pipeline(): allocates a zeroed FFVulkanPipeline and
 * appends it to s->pipelines, bumping s->pipelines_num. */
FN_CREATING(FFVulkanContext, FFVulkanPipeline, pipeline, pipelines, pipelines_num)

/**
 * Create a new, empty pipeline owned by the context.
 *
 * @param s  Vulkan context that will own the pipeline
 * @param qf queue family context stored in the pipeline
 * @return the new pipeline, or NULL on allocation failure
 */
FFVulkanPipeline *ff_vk_create_pipeline(FFVulkanContext *s, FFVkQueueFamilyCtx *qf)
{
    FFVulkanPipeline *new_pl = create_pipeline(s);

    if (!new_pl)
        return NULL;

    new_pl->qf = qf;
    return new_pl;
}
/**
 * Create the Vulkan compute pipeline object for a pipeline, using the first
 * attached shader whose stage includes the compute bit.
 *
 * @param s  Vulkan context
 * @param pl pipeline whose layout has already been created
 * @return 0 on success, AVERROR(EINVAL) if no compute shader is attached,
 *         AVERROR_EXTERNAL if Vulkan failed to create the pipeline
 */
int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
{
    FFVulkanFunctions *vk = &s->vkfn;
    FFVkSPIRVShader *compute = NULL;
    VkResult res;

    VkComputePipelineCreateInfo create_info = {
        .sType  = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
        .layout = pl->pipeline_layout,
    };

    /* Stop at the first shader usable as a compute stage */
    for (int n = 0; n < pl->shaders_num && !compute; n++)
        if (pl->shaders[n]->shader.stage & VK_SHADER_STAGE_COMPUTE_BIT)
            compute = pl->shaders[n];

    if (!compute) {
        av_log(s, AV_LOG_ERROR, "Can't init compute pipeline, no shader\n");
        return AVERROR(EINVAL);
    }

    create_info.stage = compute->shader;

    res = vk->CreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1,
                                     &create_info, s->hwctx->alloc,
                                     &pl->pipeline);
    if (res != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to init compute pipeline: %s\n",
               ff_vk_ret2str(res));
        return AVERROR_EXTERNAL;
    }

    pl->bind_point = VK_PIPELINE_BIND_POINT_COMPUTE;

    return 0;
}
/**
 * Record binding of a pipeline and of its descriptor sets for the current
 * queue into the execution context's command buffer, and remember the
 * pipeline as the one bound to that context.
 *
 * @param s  Vulkan context
 * @param e  execution context being recorded
 * @param pl pipeline to bind
 */
void ff_vk_bind_pipeline_exec(FFVulkanContext *s, FFVkExecContext *e,
                              FFVulkanPipeline *pl)
{
    FFVulkanFunctions *vk = &s->vkfn;
    int set = 0;

    vk->CmdBindPipeline(e->bufs[e->qf->cur_queue], pl->bind_point, pl->pipeline);

    /* Gather the current queue's copy of every descriptor set into the
     * contiguous staging array expected by the bind call. */
    while (set < pl->descriptor_sets_num) {
        pl->desc_staging[set] = pl->desc_set[set*pl->qf->nb_queues + pl->qf->cur_queue];
        set++;
    }

    vk->CmdBindDescriptorSets(e->bufs[e->qf->cur_queue], pl->bind_point,
                              pl->pipeline_layout, 0,
                              pl->descriptor_sets_num,
                              (VkDescriptorSet *)pl->desc_staging,
                              0, NULL);

    e->bound_pl = pl;
}
/* Tear down an execution context: wait for its queues to finish, drop the
 * per-queue fences and dependencies, release the command buffers, command
 * pool and query pool, then free the context itself. */
static void free_exec_ctx(FFVulkanContext *s, FFVkExecContext *e)
{
    FFVulkanFunctions *vk = &s->vkfn;

    for (int qi = 0; qi < e->qf->nb_queues; qi++) {
        FFVkQueueCtx *queue = e->queues + qi;
        int dep;

        /* Make sure the queue has finished executing, then free the fence */
        if (queue->fence) {
            vk->WaitForFences(s->hwctx->act_dev, 1, &queue->fence, VK_TRUE, UINT64_MAX);
            vk->ResetFences(s->hwctx->act_dev, 1, &queue->fence);
            vk->DestroyFence(s->hwctx->act_dev, queue->fence, s->hwctx->alloc);
        }

        /* Free buffer dependencies */
        for (dep = 0; dep < queue->nb_buf_deps; dep++)
            av_buffer_unref(&queue->buf_deps[dep]);
        av_free(queue->buf_deps);

        /* Free frame dependencies */
        for (dep = 0; dep < queue->nb_frame_deps; dep++)
            av_frame_free(&queue->frame_deps[dep]);
        av_free(queue->frame_deps);
    }

    /* Command buffers go before the pool they were allocated from */
    if (e->bufs)
        vk->FreeCommandBuffers(s->hwctx->act_dev, e->pool, e->qf->nb_queues, e->bufs);
    if (e->pool)
        vk->DestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc);
    if (e->query.pool)
        vk->DestroyQueryPool(s->hwctx->act_dev, e->query.pool, s->hwctx->alloc);

    av_freep(&e->query.data);
    av_freep(&e->bufs);
    av_freep(&e->queues);

    /* Semaphore submission arrays */
    av_freep(&e->sem_sig);
    av_freep(&e->sem_sig_val);
    av_freep(&e->sem_sig_val_dst);
    av_freep(&e->sem_wait);
    av_freep(&e->sem_wait_dst);
    av_freep(&e->sem_wait_val);

    av_free(e);
}
/* Release everything a pipeline owns: its shaders, the Vulkan pipeline and
 * pipeline layout, the per-queue update templates and set layouts, the
 * descriptor pool, all helper arrays, and finally the pipeline itself. */
static void free_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
{
    FFVulkanFunctions *vk = &s->vkfn;
    int n;

    /* Shaders: GLSL text, Vulkan module, then the wrapper struct */
    for (n = 0; n < pl->shaders_num; n++) {
        FFVkSPIRVShader *shader = pl->shaders[n];

        av_bprint_finalize(&shader->src, NULL);
        vk->DestroyShaderModule(s->hwctx->act_dev, shader->shader.module,
                                s->hwctx->alloc);
        av_free(shader);
    }

    vk->DestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc);
    vk->DestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout,
                              s->hwctx->alloc);

    /* Either array may be missing if initialization stopped early */
    for (n = 0; n < pl->desc_layout_num; n++) {
        if (pl->desc_template && pl->desc_template[n])
            vk->DestroyDescriptorUpdateTemplate(s->hwctx->act_dev,
                                                pl->desc_template[n],
                                                s->hwctx->alloc);
        if (pl->desc_layout && pl->desc_layout[n])
            vk->DestroyDescriptorSetLayout(s->hwctx->act_dev,
                                           pl->desc_layout[n],
                                           s->hwctx->alloc);
    }

    /* Also frees the descriptor sets */
    if (pl->desc_pool)
        vk->DestroyDescriptorPool(s->hwctx->act_dev, pl->desc_pool,
                                  s->hwctx->alloc);

    av_freep(&pl->desc_staging);
    av_freep(&pl->desc_set);
    av_freep(&pl->shaders);
    av_freep(&pl->desc_layout);
    av_freep(&pl->desc_template);
    av_freep(&pl->desc_set_initialized);
    av_freep(&pl->push_consts);
    pl->push_consts_num = 0;

    /* Only freed in case of failure */
    av_freep(&pl->pool_size_desc);
    if (pl->desc_template_info) {
        /* One entry array is shared by all queues of a set, so step by the
         * queue count to free each array exactly once. */
        n = 0;
        while (n < pl->total_descriptor_sets) {
            av_free((void *)pl->desc_template_info[n].pDescriptorUpdateEntries);
            n += pl->qf->nb_queues;
        }
        av_freep(&pl->desc_template_info);
    }

    av_free(pl);
}
/**
 * Release everything owned by a Vulkan context: cached property arrays, the
 * SPIR-V compiler, all execution contexts, samplers and pipelines, the
 * scratch buffer, and the references to the device and frames contexts.
 *
 * Element counters are reset together with the arrays they describe, so the
 * context never advertises elements that no longer exist and a repeated
 * call does not walk freed arrays.
 *
 * @param s Vulkan context to clean up; the structure itself is not freed
 */
void ff_vk_uninit(FFVulkanContext *s)
{
    FFVulkanFunctions *vk = &s->vkfn;

    av_freep(&s->query_props);
    av_freep(&s->qf_props);
    av_freep(&s->video_props);

    if (s->spirv_compiler)
        s->spirv_compiler->uninit(&s->spirv_compiler);

    for (int i = 0; i < s->exec_ctx_num; i++)
        free_exec_ctx(s, s->exec_ctx[i]);
    av_freep(&s->exec_ctx);
    s->exec_ctx_num = 0;

    for (int i = 0; i < s->samplers_num; i++) {
        vk->DestroySampler(s->hwctx->act_dev, s->samplers[i]->sampler[0],
                           s->hwctx->alloc);
        av_free(s->samplers[i]);
    }
    av_freep(&s->samplers);
    s->samplers_num = 0;

    for (int i = 0; i < s->pipelines_num; i++)
        free_pipeline(s, s->pipelines[i]);
    av_freep(&s->pipelines);
    s->pipelines_num = 0;

    av_freep(&s->scratch);
    s->scratch_size = 0;

    /* Dropped last: the teardown above still goes through s->hwctx */
    av_buffer_unref(&s->device_ref);
    av_buffer_unref(&s->frames_ref);
}