mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-03 05:10:03 +02:00
hwcontext_vulkan: support threadsafe queue and frame operations
This commit is contained in:
parent
2a1fd2814f
commit
46a77c6496
@ -27,6 +27,7 @@
|
|||||||
#include <dlfcn.h>
|
#include <dlfcn.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include "thread.h"
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
@ -92,8 +93,10 @@ typedef struct VulkanDevicePriv {
|
|||||||
VkPhysicalDeviceVulkan13Features device_features_1_3;
|
VkPhysicalDeviceVulkan13Features device_features_1_3;
|
||||||
|
|
||||||
/* Queues */
|
/* Queues */
|
||||||
uint32_t qfs[5];
|
pthread_mutex_t **qf_mutex;
|
||||||
int num_qfs;
|
uint32_t nb_tot_qfs;
|
||||||
|
uint32_t img_qfs[5];
|
||||||
|
uint32_t nb_img_qfs;
|
||||||
|
|
||||||
/* Debug callback */
|
/* Debug callback */
|
||||||
VkDebugUtilsMessengerEXT debug_ctx;
|
VkDebugUtilsMessengerEXT debug_ctx;
|
||||||
@ -127,6 +130,8 @@ typedef struct VulkanFramesPriv {
|
|||||||
} VulkanFramesPriv;
|
} VulkanFramesPriv;
|
||||||
|
|
||||||
typedef struct AVVkFrameInternal {
|
typedef struct AVVkFrameInternal {
|
||||||
|
pthread_mutex_t update_mutex;
|
||||||
|
|
||||||
#if CONFIG_CUDA
|
#if CONFIG_CUDA
|
||||||
/* Importing external memory into cuda is really expensive so we keep the
|
/* Importing external memory into cuda is really expensive so we keep the
|
||||||
* memory imported all the time */
|
* memory imported all the time */
|
||||||
@ -1305,6 +1310,12 @@ static void vulkan_device_free(AVHWDeviceContext *ctx)
|
|||||||
if (p->libvulkan)
|
if (p->libvulkan)
|
||||||
dlclose(p->libvulkan);
|
dlclose(p->libvulkan);
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < p->nb_tot_qfs; i++) {
|
||||||
|
pthread_mutex_destroy(p->qf_mutex[i]);
|
||||||
|
av_freep(&p->qf_mutex[i]);
|
||||||
|
}
|
||||||
|
av_freep(&p->qf_mutex);
|
||||||
|
|
||||||
RELEASE_PROPS(hwctx->enabled_inst_extensions, hwctx->nb_enabled_inst_extensions);
|
RELEASE_PROPS(hwctx->enabled_inst_extensions, hwctx->nb_enabled_inst_extensions);
|
||||||
RELEASE_PROPS(hwctx->enabled_dev_extensions, hwctx->nb_enabled_dev_extensions);
|
RELEASE_PROPS(hwctx->enabled_dev_extensions, hwctx->nb_enabled_dev_extensions);
|
||||||
}
|
}
|
||||||
@ -1437,13 +1448,26 @@ end:
|
|||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void lock_queue(AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index)
|
||||||
|
{
|
||||||
|
VulkanDevicePriv *p = ctx->internal->priv;
|
||||||
|
pthread_mutex_lock(&p->qf_mutex[queue_family][index]);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void unlock_queue(AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index)
|
||||||
|
{
|
||||||
|
VulkanDevicePriv *p = ctx->internal->priv;
|
||||||
|
pthread_mutex_unlock(&p->qf_mutex[queue_family][index]);
|
||||||
|
}
|
||||||
|
|
||||||
static int vulkan_device_init(AVHWDeviceContext *ctx)
|
static int vulkan_device_init(AVHWDeviceContext *ctx)
|
||||||
{
|
{
|
||||||
int err;
|
int err;
|
||||||
uint32_t queue_num;
|
uint32_t qf_num;
|
||||||
AVVulkanDeviceContext *hwctx = ctx->hwctx;
|
AVVulkanDeviceContext *hwctx = ctx->hwctx;
|
||||||
VulkanDevicePriv *p = ctx->internal->priv;
|
VulkanDevicePriv *p = ctx->internal->priv;
|
||||||
FFVulkanFunctions *vk = &p->vkfn;
|
FFVulkanFunctions *vk = &p->vkfn;
|
||||||
|
VkQueueFamilyProperties *qf;
|
||||||
int graph_index, comp_index, tx_index, enc_index, dec_index;
|
int graph_index, comp_index, tx_index, enc_index, dec_index;
|
||||||
|
|
||||||
/* Set device extension flags */
|
/* Set device extension flags */
|
||||||
@ -1482,12 +1506,37 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
|
|||||||
p->dev_is_nvidia = (p->props.properties.vendorID == 0x10de);
|
p->dev_is_nvidia = (p->props.properties.vendorID == 0x10de);
|
||||||
p->dev_is_intel = (p->props.properties.vendorID == 0x8086);
|
p->dev_is_intel = (p->props.properties.vendorID == 0x8086);
|
||||||
|
|
||||||
vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL);
|
vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, NULL);
|
||||||
if (!queue_num) {
|
if (!qf_num) {
|
||||||
av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
|
av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
|
||||||
return AVERROR_EXTERNAL;
|
return AVERROR_EXTERNAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
qf = av_malloc_array(qf_num, sizeof(VkQueueFamilyProperties));
|
||||||
|
if (!qf)
|
||||||
|
return AVERROR(ENOMEM);
|
||||||
|
|
||||||
|
vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, qf);
|
||||||
|
|
||||||
|
p->qf_mutex = av_calloc(qf_num, sizeof(*p->qf_mutex));
|
||||||
|
if (!p->qf_mutex)
|
||||||
|
return AVERROR(ENOMEM);
|
||||||
|
p->nb_tot_qfs = qf_num;
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < qf_num; i++) {
|
||||||
|
p->qf_mutex[i] = av_calloc(qf[i].queueCount, sizeof(**p->qf_mutex));
|
||||||
|
if (!p->qf_mutex[i])
|
||||||
|
return AVERROR(ENOMEM);
|
||||||
|
for (uint32_t j = 0; j < qf[i].queueCount; j++) {
|
||||||
|
err = pthread_mutex_init(&p->qf_mutex[i][j], NULL);
|
||||||
|
if (err != 0) {
|
||||||
|
av_log(ctx, AV_LOG_ERROR, "pthread_mutex_init failed : %s\n",
|
||||||
|
av_err2str(err));
|
||||||
|
return AVERROR(err);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
graph_index = hwctx->queue_family_index;
|
graph_index = hwctx->queue_family_index;
|
||||||
comp_index = hwctx->queue_family_comp_index;
|
comp_index = hwctx->queue_family_comp_index;
|
||||||
tx_index = hwctx->queue_family_tx_index;
|
tx_index = hwctx->queue_family_tx_index;
|
||||||
@ -1502,9 +1551,9 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
|
|||||||
return AVERROR(EINVAL); \
|
return AVERROR(EINVAL); \
|
||||||
} else if (fidx < 0 || ctx_qf < 0) { \
|
} else if (fidx < 0 || ctx_qf < 0) { \
|
||||||
break; \
|
break; \
|
||||||
} else if (ctx_qf >= queue_num) { \
|
} else if (ctx_qf >= qf_num) { \
|
||||||
av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i families)!\n", \
|
av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i families)!\n", \
|
||||||
type, ctx_qf, queue_num); \
|
type, ctx_qf, qf_num); \
|
||||||
return AVERROR(EINVAL); \
|
return AVERROR(EINVAL); \
|
||||||
} \
|
} \
|
||||||
\
|
\
|
||||||
@ -1521,7 +1570,7 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
|
|||||||
tx_index = (ctx_qf == tx_index) ? -1 : tx_index; \
|
tx_index = (ctx_qf == tx_index) ? -1 : tx_index; \
|
||||||
enc_index = (ctx_qf == enc_index) ? -1 : enc_index; \
|
enc_index = (ctx_qf == enc_index) ? -1 : enc_index; \
|
||||||
dec_index = (ctx_qf == dec_index) ? -1 : dec_index; \
|
dec_index = (ctx_qf == dec_index) ? -1 : dec_index; \
|
||||||
p->qfs[p->num_qfs++] = ctx_qf; \
|
p->img_qfs[p->nb_img_qfs++] = ctx_qf; \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
CHECK_QUEUE("graphics", 0, graph_index, hwctx->queue_family_index, hwctx->nb_graphics_queues);
|
CHECK_QUEUE("graphics", 0, graph_index, hwctx->queue_family_index, hwctx->nb_graphics_queues);
|
||||||
@ -1532,6 +1581,11 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
|
|||||||
|
|
||||||
#undef CHECK_QUEUE
|
#undef CHECK_QUEUE
|
||||||
|
|
||||||
|
if (!hwctx->lock_queue)
|
||||||
|
hwctx->lock_queue = lock_queue;
|
||||||
|
if (!hwctx->unlock_queue)
|
||||||
|
hwctx->unlock_queue = unlock_queue;
|
||||||
|
|
||||||
/* Get device capabilities */
|
/* Get device capabilities */
|
||||||
vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);
|
vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);
|
||||||
|
|
||||||
@ -1733,9 +1787,6 @@ static void vulkan_free_internal(AVVkFrame *f)
|
|||||||
{
|
{
|
||||||
AVVkFrameInternal *internal = f->internal;
|
AVVkFrameInternal *internal = f->internal;
|
||||||
|
|
||||||
if (!internal)
|
|
||||||
return;
|
|
||||||
|
|
||||||
#if CONFIG_CUDA
|
#if CONFIG_CUDA
|
||||||
if (internal->cuda_fc_ref) {
|
if (internal->cuda_fc_ref) {
|
||||||
AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
|
AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
|
||||||
@ -1764,6 +1815,7 @@ static void vulkan_free_internal(AVVkFrame *f)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
pthread_mutex_destroy(&internal->update_mutex);
|
||||||
av_freep(&f->internal);
|
av_freep(&f->internal);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1924,9 +1976,11 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
|
|||||||
uint32_t src_qf, dst_qf;
|
uint32_t src_qf, dst_qf;
|
||||||
VkImageLayout new_layout;
|
VkImageLayout new_layout;
|
||||||
VkAccessFlags new_access;
|
VkAccessFlags new_access;
|
||||||
|
AVVulkanFramesContext *vkfc = hwfc->hwctx;
|
||||||
const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
|
const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
|
||||||
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
|
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
|
||||||
FFVulkanFunctions *vk = &p->vkfn;
|
FFVulkanFunctions *vk = &p->vkfn;
|
||||||
|
AVFrame tmp = { .data[0] = (uint8_t *)frame };
|
||||||
uint64_t sem_sig_val[AV_NUM_DATA_POINTERS];
|
uint64_t sem_sig_val[AV_NUM_DATA_POINTERS];
|
||||||
|
|
||||||
VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
|
VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
|
||||||
@ -1945,6 +1999,12 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
|
|||||||
};
|
};
|
||||||
|
|
||||||
VkPipelineStageFlagBits wait_st[AV_NUM_DATA_POINTERS];
|
VkPipelineStageFlagBits wait_st[AV_NUM_DATA_POINTERS];
|
||||||
|
|
||||||
|
if ((err = wait_start_exec_ctx(hwfc, ectx)))
|
||||||
|
return err;
|
||||||
|
|
||||||
|
vkfc->lock_frame(hwfc, frame);
|
||||||
|
|
||||||
for (int i = 0; i < planes; i++) {
|
for (int i = 0; i < planes; i++) {
|
||||||
wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
|
wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
|
||||||
sem_sig_val[i] = frame->sem_value[i] + 1;
|
sem_sig_val[i] = frame->sem_value[i] + 1;
|
||||||
@ -1981,9 +2041,6 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((err = wait_start_exec_ctx(hwfc, ectx)))
|
|
||||||
return err;
|
|
||||||
|
|
||||||
/* Change the image layout to something more optimal for writes.
|
/* Change the image layout to something more optimal for writes.
|
||||||
* This also signals the newly created semaphore, making it usable
|
* This also signals the newly created semaphore, making it usable
|
||||||
* for synchronization */
|
* for synchronization */
|
||||||
@ -2009,7 +2066,10 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
|
|||||||
VK_PIPELINE_STAGE_TRANSFER_BIT,
|
VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||||
0, 0, NULL, 0, NULL, planes, img_bar);
|
0, 0, NULL, 0, NULL, planes, img_bar);
|
||||||
|
|
||||||
return submit_exec_ctx(hwfc, ectx, &s_info, frame, 0);
|
err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 0);
|
||||||
|
vkfc->unlock_frame(hwfc, frame);
|
||||||
|
|
||||||
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format,
|
static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format,
|
||||||
@ -2091,10 +2151,10 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
|
|||||||
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
|
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
|
||||||
.usage = usage,
|
.usage = usage,
|
||||||
.samples = VK_SAMPLE_COUNT_1_BIT,
|
.samples = VK_SAMPLE_COUNT_1_BIT,
|
||||||
.pQueueFamilyIndices = p->qfs,
|
.pQueueFamilyIndices = p->img_qfs,
|
||||||
.queueFamilyIndexCount = p->num_qfs,
|
.queueFamilyIndexCount = p->nb_img_qfs,
|
||||||
.sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
|
.sharingMode = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
|
||||||
VK_SHARING_MODE_EXCLUSIVE,
|
VK_SHARING_MODE_EXCLUSIVE,
|
||||||
};
|
};
|
||||||
|
|
||||||
get_plane_wh(&create_info.extent.width, &create_info.extent.height,
|
get_plane_wh(&create_info.extent.width, &create_info.extent.height,
|
||||||
@ -2118,6 +2178,7 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
|
|||||||
return AVERROR_EXTERNAL;
|
return AVERROR_EXTERNAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
f->queue_family[i] = p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0];
|
||||||
f->layout[i] = create_info.initialLayout;
|
f->layout[i] = create_info.initialLayout;
|
||||||
f->access[i] = 0x0;
|
f->access[i] = 0x0;
|
||||||
f->sem_value[i] = 0;
|
f->sem_value[i] = 0;
|
||||||
@ -2162,10 +2223,10 @@ static void try_export_flags(AVHWFramesContext *hwfc,
|
|||||||
VkPhysicalDeviceImageDrmFormatModifierInfoEXT phy_dev_mod_info = {
|
VkPhysicalDeviceImageDrmFormatModifierInfoEXT phy_dev_mod_info = {
|
||||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT,
|
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT,
|
||||||
.pNext = NULL,
|
.pNext = NULL,
|
||||||
.pQueueFamilyIndices = p->qfs,
|
.pQueueFamilyIndices = p->img_qfs,
|
||||||
.queueFamilyIndexCount = p->num_qfs,
|
.queueFamilyIndexCount = p->nb_img_qfs,
|
||||||
.sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
|
.sharingMode = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
|
||||||
VK_SHARING_MODE_EXCLUSIVE,
|
VK_SHARING_MODE_EXCLUSIVE,
|
||||||
};
|
};
|
||||||
VkPhysicalDeviceExternalImageFormatInfo enext = {
|
VkPhysicalDeviceExternalImageFormatInfo enext = {
|
||||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
|
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
|
||||||
@ -2260,6 +2321,16 @@ fail:
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void lock_frame(AVHWFramesContext *fc, AVVkFrame *vkf)
|
||||||
|
{
|
||||||
|
pthread_mutex_lock(&vkf->internal->update_mutex);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void unlock_frame(AVHWFramesContext *fc, AVVkFrame *vkf)
|
||||||
|
{
|
||||||
|
pthread_mutex_unlock(&vkf->internal->update_mutex);
|
||||||
|
}
|
||||||
|
|
||||||
static void vulkan_frames_uninit(AVHWFramesContext *hwfc)
|
static void vulkan_frames_uninit(AVHWFramesContext *hwfc)
|
||||||
{
|
{
|
||||||
VulkanFramesPriv *fp = hwfc->internal->priv;
|
VulkanFramesPriv *fp = hwfc->internal->priv;
|
||||||
@ -2422,6 +2493,11 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
|
|||||||
return AVERROR(ENOMEM);
|
return AVERROR(ENOMEM);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!hwctx->lock_frame)
|
||||||
|
hwctx->lock_frame = lock_frame;
|
||||||
|
if (!hwctx->unlock_frame)
|
||||||
|
hwctx->unlock_frame = unlock_frame;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2728,10 +2804,10 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
|
|||||||
.usage = VK_IMAGE_USAGE_SAMPLED_BIT |
|
.usage = VK_IMAGE_USAGE_SAMPLED_BIT |
|
||||||
VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
|
VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
|
||||||
.samples = VK_SAMPLE_COUNT_1_BIT,
|
.samples = VK_SAMPLE_COUNT_1_BIT,
|
||||||
.pQueueFamilyIndices = p->qfs,
|
.pQueueFamilyIndices = p->img_qfs,
|
||||||
.queueFamilyIndexCount = p->num_qfs,
|
.queueFamilyIndexCount = p->nb_img_qfs,
|
||||||
.sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
|
.sharingMode = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
|
||||||
VK_SHARING_MODE_EXCLUSIVE,
|
VK_SHARING_MODE_EXCLUSIVE,
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Image format verification */
|
/* Image format verification */
|
||||||
@ -2810,6 +2886,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
|
|||||||
* offer us anything we could import and sync with, so instead
|
* offer us anything we could import and sync with, so instead
|
||||||
* just signal the semaphore we created. */
|
* just signal the semaphore we created. */
|
||||||
|
|
||||||
|
f->queue_family[i] = p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0];
|
||||||
f->layout[i] = create_info.initialLayout;
|
f->layout[i] = create_info.initialLayout;
|
||||||
f->access[i] = 0x0;
|
f->access[i] = 0x0;
|
||||||
f->sem_value[i] = 0;
|
f->sem_value[i] = 0;
|
||||||
@ -3018,20 +3095,12 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
|
|||||||
CU_AD_FORMAT_UNSIGNED_INT8;
|
CU_AD_FORMAT_UNSIGNED_INT8;
|
||||||
|
|
||||||
dst_f = (AVVkFrame *)frame->data[0];
|
dst_f = (AVVkFrame *)frame->data[0];
|
||||||
|
|
||||||
dst_int = dst_f->internal;
|
dst_int = dst_f->internal;
|
||||||
if (!dst_int || !dst_int->cuda_fc_ref) {
|
|
||||||
if (!dst_f->internal)
|
|
||||||
dst_f->internal = dst_int = av_mallocz(sizeof(*dst_f->internal));
|
|
||||||
|
|
||||||
if (!dst_int)
|
|
||||||
return AVERROR(ENOMEM);
|
|
||||||
|
|
||||||
|
if (!dst_int->cuda_fc_ref) {
|
||||||
dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc);
|
dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc);
|
||||||
if (!dst_int->cuda_fc_ref) {
|
if (!dst_int->cuda_fc_ref)
|
||||||
av_freep(&dst_f->internal);
|
|
||||||
return AVERROR(ENOMEM);
|
return AVERROR(ENOMEM);
|
||||||
}
|
|
||||||
|
|
||||||
for (int i = 0; i < planes; i++) {
|
for (int i = 0; i < planes; i++) {
|
||||||
CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
|
CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
|
||||||
@ -3705,13 +3774,14 @@ static int unmap_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs,
|
|||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
|
static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
|
||||||
AVBufferRef **bufs, size_t *buf_offsets,
|
AVBufferRef **bufs, size_t *buf_offsets,
|
||||||
const int *buf_stride, int w,
|
const int *buf_stride, int w,
|
||||||
int h, enum AVPixelFormat pix_fmt, int to_buf)
|
int h, enum AVPixelFormat pix_fmt, int to_buf)
|
||||||
{
|
{
|
||||||
int err;
|
int err;
|
||||||
AVVkFrame *frame = (AVVkFrame *)f->data[0];
|
AVVkFrame *frame = (AVVkFrame *)f->data[0];
|
||||||
|
AVVulkanFramesContext *vkfc = hwfc->hwctx;
|
||||||
VulkanFramesPriv *fp = hwfc->internal->priv;
|
VulkanFramesPriv *fp = hwfc->internal->priv;
|
||||||
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
|
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
|
||||||
FFVulkanFunctions *vk = &p->vkfn;
|
FFVulkanFunctions *vk = &p->vkfn;
|
||||||
@ -3746,11 +3816,13 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
|
|||||||
.waitSemaphoreCount = planes,
|
.waitSemaphoreCount = planes,
|
||||||
};
|
};
|
||||||
|
|
||||||
for (int i = 0; i < planes; i++)
|
vkfc->lock_frame(hwfc, frame);
|
||||||
sem_signal_values[i] = frame->sem_value[i] + 1;
|
|
||||||
|
|
||||||
if ((err = wait_start_exec_ctx(hwfc, ectx)))
|
if ((err = wait_start_exec_ctx(hwfc, ectx)))
|
||||||
return err;
|
goto end;
|
||||||
|
|
||||||
|
for (int i = 0; i < planes; i++)
|
||||||
|
sem_signal_values[i] = frame->sem_value[i] + 1;
|
||||||
|
|
||||||
/* Change the image layout to something more optimal for transfers */
|
/* Change the image layout to something more optimal for transfers */
|
||||||
for (int i = 0; i < planes; i++) {
|
for (int i = 0; i < planes; i++) {
|
||||||
@ -3825,14 +3897,18 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
|
|||||||
if (!f->buf[ref])
|
if (!f->buf[ref])
|
||||||
break;
|
break;
|
||||||
if ((err = add_buf_dep_exec_ctx(hwfc, ectx, &f->buf[ref], 1)))
|
if ((err = add_buf_dep_exec_ctx(hwfc, ectx, &f->buf[ref], 1)))
|
||||||
return err;
|
goto end;
|
||||||
}
|
}
|
||||||
if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, planes)))
|
if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, planes)))
|
||||||
return err;
|
goto end;
|
||||||
return submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref);
|
err = submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref);
|
||||||
} else {
|
} else {
|
||||||
return submit_exec_ctx(hwfc, ectx, &s_info, frame, 1);
|
err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
end:
|
||||||
|
vkfc->unlock_frame(hwfc, frame);
|
||||||
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
|
static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
|
||||||
@ -3961,8 +4037,9 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Copy buffers into/from image */
|
/* Copy buffers into/from image */
|
||||||
err = transfer_image_buf(hwfc, vkf, bufs, buf_offsets, tmp.linesize,
|
err = transfer_image_buf(hwfc, (AVFrame *)vkf, bufs, buf_offsets,
|
||||||
swf->width, swf->height, swf->format, from);
|
tmp.linesize, swf->width, swf->height, swf->format,
|
||||||
|
from);
|
||||||
|
|
||||||
if (from) {
|
if (from) {
|
||||||
/* Map, copy buffer (which came FROM the VkImage) to the frame, unmap */
|
/* Map, copy buffer (which came FROM the VkImage) to the frame, unmap */
|
||||||
@ -4143,7 +4220,25 @@ static int vulkan_frames_derive_to(AVHWFramesContext *dst_fc,
|
|||||||
|
|
||||||
AVVkFrame *av_vk_frame_alloc(void)
|
AVVkFrame *av_vk_frame_alloc(void)
|
||||||
{
|
{
|
||||||
return av_mallocz(sizeof(AVVkFrame));
|
int err;
|
||||||
|
AVVkFrame *f = av_mallocz(sizeof(AVVkFrame));
|
||||||
|
if (!f)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
f->internal = av_mallocz(sizeof(*f->internal));
|
||||||
|
if (!f->internal) {
|
||||||
|
av_free(f);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = pthread_mutex_init(&f->internal->update_mutex, NULL);
|
||||||
|
if (err != 0) {
|
||||||
|
av_free(f->internal);
|
||||||
|
av_free(f);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
return f;
|
||||||
}
|
}
|
||||||
|
|
||||||
const HWContextType ff_hwcontext_type_vulkan = {
|
const HWContextType ff_hwcontext_type_vulkan = {
|
||||||
|
@ -27,6 +27,8 @@
|
|||||||
#include "pixfmt.h"
|
#include "pixfmt.h"
|
||||||
#include "frame.h"
|
#include "frame.h"
|
||||||
|
|
||||||
|
typedef struct AVVkFrame AVVkFrame;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @file
|
* @file
|
||||||
* API-specific header for AV_HWDEVICE_TYPE_VULKAN.
|
* API-specific header for AV_HWDEVICE_TYPE_VULKAN.
|
||||||
@ -135,6 +137,19 @@ typedef struct AVVulkanDeviceContext {
|
|||||||
*/
|
*/
|
||||||
int queue_family_decode_index;
|
int queue_family_decode_index;
|
||||||
int nb_decode_queues;
|
int nb_decode_queues;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Locks a queue, preventing other threads from submitting any command
|
||||||
|
* buffers to this queue.
|
||||||
|
* If set to NULL, will be set to lavu-internal functions that utilize a
|
||||||
|
* mutex.
|
||||||
|
*/
|
||||||
|
void (*lock_queue)(struct AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Similar to lock_queue(), unlocks a queue. Must only be called after locking.
|
||||||
|
*/
|
||||||
|
void (*unlock_queue)(struct AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index);
|
||||||
} AVVulkanDeviceContext;
|
} AVVulkanDeviceContext;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -195,6 +210,21 @@ typedef struct AVVulkanFramesContext {
|
|||||||
* av_hwframe_ctx_init().
|
* av_hwframe_ctx_init().
|
||||||
*/
|
*/
|
||||||
AVVkFrameFlags flags;
|
AVVkFrameFlags flags;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Locks a frame, preventing other threads from changing frame properties.
|
||||||
|
* Users SHOULD only ever lock just before command submission in order
|
||||||
|
* to get accurate frame properties, and unlock immediately after command
|
||||||
|
* submission without waiting for it to finish.
|
||||||
|
*
|
||||||
|
* If unset, will be set to lavu-internal functions that utilize a mutex.
|
||||||
|
*/
|
||||||
|
void (*lock_frame)(struct AVHWFramesContext *fc, AVVkFrame *vkf);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Similar to lock_frame(), unlocks a frame. Must only be called after locking.
|
||||||
|
*/
|
||||||
|
void (*unlock_frame)(struct AVHWFramesContext *fc, AVVkFrame *vkf);
|
||||||
} AVVulkanFramesContext;
|
} AVVulkanFramesContext;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -210,7 +240,7 @@ typedef struct AVVulkanFramesContext {
|
|||||||
* @note the size of this structure is not part of the ABI, to allocate
|
* @note the size of this structure is not part of the ABI, to allocate
|
||||||
* you must use @av_vk_frame_alloc().
|
* you must use @av_vk_frame_alloc().
|
||||||
*/
|
*/
|
||||||
typedef struct AVVkFrame {
|
struct AVVkFrame {
|
||||||
/**
|
/**
|
||||||
* Vulkan images to which the memory is bound to.
|
* Vulkan images to which the memory is bound to.
|
||||||
*/
|
*/
|
||||||
@ -264,6 +294,12 @@ typedef struct AVVkFrame {
|
|||||||
* Describes the binding offset of each plane to the VkDeviceMemory.
|
* Describes the binding offset of each plane to the VkDeviceMemory.
|
||||||
*/
|
*/
|
||||||
ptrdiff_t offset[AV_NUM_DATA_POINTERS];
|
ptrdiff_t offset[AV_NUM_DATA_POINTERS];
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Queue family of the images. Must be VK_QUEUE_FAMILY_IGNORED if
|
||||||
|
* the image was allocated with the CONCURRENT concurrency option.
|
||||||
|
*/
|
||||||
|
uint32_t queue_family[AV_NUM_DATA_POINTERS];
|
||||||
} AVVkFrame;
|
} AVVkFrame;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
Loading…
Reference in New Issue
Block a user