From c06ad641ec36ea0e9011be8d6182612c22f6196b Mon Sep 17 00:00:00 2001
From: Lynne
Date: Wed, 19 Jul 2023 05:39:07 +0200
Subject: [PATCH] lavc/vulkan_decode: use a single execution pool per thread

The spec says command buffer pools must be externally synchronized
objects. This still lets us pool some, just not as much.
---
Review notes (everything between the "---" line above and the first
"diff --git" line is ignored by git am):

  * get_video_profile() looks up the VkVideoProfileListInfoKHR attached to
    ctx->s.hwfc->create_pnext and returns the first listed profile whose
    pNext chain contains the decode-profile struct for codec_id (H.264,
    HEVC or AV1). It returns NULL when the list is missing or nothing
    matches; for any other codec_id the struct type searched for is 0.
  * ff_vk_update_thread_context() creates an execution pool for dst when
    dst has none yet (exec_pool.cmd_bufs is NULL), reusing pool_size and
    nb_queries from src.
  * ff_vk_decode_init() now creates a pool of 2 contexts in the calling
    thread's FFVulkanDecodeContext instead of 2*thread_count contexts in
    the shared context; the queue family context moves into the shared
    struct so that other threads can create their own pools from it.
  * The pool is freed in ff_vk_decode_uninit() instead of free_common().
    NOTE(review): uninit now reads dec->shared_ref->data unconditionally;
    please confirm it can never run with shared_ref unset.
  * Changed relative to the original posting: both new av_log() messages
    now end in "\n" (av_log does not append a newline). Because of that
    the post-image blob id on the first "index" line below is stale.
  * Layout: this file was rebuilt from a copy whose newlines and
    indentation had been collapsed. Hunk line counts were re-checked
    against the @@ headers, but spacing inside context lines is a best
    guess, so "git apply --ignore-whitespace" may be required.

 libavcodec/vulkan_decode.c | 86 ++++++++++++++++++++++++++++----------
 libavcodec/vulkan_decode.h |  3 +-
 2 files changed, 66 insertions(+), 23 deletions(-)

diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index 973c7ca548..f20733fb39 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -42,12 +42,53 @@ static const VkExtensionProperties *dec_ext[] = {
 #endif
 };
 
+static const VkVideoProfileInfoKHR *get_video_profile(FFVulkanDecodeShared *ctx, enum AVCodecID codec_id)
+{
+    const VkVideoProfileListInfoKHR *profile_list;
+
+    VkStructureType profile_struct_type =
+        codec_id == AV_CODEC_ID_H264 ? VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PROFILE_INFO_KHR :
+        codec_id == AV_CODEC_ID_HEVC ? VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PROFILE_INFO_KHR :
+        codec_id == AV_CODEC_ID_AV1  ? VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_PROFILE_INFO_MESA :
+        0;
+
+    profile_list = ff_vk_find_struct(ctx->s.hwfc->create_pnext,
+                                     VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR);
+    if (!profile_list)
+        return NULL;
+
+    for (int i = 0; i < profile_list->profileCount; i++)
+        if (ff_vk_find_struct(profile_list->pProfiles[i].pNext, profile_struct_type))
+            return &profile_list->pProfiles[i];
+
+    return NULL;
+}
+
 int ff_vk_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
 {
     int err;
     FFVulkanDecodeContext *src_ctx = src->internal->hwaccel_priv_data;
     FFVulkanDecodeContext *dst_ctx = dst->internal->hwaccel_priv_data;
 
+    if (!dst_ctx->exec_pool.cmd_bufs) {
+        FFVulkanDecodeShared *ctx = (FFVulkanDecodeShared *)src_ctx->shared_ref->data;
+
+        const VkVideoProfileInfoKHR *profile = get_video_profile(ctx, dst->codec_id);
+        if (!profile) {
+            av_log(dst, AV_LOG_ERROR, "Video profile missing from frames context!\n");
+            return AVERROR(EINVAL);
+        }
+
+        err = ff_vk_exec_pool_init(&ctx->s, &ctx->qf,
+                                   &dst_ctx->exec_pool,
+                                   src_ctx->exec_pool.pool_size,
+                                   src_ctx->exec_pool.nb_queries,
+                                   VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR, 0,
+                                   profile);
+        if (err < 0)
+            return err;
+    }
+
     err = av_buffer_replace(&dst_ctx->shared_ref, src_ctx->shared_ref);
     if (err < 0)
         return err;
@@ -271,7 +312,7 @@ void ff_vk_decode_flush(AVCodecContext *avctx)
     };
 
     VkCommandBuffer cmd_buf;
-    FFVkExecContext *exec = ff_vk_exec_get(&ctx->exec_pool);
+    FFVkExecContext *exec = ff_vk_exec_get(&dec->exec_pool);
 
     ff_vk_exec_start(&ctx->s, exec);
     cmd_buf = exec->buf;
@@ -317,7 +358,7 @@ int ff_vk_decode_frame(AVCodecContext *avctx,
     size_t data_size = FFALIGN(vp->slices_size,
                                ctx->caps.minBitstreamBufferSizeAlignment);
 
-    FFVkExecContext *exec = ff_vk_exec_get(&ctx->exec_pool);
+    FFVkExecContext *exec = ff_vk_exec_get(&dec->exec_pool);
 
     /* The current decoding reference has to be bound as an inactive reference */
     VkVideoReferenceSlotInfoKHR *cur_vk_ref;
@@ -326,7 +367,7 @@ int ff_vk_decode_frame(AVCodecContext *avctx,
     cur_vk_ref[0].slotIndex = -1;
     decode_start.referenceSlotCount++;
 
-    if (ctx->exec_pool.nb_queries) {
+    if (dec->exec_pool.nb_queries) {
         int64_t prev_sub_res = 0;
         ff_vk_exec_wait(&ctx->s, exec);
         ret = ff_vk_exec_get_query(&ctx->s, exec, NULL, &prev_sub_res);
@@ -495,14 +536,14 @@ int ff_vk_decode_frame(AVCodecContext *avctx,
     vk->CmdBeginVideoCodingKHR(cmd_buf, &decode_start);
 
     /* Start status query */
-    if (ctx->exec_pool.nb_queries)
-        vk->CmdBeginQuery(cmd_buf, ctx->exec_pool.query_pool, exec->query_idx + 0, 0);
+    if (dec->exec_pool.nb_queries)
+        vk->CmdBeginQuery(cmd_buf, dec->exec_pool.query_pool, exec->query_idx + 0, 0);
 
     vk->CmdDecodeVideoKHR(cmd_buf, &vp->decode_info);
 
     /* End status query */
-    if (ctx->exec_pool.nb_queries)
-        vk->CmdEndQuery(cmd_buf, ctx->exec_pool.query_pool, exec->query_idx + 0);
+    if (dec->exec_pool.nb_queries)
+        vk->CmdEndQuery(cmd_buf, dec->exec_pool.query_pool, exec->query_idx + 0);
 
     vk->CmdEndVideoCodingKHR(cmd_buf, &decode_end);
 
@@ -555,9 +596,6 @@ static void free_common(void *opaque, uint8_t *data)
     FFVulkanContext *s = &ctx->s;
     FFVulkanFunctions *vk = &ctx->s.vkfn;
 
-    /* Wait on and free execution pool */
-    ff_vk_exec_pool_free(s, &ctx->exec_pool);
-
     /* Destroy layered view */
     if (ctx->layered_view)
         vk->DestroyImageView(s->hwctx->act_dev, ctx->layered_view, s->hwctx->alloc);
@@ -1029,6 +1067,11 @@ void ff_vk_decode_free_params(void *opaque, uint8_t *data)
 int ff_vk_decode_uninit(AVCodecContext *avctx)
 {
     FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = (FFVulkanDecodeShared *)dec->shared_ref->data;
+
+    /* Wait on and free execution pool */
+    ff_vk_exec_pool_free(&ctx->s, &dec->exec_pool);
+
     av_buffer_pool_uninit(&dec->tmp_pool);
     av_buffer_unref(&dec->session_params);
     av_buffer_unref(&dec->shared_ref);
@@ -1044,8 +1087,7 @@ int ff_vk_decode_init(AVCodecContext *avctx)
     FFVulkanDecodeShared *ctx;
     FFVulkanContext *s;
     FFVulkanFunctions *vk;
-    FFVkQueueFamilyCtx qf_dec;
-    const VkVideoProfileListInfoKHR *profile_list;
+    const VkVideoProfileInfoKHR *profile;
 
     VkVideoDecodeH264SessionParametersCreateInfoKHR h264_params = {
         .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_CREATE_INFO_KHR,
@@ -1089,10 +1131,9 @@ int ff_vk_decode_init(AVCodecContext *avctx)
     s->device = (AVHWDeviceContext *)s->frames->device_ref->data;
     s->hwctx = s->device->hwctx;
 
-    profile_list = ff_vk_find_struct(s->hwfc->create_pnext,
-                                     VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR);
-    if (!profile_list) {
-        av_log(avctx, AV_LOG_ERROR, "Profile list missing from frames context!");
+    profile = get_video_profile(ctx, avctx->codec_id);
+    if (!profile) {
+        av_log(avctx, AV_LOG_ERROR, "Video profile missing from frames context!\n");
         return AVERROR(EINVAL);
     }
 
@@ -1101,7 +1142,7 @@ int ff_vk_decode_init(AVCodecContext *avctx)
         goto fail;
 
     /* Create queue context */
-    qf = ff_vk_qf_init(s, &qf_dec, VK_QUEUE_VIDEO_DECODE_BIT_KHR);
+    qf = ff_vk_qf_init(s, &ctx->qf, VK_QUEUE_VIDEO_DECODE_BIT_KHR);
 
     /* Check for support */
     if (!(s->video_props[qf].videoCodecOperations &
@@ -1123,14 +1164,14 @@ int ff_vk_decode_init(AVCodecContext *avctx)
     session_create.pictureFormat = s->hwfc->format[0];
     session_create.referencePictureFormat = session_create.pictureFormat;
     session_create.pStdHeaderVersion = dec_ext[avctx->codec_id];
-    session_create.pVideoProfile = &profile_list->pProfiles[0];
+    session_create.pVideoProfile = profile;
 
-    /* Create decode exec context.
+    /* Create decode exec context for this specific main thread.
      * 2 async contexts per thread was experimentally determined to be optimal
      * for a majority of streams. */
-    err = ff_vk_exec_pool_init(s, &qf_dec, &ctx->exec_pool, 2*avctx->thread_count,
+    err = ff_vk_exec_pool_init(s, &ctx->qf, &dec->exec_pool, 2,
                                nb_q, VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR, 0,
-                               session_create.pVideoProfile);
+                               profile);
     if (err < 0)
         goto fail;
 
@@ -1168,7 +1209,8 @@ int ff_vk_decode_init(AVCodecContext *avctx)
         dpb_frames->height = s->frames->height;
 
         dpb_hwfc = dpb_frames->hwctx;
-        dpb_hwfc->create_pnext = (void *)profile_list;
+        dpb_hwfc->create_pnext = (void *)ff_vk_find_struct(ctx->s.hwfc->create_pnext,
+                                                           VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR);
         dpb_hwfc->format[0] = s->hwfc->format[0];
         dpb_hwfc->tiling = VK_IMAGE_TILING_OPTIMAL;
         dpb_hwfc->usage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR |
diff --git a/libavcodec/vulkan_decode.h b/libavcodec/vulkan_decode.h
index 4e45cbde71..1b4e1cc712 100644
--- a/libavcodec/vulkan_decode.h
+++ b/libavcodec/vulkan_decode.h
@@ -37,7 +37,7 @@ typedef struct FFVulkanDecodeProfileData {
 typedef struct FFVulkanDecodeShared {
     FFVulkanContext s;
     FFVkVideoCommon common;
-    FFVkExecPool exec_pool;
+    FFVkQueueFamilyCtx qf;
 
     VkVideoCapabilitiesKHR caps;
     VkVideoDecodeCapabilitiesKHR dec_caps;
@@ -56,6 +56,7 @@ typedef struct FFVulkanDecodeShared {
 typedef struct FFVulkanDecodeContext {
     AVBufferRef *shared_ref;
     AVBufferRef *session_params;
+    FFVkExecPool exec_pool;
 
     int dedicated_dpb; /* Oddity #1 - separate DPB images */
     int layered_dpb; /* Madness #1 - layered DPB images */