vulkan_decode: halve execution pool size

Determined experimentally, on various videos and hardware. On Intel, using fewer resources in-flight is around 15% faster, with similar results on Nvidia hardware.
2025-08-10 06:10:52 +02:00 · 2023-06-07 02:59:55 +02:00
parent 5f1be341c2
commit 24c4307b80
1 changed file with 3 additions and 2 deletions
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -1105,8 +1105,9 @@ int ff_vk_decode_init(AVCodecContext *avctx)
    session_create.pVideoProfile = &prof->profile_list.pProfiles[0];
    /* Create decode exec context.
-     * 4 async contexts per thread seems like a good number. */
+     * 2 async contexts per thread was experimentally determined to be optimal
-    err = ff_vk_exec_pool_init(s, &qf_dec, &ctx->exec_pool, 4*avctx->thread_count,
+     * for a majority of streams. */
+    err = ff_vk_exec_pool_init(s, &qf_dec, &ctx->exec_pool, 2*avctx->thread_count,
                               nb_q, VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR, 0,
                               session_create.pVideoProfile);
    if (err < 0)