vulkan_decode: support multiple image views

Enables non-monochrome video decoding using all our existing functions in the context of an SDR decoder.
2025-08-04 22:03:09 +02:00 · 2025-02-21 01:33:54 +00:00
parent 491b65e343
commit 4495802bdb
5 changed files with 60 additions and 54 deletions
--- a/libavcodec/vulkan_av1.c
+++ b/libavcodec/vulkan_av1.c
@ -123,7 +123,7 @@ static int vk_av1_fill_pict(AVCodecContext *avctx, const AV1Frame **ref_src,
        .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
        .baseArrayLayer = ((has_grain || dec->dedicated_dpb) && ctx->common.layered_dpb) ?
                          hp->frame_id : 0,
-        .imageViewBinding = vkpic->img_view_ref,
+        .imageViewBinding = vkpic->view.ref[0],
    };
    *ref_slot = (VkVideoReferenceSlotInfoKHR) {
@ -346,7 +346,7 @@ static int vk_av1_start_frame(AVCodecContext          *avctx,
            .codedOffset = (VkOffset2D){ 0, 0 },
            .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
            .baseArrayLayer = 0,
-            .imageViewBinding = vp->img_view_out,
+            .imageViewBinding = vp->view.out[0],
        },
    };
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@ -130,9 +130,11 @@ static void init_frame(FFVulkanDecodeContext *dec, FFVulkanDecodePicture *vkpic)
    FFVulkanFunctions *vk = &ctx->s.vkfn;
    vkpic->dpb_frame     = NULL;
-    vkpic->img_view_ref  = VK_NULL_HANDLE;
+    for (int i = 0; i < AV_NUM_DATA_POINTERS; i++) {
-    vkpic->img_view_out  = VK_NULL_HANDLE;
+        vkpic->view.ref[i]  = VK_NULL_HANDLE;
-    vkpic->img_view_dest = VK_NULL_HANDLE;
+        vkpic->view.out[i]  = VK_NULL_HANDLE;
        vkpic->view.dst[i]  = VK_NULL_HANDLE;
    }
    vkpic->destroy_image_view = vk->DestroyImageView;
    vkpic->wait_semaphores = vk->WaitSemaphores;
@ -149,14 +151,14 @@ int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic,
    /* If the decoder made a blank frame to make up for a missing ref, or the
     * frame is the current frame so it's missing one, create a re-representation */
-    if (vkpic->img_view_ref)
+    if (vkpic->view.ref[0])
        return 0;
    init_frame(dec, vkpic);
    if (ctx->common.layered_dpb && alloc_dpb) {
-        vkpic->img_view_ref = ctx->common.layered_view;
+        vkpic->view.ref[0] = ctx->common.layered_view;
-        vkpic->img_aspect_ref = ctx->common.layered_aspect;
+        vkpic->view.aspect_ref[0] = ctx->common.layered_aspect;
    } else if (alloc_dpb) {
        AVHWFramesContext *dpb_frames = (AVHWFramesContext *)ctx->common.dpb_hwfc_ref->data;
        AVVulkanFramesContext *dpb_hwfc = dpb_frames->hwctx;
@ -166,13 +168,13 @@ int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic,
            return AVERROR(ENOMEM);
        err = ff_vk_create_view(&ctx->s, &ctx->common,
-                                &vkpic->img_view_ref, &vkpic->img_aspect_ref,
+                                &vkpic->view.ref[0], &vkpic->view.aspect_ref[0],
                                (AVVkFrame *)vkpic->dpb_frame->data[0],
                                dpb_hwfc->format[0], !is_current);
        if (err < 0)
            return err;
-        vkpic->img_view_dest = vkpic->img_view_ref;
+        vkpic->view.dst[0] = vkpic->view.ref[0];
    }
    if (!alloc_dpb || is_current) {
@ -180,15 +182,15 @@ int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic,
        AVVulkanFramesContext *hwfc = frames->hwctx;
        err = ff_vk_create_view(&ctx->s, &ctx->common,
-                                &vkpic->img_view_out, &vkpic->img_aspect,
+                                &vkpic->view.out[0], &vkpic->view.aspect[0],
                                (AVVkFrame *)pic->data[0],
                                hwfc->format[0], !is_current);
        if (err < 0)
            return err;
        if (!alloc_dpb) {
-            vkpic->img_view_ref = vkpic->img_view_out;
+            vkpic->view.ref[0] = vkpic->view.out[0];
-            vkpic->img_aspect_ref = vkpic->img_aspect;
+            vkpic->view.aspect_ref[0] = vkpic->view.aspect[0];
        }
    }
@ -201,41 +203,41 @@ int ff_vk_decode_prepare_frame_sdr(FFVulkanDecodeContext *dec, AVFrame *pic,
 {
    int err;
    FFVulkanDecodeShared *ctx = dec->shared_ctx;
    AVHWFramesContext *frames = (AVHWFramesContext *)pic->hw_frames_ctx->data;
    vkpic->slices_size = 0;
-    if (vkpic->img_view_ref)
+    if (vkpic->view.ref[0])
        return 0;
    init_frame(dec, vkpic);
-    if (ctx->common.layered_dpb && alloc_dpb) {
+    for (int i = 0; i < av_pix_fmt_count_planes(frames->sw_format); i++) {
-        vkpic->img_view_ref = ctx->common.layered_view;
+        if (alloc_dpb) {
-        vkpic->img_aspect_ref = ctx->common.layered_aspect;
+            vkpic->dpb_frame = vk_get_dpb_pool(ctx);
-    } else if (alloc_dpb) {
+            if (!vkpic->dpb_frame)
-        vkpic->dpb_frame = vk_get_dpb_pool(ctx);
+                return AVERROR(ENOMEM);
        if (!vkpic->dpb_frame)
            return AVERROR(ENOMEM);
-        err = ff_vk_create_imageview(&ctx->s,
+            err = ff_vk_create_imageview(&ctx->s,
-                                     &vkpic->img_view_ref, &vkpic->img_aspect_ref,
+                                         &vkpic->view.ref[i], &vkpic->view.aspect_ref[i],
-                                     vkpic->dpb_frame, 0, rep_fmt);
+                                         vkpic->dpb_frame, i, rep_fmt);
-        if (err < 0)
+            if (err < 0)
-            return err;
+                return err;
-        vkpic->img_view_dest = vkpic->img_view_ref;
+            vkpic->view.dst[i] = vkpic->view.ref[i];
-    }
+        }
-    if (!alloc_dpb || is_current) {
+        if (!alloc_dpb || is_current) {
-        err = ff_vk_create_imageview(&ctx->s,
+            err = ff_vk_create_imageview(&ctx->s,
-                                     &vkpic->img_view_out, &vkpic->img_aspect,
+                                         &vkpic->view.out[i], &vkpic->view.aspect[i],
-                                     pic, 0, rep_fmt);
+                                         pic, i, rep_fmt);
-        if (err < 0)
+            if (err < 0)
-            return err;
+                return err;
-        if (!alloc_dpb) {
+            if (!alloc_dpb) {
-            vkpic->img_view_ref = vkpic->img_view_out;
+                vkpic->view.ref[i] = vkpic->view.out[i];
-            vkpic->img_aspect_ref = vkpic->img_aspect;
+                vkpic->view.aspect_ref[i] = vkpic->view.aspect[i];
            }
        }
    }
@ -467,7 +469,7 @@ int ff_vk_decode_frame(AVCodecContext *avctx,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .image = vkf->img[0],
        .subresourceRange = (VkImageSubresourceRange) {
-            .aspectMask = vp->img_aspect,
+            .aspectMask = vp->view.aspect[0],
            .layerCount = 1,
            .levelCount = 1,
        },
@ -523,7 +525,7 @@ int ff_vk_decode_frame(AVCodecContext *avctx,
                    .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                    .image = rvkf->img[0],
                    .subresourceRange = (VkImageSubresourceRange) {
-                        .aspectMask = rvp->img_aspect_ref,
+                        .aspectMask = rvp->view.aspect_ref[0],
                        .layerCount = 1,
                        .levelCount = 1,
                    },
@ -533,7 +535,7 @@ int ff_vk_decode_frame(AVCodecContext *avctx,
            }
        }
    } else if (vp->decode_info.referenceSlotCount ||
-               vp->img_view_out != vp->img_view_ref) {
+               vp->view.out[0] != vp->view.ref[0]) {
        /* Single barrier for a single layered ref */
        err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ctx->common.layered_frame,
                                       VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR,
@ -580,12 +582,14 @@ void ff_vk_decode_free_frame(AVHWDeviceContext *dev_ctx, FFVulkanDecodePicture *
    av_buffer_unref(&vp->slices_buf);
    /* Destroy image view (out) */
-    if (vp->img_view_out && vp->img_view_out != vp->img_view_dest)
+    for (int i = 0; i < AV_NUM_DATA_POINTERS; i++) {
-        vp->destroy_image_view(hwctx->act_dev, vp->img_view_out, hwctx->alloc);
+        if (vp->view.out[i] && vp->view.out[i] != vp->view.dst[i])
            vp->destroy_image_view(hwctx->act_dev, vp->view.out[i], hwctx->alloc);
-    /* Destroy image view (ref, unlayered) */
+        /* Destroy image view (ref, unlayered) */
-    if (vp->img_view_dest)
+        if (vp->view.dst[i])
-        vp->destroy_image_view(hwctx->act_dev, vp->img_view_dest, hwctx->alloc);
+            vp->destroy_image_view(hwctx->act_dev, vp->view.dst[i], hwctx->alloc);
    }
    av_frame_free(&vp->dpb_frame);
 }
--- a/libavcodec/vulkan_decode.h
+++ b/libavcodec/vulkan_decode.h
@ -85,11 +85,13 @@ typedef struct FFVulkanDecodeContext {
 typedef struct FFVulkanDecodePicture {
    AVFrame                        *dpb_frame;      /* Only used for out-of-place decoding. */
-    VkImageView                     img_view_ref;   /* Image representation view (reference) */
+    struct {
-    VkImageView                     img_view_out;   /* Image representation view (output-only) */
+        VkImageView                     ref[AV_NUM_DATA_POINTERS];        /* Image representation view (reference) */
-    VkImageView                     img_view_dest;  /* Set to img_view_out if no layered refs are used */
+        VkImageView                     out[AV_NUM_DATA_POINTERS];        /* Image representation view (output-only) */
-    VkImageAspectFlags              img_aspect;     /* Image plane mask bits */
+        VkImageView                     dst[AV_NUM_DATA_POINTERS];        /* Same handle as ref[] when a per-picture (non-layered) DPB view is created; destroyed on free */
-    VkImageAspectFlags              img_aspect_ref; /* Only used for out-of-place decoding */
+        VkImageAspectFlags              aspect[AV_NUM_DATA_POINTERS];     /* Image plane mask bits */
        VkImageAspectFlags              aspect_ref[AV_NUM_DATA_POINTERS]; /* Only used for out-of-place decoding */
    } view;
    VkSemaphore                     sem;
    uint64_t                        sem_value;
--- a/libavcodec/vulkan_h264.c
+++ b/libavcodec/vulkan_h264.c
@ -98,7 +98,7 @@ static int vk_h264_fill_pict(AVCodecContext *avctx, H264Picture **ref_src,
        .codedOffset = (VkOffset2D){ 0, 0 },
        .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
        .baseArrayLayer = ctx->common.layered_dpb ? dpb_slot_index : 0,
-        .imageViewBinding = vkpic->img_view_ref,
+        .imageViewBinding = vkpic->view.ref[0],
    };
    *ref_slot = (VkVideoReferenceSlotInfoKHR) {
@ -471,7 +471,7 @@ static int vk_h264_start_frame(AVCodecContext          *avctx,
            .codedOffset = (VkOffset2D){ 0, 0 },
            .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
            .baseArrayLayer = 0,
-            .imageViewBinding = vp->img_view_out,
+            .imageViewBinding = vp->view.out[0],
        },
    };
--- a/libavcodec/vulkan_hevc.c
+++ b/libavcodec/vulkan_hevc.c
@ -164,7 +164,7 @@ static int vk_hevc_fill_pict(AVCodecContext *avctx, HEVCFrame **ref_src,
        .codedOffset = (VkOffset2D){ 0, 0 },
        .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
        .baseArrayLayer = ctx->common.layered_dpb ? pic_id : 0,
-        .imageViewBinding = vkpic->img_view_ref,
+        .imageViewBinding = vkpic->view.ref[0],
    };
    *ref_slot = (VkVideoReferenceSlotInfoKHR) {
@ -823,7 +823,7 @@ static int vk_hevc_start_frame(AVCodecContext          *avctx,
            .codedOffset = (VkOffset2D){ 0, 0 },
            .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
            .baseArrayLayer = 0,
-            .imageViewBinding = vp->img_view_out,
+            .imageViewBinding = vp->view.out[0],
        },
    };