lavc: add a ProRes Vulkan hwaccel

Add a shader-based Apple ProRes decoder. It supports all codec features for profiles up to the 4444 XQ profile, i.e.: - 4:2:2 and 4:4:4 chroma subsampling - 10- and 12-bit component depth - Interlacing - Alpha The implementation consists of two shaders: the VLD kernel does entropy decoding for color/alpha, and the IDCT kernel performs the inverse transform on color components. Benchmarks for a 4K yuv422p10 sample: - AMD Radeon 6700XT: 178 fps - Intel i7 Tiger Lake: 37 fps - NVIDIA Orin Nano: 70 fps
2025-10-30 23:18:11 +02:00 · 2025-06-02 21:31:59 +02:00
parent 3fd55d952e
commit 98412edfed
10 changed files with 1044 additions and 3 deletions
--- a/2
+++ b/2
@@ -3343,6 +3343,8 @@ prores_videotoolbox_hwaccel_deps="videotoolbox"
 prores_videotoolbox_hwaccel_select="prores_decoder"
 prores_raw_vulkan_hwaccel_deps="vulkan spirv_compiler"
 prores_raw_vulkan_hwaccel_select="prores_raw_decoder"
+prores_vulkan_hwaccel_deps="vulkan spirv_compiler"
+prores_vulkan_hwaccel_select="prores_decoder"
 vc1_d3d11va_hwaccel_deps="d3d11va"
 vc1_d3d11va_hwaccel_select="vc1_decoder"
 vc1_d3d11va2_hwaccel_deps="d3d11va"
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1106,6 +1106,7 @@ OBJS-$(CONFIG_VP9_VULKAN_HWACCEL)         += vulkan_decode.o vulkan_vp9.o
 OBJS-$(CONFIG_VP8_QSV_HWACCEL)            += qsvdec.o
 OBJS-$(CONFIG_VVC_VAAPI_HWACCEL)          += vaapi_vvc.o
 OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL)  += vulkan_decode.o vulkan_prores_raw.o
+OBJS-$(CONFIG_PRORES_VULKAN_HWACCEL)      += vulkan_decode.o vulkan_prores.o

 # Objects duplicated from other libraries for shared builds
 SHLIBOBJS                              += log2_tab.o reverse.o
@@ -1350,7 +1351,7 @@ SKIPHEADERS-$(CONFIG_QSVENC)           += qsvenc.h
 SKIPHEADERS-$(CONFIG_VAAPI)            += vaapi_decode.h vaapi_hevc.h vaapi_encode.h
 SKIPHEADERS-$(CONFIG_VDPAU)            += vdpau.h vdpau_internal.h
 SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX)     += videotoolbox.h vt_internal.h
-SKIPHEADERS-$(CONFIG_VULKAN)           += ffv1_vulkan.h vulkan_video.h \
+SKIPHEADERS-$(CONFIG_VULKAN)           += ffv1_vulkan.h prores_vulkan.h vulkan_video.h \
                                          vulkan_encode.h vulkan_decode.h
 SKIPHEADERS-$(CONFIG_V4L2_M2M)         += v4l2_buffers.h v4l2_context.h v4l2_m2m.h
 SKIPHEADERS-$(CONFIG_ZLIB)             += zlib_wrapper.h
--- a/libavcodec/hwaccels.h
+++ b/libavcodec/hwaccels.h
@@ -68,6 +68,7 @@ extern const struct FFHWAccel ff_mpeg4_vdpau_hwaccel;
 extern const struct FFHWAccel ff_mpeg4_videotoolbox_hwaccel;
 extern const struct FFHWAccel ff_prores_videotoolbox_hwaccel;
 extern const struct FFHWAccel ff_prores_raw_vulkan_hwaccel;
+extern const struct FFHWAccel ff_prores_vulkan_hwaccel;
 extern const struct FFHWAccel ff_vc1_d3d11va_hwaccel;
 extern const struct FFHWAccel ff_vc1_d3d11va2_hwaccel;
 extern const struct FFHWAccel ff_vc1_d3d12va_hwaccel;
--- a/libavcodec/proresdec.c
+++ b/libavcodec/proresdec.c
@@ -251,7 +251,7 @@ static int decode_frame_header(ProresContext *ctx, const uint8_t *buf,
    }

    if (pix_fmt != ctx->pix_fmt) {
-#define HWACCEL_MAX (CONFIG_PRORES_VIDEOTOOLBOX_HWACCEL)
+#define HWACCEL_MAX (CONFIG_PRORES_VIDEOTOOLBOX_HWACCEL + CONFIG_PRORES_VULKAN_HWACCEL)
 #if HWACCEL_MAX
        enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
        int ret;
@@ -260,6 +260,9 @@ static int decode_frame_header(ProresContext *ctx, const uint8_t *buf,

 #if CONFIG_PRORES_VIDEOTOOLBOX_HWACCEL
        *fmtp++ = AV_PIX_FMT_VIDEOTOOLBOX;
+#endif
+#if CONFIG_PRORES_VULKAN_HWACCEL
+        *fmtp++ = AV_PIX_FMT_VULKAN;
 #endif
        *fmtp++ = ctx->pix_fmt;
        *fmtp = AV_PIX_FMT_NONE;
@@ -872,6 +875,9 @@ const FFCodec ff_prores_decoder = {
    .hw_configs     = (const AVCodecHWConfigInternal *const []) {
 #if CONFIG_PRORES_VIDEOTOOLBOX_HWACCEL
        HWACCEL_VIDEOTOOLBOX(prores),
+#endif
+#if CONFIG_PRORES_VULKAN_HWACCEL
+        HWACCEL_VULKAN(prores),
 #endif
        NULL
    },
--- a/libavcodec/vulkan/Makefile
+++ b/libavcodec/vulkan/Makefile
@@ -17,6 +17,11 @@ OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL)  +=  vulkan/common.o \
 OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL) += vulkan/common.o \
                                            vulkan/prores_raw.o

+OBJS-$(CONFIG_PRORES_VULKAN_HWACCEL) += vulkan/common.o \
+                                        vulkan/prores_reset.o \
+                                        vulkan/prores_vld.o \
+                                        vulkan/prores_idct.o
+
 VULKAN = $(subst $(SRC_PATH)/,,$(wildcard $(SRC_PATH)/libavcodec/vulkan/*.comp))
 .SECONDARY: $(VULKAN:.comp=.c)
 libavcodec/vulkan/%.c: TAG = VULKAN
--- a/libavcodec/vulkan/prores_idct.comp
+++ b/libavcodec/vulkan/prores_idct.comp
@@ -0,0 +1,123 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Shared coefficient storage for two macroblocks (4*2 blocks); each 8-entry row is padded to 9 floats to avoid bank conflicts */
+shared float blocks[4*2][8*(8+1)];
+
+uint get_px(uint tex_idx, ivec2 pos)
+{ /* Load the .x channel of the texel at pos from image dst[tex_idx] */
+#ifndef INTERLACED
+    return imageLoad(dst[tex_idx], pos).x;
+#else
+    return imageLoad(dst[tex_idx], ivec2(pos.x, (pos.y << 1) + bottom_field)).x; /* field row y maps to frame row 2*y + bottom_field */
+#endif
+}
+
+void put_px(uint tex_idx, ivec2 pos, uint v)
+{ /* Store v (replicated to all four channels) at pos in image dst[tex_idx] */
+#ifndef INTERLACED
+    imageStore(dst[tex_idx], pos, uvec4(v));
+#else
+    imageStore(dst[tex_idx], ivec2(pos.x, (pos.y << 1) + bottom_field), uvec4(v)); /* field row y maps to frame row 2*y + bottom_field */
+#endif
+}
+
+/* 7.4 Inverse Transform: in-place 8-point 1D inverse transform over blocks[block][k*stride + offset], k = 0..7 */
+void idct(uint block, uint offset, uint stride)
+{
+    float c0 = blocks[block][0*stride + offset]; /* gather the 8 inputs before any output is written */
+    float c1 = blocks[block][1*stride + offset];
+    float c2 = blocks[block][2*stride + offset];
+    float c3 = blocks[block][3*stride + offset];
+    float c4 = blocks[block][4*stride + offset];
+    float c5 = blocks[block][5*stride + offset];
+    float c6 = blocks[block][6*stride + offset];
+    float c7 = blocks[block][7*stride + offset];
+
+    float tmp1 = c6 * 1.4142134189605712891 + (c2 - c6); /* even part: a1..a4 depend only on c0, c2, c4, c6 */
+    float tmp2 = c6 * 1.4142134189605712891 - (c2 - c6);
+
+    float a1 = (c0 + c4) * 0.35355341434478759766 + tmp1 * 0.46193981170654296875;
+    float a4 = (c0 + c4) * 0.35355341434478759766 - tmp1 * 0.46193981170654296875;
+
+    float a3 = (c0 - c4) * 0.35355341434478759766 + tmp2 * 0.19134169816970825195;
+    float a2 = (c0 - c4) * 0.35355341434478759766 - tmp2 * 0.19134169816970825195;
+
+    float tmp3 = (c3 - c5) * 0.70710682868957519531 + c7; /* odd part: m1..m4 depend only on c1, c3, c5, c7 */
+    float tmp4 = (c3 - c5) * 0.70710682868957519531 - c7;
+
+    float tmp5 = (c5 - c7) *  1.4142134189605712891 + (c5 - c7) + (c1 - c3);
+    float tmp6 = (c5 - c7) * -1.4142134189605712891 + (c5 - c7) + (c1 - c3);
+
+    float m1 = tmp3 *  2.6131260395050048828 + tmp5;
+    float m4 = tmp3 * -2.6131260395050048828 + tmp5;
+
+    float m2 = tmp4 *  1.0823919773101806641 + tmp6;
+    float m3 = tmp4 * -1.0823919773101806641 + tmp6;
+
+    blocks[block][0*stride + offset] = m1 *  0.49039259552955627441  + a1; /* outputs k and 7-k share the even term and differ in the sign of the odd term */
+    blocks[block][7*stride + offset] = m1 * -0.49039259552955627441  + a1;
+    blocks[block][1*stride + offset] = m2 *  0.41573479771614074707  + a2;
+    blocks[block][6*stride + offset] = m2 * -0.41573479771614074707  + a2;
+    blocks[block][2*stride + offset] = m3 *  0.27778509259223937988  + a3;
+    blocks[block][5*stride + offset] = m3 * -0.27778509259223937988  + a3;
+    blocks[block][3*stride + offset] = m4 *  0.097545139491558074951 + a4;
+    blocks[block][4*stride + offset] = m4 * -0.097545139491558074951 + a4;
+}
+
+void main(void)
+{ /* IDCT kernel entry point: each invocation handles column gid.x of one 8x8 block (rows gid.y*8 .. gid.y*8+7) of component gid.z */
+    uvec3 gid = gl_GlobalInvocationID, lid = gl_LocalInvocationID;
+    uint comp = gid.z, block = (lid.y << 2) | (lid.x >> 3), idx = lid.x & 0x7; /* block: shared-memory slot of this 8x8 block; idx: column inside it */
+    uint chroma_shift = comp != 0 ? log2_chroma_w : 0;
+    bool act = gid.x < mb_width << (4 - chroma_shift); /* false past the right edge of the plane; such invocations still reach every barrier() below */
+
+    /* Coalesced load of DCT coeffs in shared memory, second part of inverse quantization */
+    if (act) {
+        /**
+         * According to spec indexing an array in push constant memory with
+         * a non-dynamically uniform value is illegal ($15.9.1 in v1.4.326),
+         * so copy the whole matrix locally.
+         */
+        uint8_t[64] qmat = comp == 0 ? qmat_luma : qmat_chroma;
+        [[unroll]] for (uint i = 0; i < 8; ++i) {
+            int v = sign_extend(int(get_px(comp, ivec2(gid.x, (gid.y << 3) | i))), 16); /* stored coefficients are 16-bit two's complement */
+            blocks[block][i * 9 + idx] = float(v * int(qmat[(i << 3) + idx])); /* row i, column idx; rows are 9 floats apart */
+        }
+    }
+
+    /* Row-wise iDCT */
+    barrier();
+    idct(block, idx * 9, 1); /* this invocation transforms row idx (stride 1) */
+
+    /* Column-wise iDCT */
+    barrier();
+    idct(block, idx, 9); /* this invocation transforms column idx (stride 9) */
+
+    float fact = 1.0f / (1 << (12 - depth)), off = 1 << (depth - 1); /* scale by 2^-(12 - depth), then add the mid level 2^(depth - 1) */
+    int maxv = (1 << depth) - 1;
+
+    /* 7.5.1 Color Component Samples. Rescale, clamp and write back to global memory */
+    barrier();
+    if (act) {
+        [[unroll]] for (uint i = 0; i < 8; ++i) {
+            float v = blocks[block][i * 9 + idx] * fact + off;
+            put_px(comp, ivec2(gid.x, (gid.y << 3) | i), clamp(int(v), 0, maxv)); /* overwrites the coefficient read from the same texel above */
+        }
+    }
+}
--- a/libavcodec/vulkan/prores_reset.comp
+++ b/libavcodec/vulkan/prores_reset.comp
@@ -0,0 +1,38 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+void main(void)
+{ /* Reset kernel entry point: each invocation zeroes one texel of the luma plane and, inside the chroma width, of both chroma planes */
+    uvec3 gid = gl_GlobalInvocationID;
+#ifndef INTERLACED
+    ivec2 pos = ivec2(gid);
+#else
+    ivec2 pos = ivec2(gid.x, (gid.y << 1) + bottom_field); /* field row y maps to frame row 2*y + bottom_field */
+#endif
+
+    /* Clear luma plane */
+    imageStore(dst[0], pos, uvec4(0));
+
+    /* Clear chroma planes (only as wide as mb_width * 16 >> log2_chroma_w samples) */
+    if (gid.x < mb_width << (4 - log2_chroma_w)) {
+        imageStore(dst[1], pos, uvec4(0));
+        imageStore(dst[2], pos, uvec4(0));
+    }
+
+    /* Alpha plane doesn't need a clear because it is not sparsely encoded */
+}
--- a/libavcodec/vulkan/prores_vld.comp
+++ b/libavcodec/vulkan/prores_vld.comp
@@ -0,0 +1,317 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define U8(x)  (uint8_t (x)) /* explicit uint8_t constructor, used for the scan table literals */
+#define U16(x) (uint16_t(x)) /* explicit uint16_t constructor, used for the codebook table literals */
+
+void put_px(uint tex_idx, ivec2 pos, uint v)
+{ /* Store v (replicated to all four channels) at pos in image dst[tex_idx] */
+#ifndef INTERLACED
+    imageStore(dst[tex_idx], pos, uvec4(v));
+#else
+    imageStore(dst[tex_idx], ivec2(pos.x, (pos.y << 1) + bottom_field), uvec4(v)); /* field row y maps to frame row 2*y + bottom_field */
+#endif
+}
+
+/* 7.5.3 Pixel Arrangement: map block index pos inside a slice to that block's (x, y) sample offset (multiples of 8); callers pass luma = 1 for luma, 0 for chroma */
+ivec2 pos_to_block(uint pos, uint luma)
+{
+    return ivec2((pos & -luma - 2) + luma >> 1, pos >> luma & 1) << 3; /* parses as ((pos & (-luma - 2)) + luma) >> 1 and (pos >> luma) & 1 */
+}
+
+/* 7.1.1.2 Signed Golomb Combination Codes: even x maps to x/2, odd x maps to -(x+1)/2 (two's complement held in a uint) */
+uint to_signed(uint x)
+{
+    return (x >> 1) ^ -(x & 1); /* -(x & 1) is all-ones for odd x, so the XOR yields ~(x >> 1) */
+}
+
+/* 7.1.1.1 Golomb Combination Codes: codebook packs last_rice_q in bits 0-3, krice in bits 4-7 and kexp in bits 8-11 */
+uint decode_codeword(inout GetBitContext gb, int codebook)
+{
+    int last_rice_q = bitfieldExtract(codebook, 0, 4),
+        krice       = bitfieldExtract(codebook, 4, 4),
+        kexp        = bitfieldExtract(codebook, 8, 4);
+
+    int q = 31 - findMSB(show_bits(gb, 32)); /* number of leading zero bits in the next 32 bits */
+    if (q <= last_rice_q) {
+        /* Golomb-Rice encoding: consume q zeros, the terminating 1 and krice suffix bits; the mask drops the terminating 1 */
+        return (get_bits(gb, krice + q + 1) & ~(1 << krice)) + (q << krice);
+    } else {
+        /* exp-Golomb encoding: values start right after the largest Rice-coded value */
+        return get_bits(gb, (q << 1) + kexp - last_rice_q) - (1 << kexp) + ((last_rice_q + 1) << krice);
+    }
+}
+
+void decode_comp(in GetBitContext gb, uvec2 mb_pos, uint mb_count, uint qscale)
+{ /* Entropy-decode one color component (gid.z) of a slice of mb_count macroblocks at mb_pos and store each coefficient, multiplied by qscale, into image gid.z */
+    uvec3 gid = gl_GlobalInvocationID;
+    uint is_luma = uint(gid.z == 0);
+    uint chroma_shift = bool(is_luma) ? 0 : log2_chroma_w;
+
+    uint num_blocks = mb_count << (2 - chroma_shift); /* 4 blocks per macroblock, halved when the chroma plane is horizontally subsampled */
+    ivec2 base_pos = ivec2(mb_pos.x << (4 - chroma_shift), mb_pos.y << 4);
+
+    /* 7.1.1.3 DC Coefficients */
+    {
+        /* First coeff */
+        uint c = to_signed(decode_codeword(gb, 0x650));
+        put_px(gid.z, base_pos, c * qscale & 0xffff);
+
+        /**
+         * Table 9, encoded as (last_rice_q << 0) | (krice or kexp << 4) | ((kexp or kexp + 1) << 8)
+         * According to the SMPTE document, abs(prev_dc_diff) should be used
+         * to index the table, duplicating the entries removes the abs operation.
+         */
+        const uint16_t dc_codebook[] = { U16(0x100),
+                                         U16(0x210), U16(0x210),
+                                         U16(0x321), U16(0x321),
+                                         U16(0x430), U16(0x430), };
+
+        uint cw = 5, prev_dc_diff = 0;
+        for (int i = 1; i < num_blocks; ++i) {
+            cw = decode_codeword(gb, dc_codebook[min(cw, 6)]);
+
+            int s = int(prev_dc_diff) >> 31; /* all-ones when the previous difference was negative */
+            c += prev_dc_diff = (to_signed(cw) ^ s) - s; /* negate the new difference when s is all-ones */
+
+            put_px(gid.z, base_pos + pos_to_block(i, is_luma), c * qscale & 0xffff);
+        }
+    }
+
+    /* 7.1.1.4 AC Coefficients */
+    {
+        /* Table 10 */
+        const uint16_t ac_run_codebook  [] = { U16(0x102), U16(0x102), U16(0x101), U16(0x101),
+                                               U16(0x100), U16(0x211), U16(0x211), U16(0x211),
+                                               U16(0x211), U16(0x210), U16(0x210), U16(0x210),
+                                               U16(0x210), U16(0x210), U16(0x210), U16(0x320), };
+
+        /* Table 11 */
+        const uint16_t ac_level_codebook[] = { U16(0x202), U16(0x101), U16(0x102), U16(0x100),
+                                               U16(0x210), U16(0x210), U16(0x210), U16(0x210),
+                                               U16(0x320) };
+
+#ifndef INTERLACED
+        /* Figure 4, encoded as (x << 0) | (y << 4) */
+        const uint8_t scan_tbl[] = {
+            U8(0x00), U8(0x01), U8(0x10), U8(0x11), U8(0x02), U8(0x03), U8(0x12), U8(0x13),
+            U8(0x20), U8(0x21), U8(0x30), U8(0x31), U8(0x22), U8(0x23), U8(0x32), U8(0x33),
+            U8(0x04), U8(0x05), U8(0x14), U8(0x24), U8(0x15), U8(0x06), U8(0x07), U8(0x16),
+            U8(0x25), U8(0x34), U8(0x35), U8(0x26), U8(0x17), U8(0x27), U8(0x36), U8(0x37),
+            U8(0x40), U8(0x41), U8(0x50), U8(0x60), U8(0x51), U8(0x42), U8(0x43), U8(0x52),
+            U8(0x61), U8(0x70), U8(0x71), U8(0x62), U8(0x53), U8(0x44), U8(0x45), U8(0x54),
+            U8(0x63), U8(0x72), U8(0x73), U8(0x64), U8(0x55), U8(0x46), U8(0x47), U8(0x56),
+            U8(0x65), U8(0x74), U8(0x75), U8(0x66), U8(0x57), U8(0x67), U8(0x76), U8(0x77),
+        };
+#else
+        /* Figure 5 */
+        const uint8_t scan_tbl[] = {
+            U8(0x00), U8(0x10), U8(0x01), U8(0x11), U8(0x20), U8(0x30), U8(0x21), U8(0x31),
+            U8(0x02), U8(0x12), U8(0x03), U8(0x13), U8(0x22), U8(0x32), U8(0x23), U8(0x33),
+            U8(0x40), U8(0x50), U8(0x41), U8(0x42), U8(0x51), U8(0x60), U8(0x70), U8(0x61),
+            U8(0x52), U8(0x43), U8(0x53), U8(0x62), U8(0x71), U8(0x72), U8(0x63), U8(0x73),
+            U8(0x04), U8(0x14), U8(0x05), U8(0x06), U8(0x15), U8(0x24), U8(0x34), U8(0x25),
+            U8(0x16), U8(0x07), U8(0x17), U8(0x26), U8(0x35), U8(0x44), U8(0x54), U8(0x45),
+            U8(0x36), U8(0x27), U8(0x37), U8(0x46), U8(0x55), U8(0x64), U8(0x74), U8(0x65),
+            U8(0x56), U8(0x47), U8(0x57), U8(0x66), U8(0x75), U8(0x76), U8(0x67), U8(0x77),
+        };
+#endif
+
+        uint block_mask  = num_blocks - 1; /* num_blocks is a power of two, so the mask selects the block index */
+        uint block_shift = findLSB(num_blocks);
+
+        uint pos = num_blocks - 1, run = 4, level = 1, s; /* pos starts on the last DC slot: coefficients are interleaved across blocks */
+        while (pos < num_blocks << 6) {
+            int left = left_bits(gb);
+            if (left <= 0 || (left < 32 && show_bits(gb, left) == 0)) /* stop when only zero padding remains */
+                break;
+
+            run   = decode_codeword(gb, ac_run_codebook  [min(run,   15)]);
+            level = decode_codeword(gb, ac_level_codebook[min(level, 8 )]);
+            s     = get_bits(gb, 1);
+            pos  += run + 1;
+            if (pos >= num_blocks << 6) /* corrupt run length: pos >> block_shift would index past scan_tbl[63] */
+                break;
+            uint bidx  = pos & block_mask, scan = scan_tbl[pos >> block_shift];
+            ivec2 spos = pos_to_block(bidx, is_luma);
+            ivec2 bpos = ivec2(scan & 0xf, scan >> 4);
+
+            uint c = ((level + 1) ^ -s) + s; /* magnitude level + 1, negated when the sign bit s is set */
+            put_px(gid.z, base_pos + spos + bpos, c * qscale & 0xffff);
+        }
+    }
+}
+
+/* 7.1.2 Scanned Alpha: decode the run-length coded alpha samples of a slice of mb_count macroblocks at mb_pos and store them, rescaled to depth bits, into image 3 */
+void decode_alpha(in GetBitContext gb, uvec2 mb_pos, uint mb_count)
+{
+    uvec3 gid = gl_GlobalInvocationID;
+
+    ivec2 base_pos = ivec2(mb_pos) << 4;
+    uint block_shift = findMSB(mb_count) + 4, block_mask = (1 << block_shift) - 1; /* slice width in samples is mb_count * 16 = 1 << block_shift */
+
+    uint mask = (1 << (4 << alpha_info)) - 1; /* 8-bit mask when alpha_info == 1, 16-bit mask when alpha_info == 2 */
+    uint num_values = (mb_count << 4) * min(height - (gid.y << 4), 16); /* slice width times up to 16 rows, clipped at the bottom edge */
+
+    int num_cw_bits  = alpha_info == 1 ? 5 : 8,
+        num_flc_bits = alpha_info == 1 ? 9 : 17;
+
+    uint alpha_rescale_lshift = alpha_info == 1 ? depth - 8 : 16,
+         alpha_rescale_rshift = 16 - depth;
+
+    uint alpha = -1; /* all-ones start value; masked down after the first difference is added */
+    for (uint pos = 0; pos < num_values;) {
+        uint diff, run;
+
+        /* Decode run value */
+        {
+            uint bits = show_bits(gb, num_cw_bits), q = num_cw_bits - 1 - findMSB(bits);
+
+            /* Tables 13/14 */
+            if (q != 0) {
+                uint m = (bits >> 1) + 1, s = bits & 1;
+                diff = (m ^ -s) + s; /* short code: magnitude m, negated when s is set */
+                skip_bits(gb, num_cw_bits);
+            } else {
+                diff = get_bits(gb, num_flc_bits); /* leading 1: fixed-length code */
+            }
+
+            alpha = alpha + diff & mask; /* parses as (alpha + diff) & mask */
+        }
+
+        /* Decode run length */
+        {
+            uint bits = show_bits(gb, 5), q = 4 - findMSB(bits);
+
+            /* Table 12 */
+            if (q == 0) {
+                run = 1;
+                skip_bits(gb, 1);
+            } else if (q <= 4) {
+                run = bits + 1;
+                skip_bits(gb, 5);
+            } else {
+                run = get_bits(gb, 16) + 1; /* reached when the 5 shown bits are all zero */
+            }
+
+            run = min(run, num_values - pos); /* never write past the end of the slice */
+        }
+
+        /**
+         * FFmpeg doesn't support color and alpha with different precision,
+         * so we need to rescale to the color range.
+         */
+        uint val = (alpha << alpha_rescale_lshift) | (alpha >> alpha_rescale_rshift);
+        for (uint end = pos + run; pos < end; ++pos)
+            put_px(3, base_pos + ivec2(pos & block_mask, pos >> block_shift), val & 0xffff); /* samples are laid out row by row across the slice */
+    }
+}
+
+void main(void)
+{ /* VLD kernel entry point: one invocation per slice column (gid.x), slice row (gid.y) and component (gid.z; 0-2 color, 3 alpha) */
+    uvec3 gid = gl_GlobalInvocationID;
+    if (gid.x >= slice_width || gid.y >= slice_height)
+        return;
+
+    uint slice_idx = gid.y * slice_width + gid.x;
+    uint slice_off  = slice_offsets[slice_idx],
+         slice_size = slice_offsets[slice_idx + 1] - slice_off; /* size is the distance to the next recorded offset */
+
+    u8buf bs = u8buf(slice_data + slice_off);
+
+    /* Decode slice header */
+    uint hdr_size, y_size, u_size, v_size, a_size;
+    hdr_size = bs[0].v >> 3; /* NOTE(review): sizes read from the bitstream are not checked against slice_size here */
+
+    /* Table 15 */
+    uint qidx   = clamp(bs[1].v, 1, 224),
+         qscale = qidx > 128 ? (qidx - 96) << 2 : qidx;
+
+    y_size = (uint(bs[2].v) << 8) | bs[3].v; /* 16-bit big-endian sizes */
+    u_size = (uint(bs[4].v) << 8) | bs[5].v;
+
+    /**
+     * The alpha_info field can be 0 even when an alpha plane is present,
+     * if skip_alpha is enabled, so use the header size instead.
+     */
+    if (hdr_size > 6)
+        v_size = (uint(bs[6].v) << 8) | bs[7].v;
+    else
+        v_size = slice_size - hdr_size - y_size - u_size;
+
+    a_size = slice_size - hdr_size - y_size - u_size - v_size; /* whatever follows the color data is alpha; 0 in the else case above */
+
+    GetBitContext gb;
+    switch (gid.z) { /* point the bit reader at this invocation's component payload */
+        case 0:
+            init_get_bits(gb, u8buf(bs + hdr_size),                            int(y_size));
+            break;
+        case 1:
+            init_get_bits(gb, u8buf(bs + hdr_size + y_size),                   int(u_size));
+            break;
+        case 2:
+            init_get_bits(gb, u8buf(bs + hdr_size + y_size + u_size),          int(v_size));
+            break;
+        case 3:
+            init_get_bits(gb, u8buf(bs + hdr_size + y_size + u_size + v_size), int(a_size));
+            break;
+    }
+
+    /**
+     * Support for the grayscale "extension" in the prores_aw encoder.
+     * According to the spec, entropy coded data should never be empty,
+     * and instead contain at least the DC coefficients.
+     * This avoids undefined behavior.
+     */
+    if (left_bits(gb) == 0)
+        return;
+
+    /**
+     * 4 ProRes Frame Structure
+     * ProRes tiles pictures into a grid of slices, whose size is determined
+     * by the log2_slice_width parameter (height is always 1 MB).
+     * Each slice has a width of (1 << log2_slice_width) MBs, until the picture
+     * cannot accommodate a full one. At this point, the remaining space
+     * is recursively completed using the first smaller power of two that fits
+     * (see Figure 1).
+     * The maximum number of extra slices is 3, when log2_slice_width is 3,
+     * with sizes 4, 2 and 1 MBs.
+     * The mb_width parameter therefore also represents the number of full slices,
+     * when interpreted as a fixed-point number with log2_slice_width fractional bits.
+     */
+    uint frac      = bitfieldExtract(uint(mb_width), 0, log2_slice_width),
+         num_extra = bitCount(frac); /* one extra (narrower) slice per set bit of the fractional part */
+
+    uint diff = slice_width - gid.x - 1, /* distance of this slice from the right edge, in slices */
+         off  = max(int(diff - num_extra + 1) << 2, 0);
+
+    uint log2_width = min(findLSB(frac - diff >> diff) + diff + off, log2_slice_width); /* the shift parses as (frac - diff) >> diff */
+
+    uint mb_x = (min(gid.x, slice_width - num_extra) << log2_slice_width) +
+                (frac & (0xf << log2_width + 1)), /* the shift parses as 0xf << (log2_width + 1) */
+         mb_y = gid.y;
+    uint mb_count = 1 << log2_width;
+
+    if (gid.z < 3) {
+        /* Color entropy decoding, inverse scanning, first part of inverse quantization */
+        decode_comp(gb, uvec2(mb_x, mb_y), mb_count, qscale);
+    } else {
+        /* Alpha entropy decoding */
+        decode_alpha(gb, uvec2(mb_x, mb_y), mb_count);
+    }
+}
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -26,7 +26,8 @@

 #define DECODER_IS_SDR(codec_id) \
    (((codec_id) == AV_CODEC_ID_FFV1) || \
-     ((codec_id) == AV_CODEC_ID_PRORES_RAW))
+     ((codec_id) == AV_CODEC_ID_PRORES_RAW) || \
+     ((codec_id) == AV_CODEC_ID_PRORES))

 #if CONFIG_H264_VULKAN_HWACCEL
 extern const FFVulkanDecodeDescriptor ff_vk_dec_h264_desc;
@@ -46,6 +47,9 @@ extern const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc;
 #if CONFIG_PRORES_RAW_VULKAN_HWACCEL
 extern const FFVulkanDecodeDescriptor ff_vk_dec_prores_raw_desc;
 #endif
+#if CONFIG_PRORES_VULKAN_HWACCEL
+extern const FFVulkanDecodeDescriptor ff_vk_dec_prores_desc;
+#endif

 static const FFVulkanDecodeDescriptor *dec_descs[] = {
 #if CONFIG_H264_VULKAN_HWACCEL
@@ -66,6 +70,9 @@ static const FFVulkanDecodeDescriptor *dec_descs[] = {
 #if CONFIG_PRORES_RAW_VULKAN_HWACCEL
    &ff_vk_dec_prores_raw_desc,
 #endif
+#if CONFIG_PRORES_VULKAN_HWACCEL
+    &ff_vk_dec_prores_desc,
+#endif
 };

 typedef struct FFVulkanDecodeProfileData {
--- a/libavcodec/vulkan_prores.c
+++ b/libavcodec/vulkan_prores.c
@@ -0,0 +1,541 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "proresdec.h"
+#include "vulkan_decode.h"
+#include "hwaccel_internal.h"
+#include "libavutil/mem.h"
+#include "libavutil/vulkan.h"
+#include "libavutil/vulkan_loader.h"
+#include "libavutil/vulkan_spirv.h"
+
+extern const char *ff_source_common_comp;
+extern const char *ff_source_prores_reset_comp;
+extern const char *ff_source_prores_vld_comp;
+extern const char *ff_source_prores_idct_comp;
+
+const FFVulkanDecodeDescriptor ff_vk_dec_prores_desc = { /* decoder descriptor, referenced from dec_descs[] in vulkan_decode.c */
+    .codec_id    = AV_CODEC_ID_PRORES,
+    .queue_flags = VK_QUEUE_COMPUTE_BIT, /* decoding runs as compute shaders */
+};
+
+typedef struct ProresVulkanDecodePicture { /* per-picture hwaccel state */
+    FFVulkanDecodePicture vp; /* common Vulkan decode picture state */
+
+    AVBufferRef *slice_offset_buf; /* holds slice_count + 1 uint32 byte offsets: entry i is where slice i starts, entry i + 1 where it ends */
+    uint32_t slice_num;            /* number of slices recorded so far for this picture */
+
+    uint32_t bitstream_start;      /* reset to 0 in start_frame; not otherwise used in the code visible here */
+    uint32_t bitstream_size;       /* running end offset of the slice data recorded so far */
+} ProresVulkanDecodePicture;
+
+typedef struct ProresVulkanDecodeContext { /* decoder-wide state, reached through ctx->sd_ctx */
+    struct ProresVulkanShaderVariants {
+        FFVulkanShader reset; /* clears the color planes */
+        FFVulkanShader vld;   /* entropy decoding */
+        FFVulkanShader idct;  /* inverse transform */
+    } shaders[2]; /* Progressive/interlaced; indexed by (frame_type != 0) */
+
+    AVBufferPool *slice_offset_pool; /* pool backing the per-picture slice_offset_buf */
+} ProresVulkanDecodeContext;
+
+typedef struct ProresVkParameters { /* push constants handed to the shaders; NOTE(review): layout presumably must match the shader-side declaration, which is not visible here */
+    VkDeviceAddress slice_data; /* device address of the slice data buffer */
+    uint32_t bitstream_size;    /* total bytes of slice data recorded for the picture */
+
+    uint16_t width;             /* avctx->width */
+    uint16_t height;            /* avctx->height */
+    uint16_t mb_width;          /* picture width in macroblocks */
+    uint16_t mb_height;         /* picture height in macroblocks */
+    uint16_t slice_width;       /* slices per macroblock row (slice_count / mb_height) */
+    uint16_t slice_height;      /* slice rows; set to mb_height */
+    uint8_t  log2_slice_width;  /* av_log2() of the nominal slice width in macroblocks */
+    uint8_t  log2_chroma_w;     /* horizontal chroma shift of the software pixel format */
+    uint8_t  depth;             /* avctx->bits_per_raw_sample */
+    uint8_t  alpha_info;        /* copied from the ProRes context */
+    uint8_t  bottom_field;      /* added to 2*y by the INTERLACED shader variants */
+
+    uint8_t  qmat_luma  [64];   /* luma quantization matrix */
+    uint8_t  qmat_chroma[64];   /* chroma quantization matrix */
+} ProresVkParameters;
+
+static int vk_prores_start_frame(AVCodecContext          *avctx,
+                                 const AVBufferRef       *buffer_ref,
+                                 av_unused const uint8_t *buffer,
+                                 av_unused uint32_t       size)
+{ /* Per-picture setup: optionally host-map the packet, allocate the slice offset buffer, prepare the output frame and reset the slice counters */
+    ProresContext             *pr = avctx->priv_data;
+    FFVulkanDecodeContext    *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared     *ctx = dec->shared_ctx;
+    ProresVulkanDecodeContext *pv = ctx->sd_ctx;
+    ProresVulkanDecodePicture *pp = pr->hwaccel_picture_private;
+    FFVulkanDecodePicture     *vp = &pp->vp;
+
+    int err; /* only assigned through RET() */
+
+    /* Host map the input slices data if supported */
+    if (!vp->slices_buf && ctx->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY)
+        RET(ff_vk_host_map_buffer(&ctx->s, &vp->slices_buf, buffer_ref->data,
+                                  buffer_ref,
+                                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                                  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT));
+
+    /* Allocate slice offsets buffer: one uint32 per slice plus a final end offset */
+    RET(ff_vk_get_pooled_buffer(&ctx->s, &pv->slice_offset_pool,
+                                &pp->slice_offset_buf,
+                                VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                                VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+                                NULL, (pr->slice_count + 1) * sizeof(uint32_t),
+                                VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                                VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
+
+    /* Prepare frame to be used */
+    RET(ff_vk_decode_prepare_frame_sdr(dec, pr->frame, vp, 1,
+                                       FF_VK_REP_NATIVE, 0));
+
+    pp->slice_num = 0;
+    pp->bitstream_start = pp->bitstream_size = 0;
+
+fail: /* also reached on success; presumably RET() stores the call result in err and jumps here on error - verify against the macro */
+    return err;
+}
+
+static int vk_prores_decode_slice(AVCodecContext *avctx,
+                                  const uint8_t  *data,
+                                  uint32_t        size)
+{ /* Record the [start, end) byte offsets of one slice and, when the packet is not host-mapped, append its data to the slices buffer */
+    ProresContext             *pr = avctx->priv_data;
+    ProresVulkanDecodePicture *pp = pr->hwaccel_picture_private;
+    FFVulkanDecodePicture     *vp = &pp->vp;
+
+    FFVkBuffer *slice_offset = (FFVkBuffer *)pp->slice_offset_buf->data;
+    FFVkBuffer *slices_buf   = vp->slices_buf ? (FFVkBuffer *)vp->slices_buf->data : NULL;
+
+    /* Skip picture header: with a host-mapped packet, offsets are relative to the start of the mapping */
+    if (slices_buf && slices_buf->host_ref && !pp->slice_num)
+        pp->bitstream_size = data - slices_buf->mapped_mem;
+
+    AV_WN32(slice_offset->mapped_mem + (pp->slice_num + 0) * sizeof(uint32_t),
+            pp->bitstream_size); /* start of this slice */
+    AV_WN32(slice_offset->mapped_mem + (pp->slice_num + 1) * sizeof(uint32_t),
+            pp->bitstream_size += size); /* end of this slice; overwritten as the start of the next one with the same value */
+
+    if (!slices_buf || !slices_buf->host_ref) {
+        int err = ff_vk_decode_add_slice(avctx, vp, data, size, 0,
+                                         &pp->slice_num, NULL); /* presumably copies the data and advances slice_num - verify against vulkan_decode.c */
+        if (err < 0)
+            return err;
+    } else {
+        pp->slice_num++; /* host-mapped: data is already visible to the GPU, only count the slice */
+    }
+
+    return 0;
+}
+
+static int vk_prores_end_frame(AVCodecContext *avctx)
+{
+    ProresContext             *pr = avctx->priv_data;
+    FFVulkanDecodeContext    *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared     *ctx = dec->shared_ctx;
+    FFVulkanFunctions         *vk = &ctx->s.vkfn;
+    ProresVulkanDecodeContext *pv = ctx->sd_ctx;
+    ProresVulkanDecodePicture *pp = pr->hwaccel_picture_private;
+    FFVulkanDecodePicture     *vp = &pp->vp;
+
+    ProresVkParameters pd;
+    FFVkBuffer *slice_data, *slice_offsets;
+    struct ProresVulkanShaderVariants *shaders;
+    VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS];
+    VkBufferMemoryBarrier2 buf_bar[2];
+    int nb_img_bar = 0, nb_buf_bar = 0, err;
+    const AVPixFmtDescriptor *pix_desc;
+
+    if (!pp->slice_num)
+        return 0;
+
+    pix_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
+    if (!pix_desc)
+        return AVERROR(EINVAL);
+
+    slice_data    = (FFVkBuffer *)vp->slices_buf->data;
+    slice_offsets = (FFVkBuffer *)pp->slice_offset_buf->data;
+
+    shaders = &pv->shaders[pr->frame_type != 0];
+
+    pd = (ProresVkParameters) {
+        .slice_data       = slice_data->address,
+        .bitstream_size   = pp->bitstream_size,
+
+        .width            = avctx->width,
+        .height           = avctx->height,
+        .mb_width         = pr->mb_width,
+        .mb_height        = pr->mb_height,
+        .slice_width      = pr->slice_count / pr->mb_height,
+        .slice_height     = pr->mb_height,
+        .log2_slice_width = av_log2(pr->slice_mb_width),
+        .log2_chroma_w    = pix_desc->log2_chroma_w,
+        .depth            = avctx->bits_per_raw_sample,
+        .alpha_info       = pr->alpha_info,
+        .bottom_field     = pr->first_field ^ (pr->frame_type == 1),
+    };
+
+    memcpy(pd.qmat_luma,   pr->qmat_luma,   sizeof(pd.qmat_luma  ));
+    memcpy(pd.qmat_chroma, pr->qmat_chroma, sizeof(pd.qmat_chroma));
+
+    FFVkExecContext *exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool);
+    RET(ff_vk_exec_start(&ctx->s, exec));
+
+    /* Prepare deps */
+    RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, pr->frame,
+                                 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                                 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
+
+    RET(ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value,
+                                    pr->frame));
+
+    RET(ff_vk_exec_add_dep_buf(&ctx->s, exec,
+                               (AVBufferRef *[]){ vp->slices_buf, pp->slice_offset_buf },
+                               2, 0));
+
+    /* Transfer ownership to the exec context */
+    vp->slices_buf = pp->slice_offset_buf = NULL;
+
+    /* Input frame barrier */
+    ff_vk_frame_barrier(&ctx->s, exec, pr->frame, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
+                        VK_IMAGE_LAYOUT_GENERAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+
+    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+        .sType                    = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .pBufferMemoryBarriers    = buf_bar,
+        .bufferMemoryBarrierCount = nb_buf_bar,
+        .pImageMemoryBarriers     = img_bar,
+        .imageMemoryBarrierCount  = nb_img_bar,
+    });
+    nb_img_bar = nb_buf_bar = 0;
+
+    /* Reset */
+    ff_vk_shader_update_img_array(&ctx->s, exec, &shaders->reset,
+                                  pr->frame, vp->view.out,
+                                  0, 0,
+                                  VK_IMAGE_LAYOUT_GENERAL,
+                                  VK_NULL_HANDLE);
+
+    ff_vk_shader_update_push_const(&ctx->s, exec, &shaders->reset,
+                                   VK_SHADER_STAGE_COMPUTE_BIT,
+                                   0, sizeof(pd), &pd);
+
+    ff_vk_exec_bind_shader(&ctx->s, exec, &shaders->reset);
+
+    vk->CmdDispatch(exec->buf, pr->mb_width << 1, pr->mb_height << 1, 1);
+
+    /* Input frame barrier after reset */
+    ff_vk_frame_barrier(&ctx->s, exec, pr->frame, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_WRITE_BIT,
+                        VK_IMAGE_LAYOUT_GENERAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+
+    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+        .sType                    = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .pBufferMemoryBarriers    = buf_bar,
+        .bufferMemoryBarrierCount = nb_buf_bar,
+        .pImageMemoryBarriers     = img_bar,
+        .imageMemoryBarrierCount  = nb_img_bar,
+    });
+    nb_img_bar = nb_buf_bar = 0;
+
+    /* Entropy decode */
+    ff_vk_shader_update_desc_buffer(&ctx->s, exec, &shaders->vld,
+                                    0, 0, 0,
+                                    slice_offsets,
+                                    0, (pp->slice_num + 1) * sizeof(uint32_t),
+                                    VK_FORMAT_UNDEFINED);
+    ff_vk_shader_update_img_array(&ctx->s, exec, &shaders->vld,
+                                  pr->frame, vp->view.out,
+                                  0, 1,
+                                  VK_IMAGE_LAYOUT_GENERAL,
+                                  VK_NULL_HANDLE);
+
+    ff_vk_shader_update_push_const(&ctx->s, exec, &shaders->vld,
+                                   VK_SHADER_STAGE_COMPUTE_BIT,
+                                   0, sizeof(pd), &pd);
+
+    ff_vk_exec_bind_shader(&ctx->s, exec, &shaders->vld);
+
+    vk->CmdDispatch(exec->buf, AV_CEIL_RSHIFT(pr->slice_count / pr->mb_height, 3), AV_CEIL_RSHIFT(pr->mb_height, 3),
+                    3 + !!pr->alpha_info);
+
+    /* Synchronize vld and idct shaders */
+    nb_img_bar = 0;
+    ff_vk_frame_barrier(&ctx->s, exec, pr->frame, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
+                        VK_IMAGE_LAYOUT_GENERAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+
+    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+        .sType                    = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .pBufferMemoryBarriers    = buf_bar,
+        .bufferMemoryBarrierCount = nb_buf_bar,
+        .pImageMemoryBarriers     = img_bar,
+        .imageMemoryBarrierCount  = nb_img_bar,
+    });
+    nb_img_bar = nb_buf_bar = 0;
+
+    /* Inverse transform */
+    ff_vk_shader_update_img_array(&ctx->s, exec, &shaders->idct,
+                                  pr->frame, vp->view.out,
+                                  0, 0,
+                                  VK_IMAGE_LAYOUT_GENERAL,
+                                  VK_NULL_HANDLE);
+
+    ff_vk_exec_bind_shader(&ctx->s, exec, &shaders->idct);
+
+    ff_vk_shader_update_push_const(&ctx->s, exec, &shaders->idct,
+                                   VK_SHADER_STAGE_COMPUTE_BIT,
+                                   0, sizeof(pd), &pd);
+
+    vk->CmdDispatch(exec->buf, AV_CEIL_RSHIFT(pr->mb_width, 1), pr->mb_height, 3);
+
+    RET(ff_vk_exec_submit(&ctx->s, exec));
+
+fail:
+    return err;
+}
+
+/*
+ * Declare the push-constant block used by the ProRes decoder shaders and
+ * register the matching push-constant range on the shader.
+ *
+ * The GLSLC() lines below spell out a GLSL uniform block named
+ * "pushConstants"; the macro is defined outside this function and presumably
+ * appends the text to the source of `shd` -- confirm against its definition.
+ *
+ * NOTE(review): the block is declared with the "scalar" layout and the range
+ * registered below is sizeof(ProresVkParameters) bytes, so the member order
+ * and widths listed here presumably have to mirror the C-side
+ * ProresVkParameters struct exactly -- keep both in sync when editing either.
+ *
+ * Returns the value of ff_vk_shader_add_push_const().
+ */
+static int add_push_data(FFVulkanShader *shd)
+{
+    GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
+    /* Bitstream location and size */
+    GLSLC(1,    u8buf    slice_data;                               );
+    GLSLC(1,    uint     bitstream_size;                           );
+    GLSLC(0,                                                       );
+    /* Picture and slice geometry, followed by per-frame coding parameters */
+    GLSLC(1,    uint16_t width;                                    );
+    GLSLC(1,    uint16_t height;                                   );
+    GLSLC(1,    uint16_t mb_width;                                 );
+    GLSLC(1,    uint16_t mb_height;                                );
+    GLSLC(1,    uint16_t slice_width;                              );
+    GLSLC(1,    uint16_t slice_height;                             );
+    GLSLC(1,    uint8_t  log2_slice_width;                         );
+    GLSLC(1,    uint8_t  log2_chroma_w;                            );
+    GLSLC(1,    uint8_t  depth;                                    );
+    GLSLC(1,    uint8_t  alpha_info;                               );
+    GLSLC(1,    uint8_t  bottom_field;                             );
+    GLSLC(0,                                                       );
+    /* 8x8 quantisation matrices for luma and chroma */
+    GLSLC(1,    uint8_t  qmat_luma  [8*8];                         );
+    GLSLC(1,    uint8_t  qmat_chroma[8*8];                         );
+    GLSLC(0, };                                                    );
+
+    /* Range starts at offset 0 and is visible to the compute stage only */
+    return ff_vk_shader_add_push_const(shd, 0, sizeof(ProresVkParameters),
+                                       VK_SHADER_STAGE_COMPUTE_BIT);
+}
+
+/*
+ * Assemble, compile, link and register one compute shader of the decoder.
+ *
+ * @param avctx       codec context (currently unused here)
+ * @param s           Vulkan context the shader is created in
+ * @param pool        execution pool the shader is registered with
+ * @param spv         SPIR-V compiler used to compile the generated GLSL
+ * @param shd         shader object to initialise
+ * @param name        shader name passed to ff_vk_shader_init()
+ * @param entrypoint  entry point name used for compilation and linking
+ * @param descs       descriptor bindings of the single descriptor set
+ * @param num_descs   number of entries in descs
+ * @param source      GLSL body appended after the common prologue
+ * @param local_size  workgroup size packed as 0xXXYYZZ (one byte per axis)
+ * @param interlaced  if non-zero, "#define INTERLACED" is emitted before
+ *                    the shader body
+ *
+ * @return 0 on success, a negative error code from the first failing step
+ *         otherwise.
+ */
+static int init_shader(AVCodecContext *avctx, FFVulkanContext *s,
+                       FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
+                       FFVulkanShader *shd, const char *name, const char *entrypoint,
+                       FFVulkanDescriptorSetBinding *descs, int num_descs,
+                       const char *source, int local_size, int interlaced)
+{
+    uint8_t *spv_data;
+    size_t spv_len;
+    void *spv_opaque = NULL;
+    int err;
+
+    RET(ff_vk_shader_init(s, shd, name,
+                          VK_SHADER_STAGE_COMPUTE_BIT,
+                          (const char *[]) { "GL_EXT_buffer_reference",
+                                             "GL_EXT_buffer_reference2" }, 2,
+                          (local_size >> 16) & 0xff,
+                          (local_size >>  8) & 0xff,
+                          (local_size >>  0) & 0xff,
+                          0));
+
+    /* Common code */
+    GLSLD(ff_source_common_comp);
+
+    /* Push constants layout */
+    RET(add_push_data(shd));
+
+    RET(ff_vk_shader_add_descriptor_set(s, shd, descs, num_descs, 0, 0));
+
+    /* Must precede the shader body so the variant is selected at compile time */
+    if (interlaced)
+        av_bprintf(&shd->src, "#define INTERLACED\n");
+
+    /* Main code */
+    GLSLD(source);
+
+    RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, entrypoint,
+                            &spv_opaque));
+    RET(ff_vk_shader_link(s, shd, spv_data, spv_len, entrypoint));
+
+    RET(ff_vk_shader_register_exec(s, pool, shd));
+
+    err = 0;
+
+fail:
+    /* The compiler's intermediate data is released on both paths */
+    if (spv_opaque)
+        spv->free_shader(spv, &spv_opaque);
+
+    /* Propagate failures; returning 0 here would hide them from the caller */
+    return err;
+}
+
+/*
+ * Release the codec-specific state attached to the shared decode context:
+ * every shader of every variant, the slice offset buffer pool, and finally
+ * the ProresVulkanDecodeContext itself.
+ */
+static void vk_decode_prores_uninit(FFVulkanDecodeShared *ctx)
+{
+    ProresVulkanDecodeContext *pv = ctx->sd_ctx;
+    int i;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(pv->shaders); ++i) {
+        ff_vk_shader_free(&ctx->s, &pv->shaders[i].reset);
+        ff_vk_shader_free(&ctx->s, &pv->shaders[i].vld);
+        ff_vk_shader_free(&ctx->s, &pv->shaders[i].idct);
+    }
+
+    av_buffer_pool_uninit(&pv->slice_offset_pool);
+
+    /* Free through the owning pointer so ctx->sd_ctx is reset to NULL
+     * instead of being left dangling. */
+    av_freep(&ctx->sd_ctx);
+}
+
+/*
+ * Hwaccel init callback: set up the shared Vulkan decode context and build
+ * the reset, VLD and IDCT compute shaders for each entry of pv->shaders
+ * (progressive and interlaced).
+ *
+ * @return 0 on success, a negative error code otherwise. The SPIR-V compiler
+ *         is released on every exit path once it has been created.
+ */
+static int vk_decode_prores_init(AVCodecContext *avctx)
+{
+    FFVulkanDecodeContext        *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared         *ctx = NULL;
+
+    AVHWFramesContext *out_frames_ctx;
+    ProresVulkanDecodeContext *pv;
+    FFVkSPIRVCompiler *spv;
+    FFVulkanDescriptorSetBinding *desc_set;
+    int max_num_slices, i, err;
+
+    /* One slice per 16x16 macroblock is used as the bound; round up so that
+     * dimensions that are not multiples of 16 are not under-counted. */
+    max_num_slices = AV_CEIL_RSHIFT(avctx->coded_width,  4) *
+                     AV_CEIL_RSHIFT(avctx->coded_height, 4);
+
+    spv = ff_vk_spirv_init();
+    if (!spv) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    /* From here on every failure must go through "fail" to release spv */
+    err = ff_vk_decode_init(avctx);
+    if (err < 0)
+        goto fail;
+    ctx = dec->shared_ctx;
+
+    pv = ctx->sd_ctx = av_mallocz(sizeof(*pv));
+    if (!pv) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    out_frames_ctx = (AVHWFramesContext *)avctx->hw_frames_ctx->data;
+
+    /* Installed only once sd_ctx is allocated, so the callback never sees NULL */
+    ctx->sd_ctx_free = vk_decode_prores_uninit;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(pv->shaders); ++i) { /* Progressive/interlaced */
+        struct ProresVulkanShaderVariants *shaders = &pv->shaders[i];
+
+        /* Reset shader: only writes to the output planes */
+        desc_set = (FFVulkanDescriptorSetBinding []) {
+            {
+                .name       = "dst",
+                .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+                .dimensions = 2,
+                .mem_layout = ff_vk_shader_rep_fmt(out_frames_ctx->sw_format,
+                                                   FF_VK_REP_NATIVE),
+                .mem_quali  = "writeonly",
+                .elems      = av_pix_fmt_count_planes(out_frames_ctx->sw_format),
+                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+            },
+        };
+        RET(init_shader(avctx, &ctx->s, &ctx->exec_pool, spv, &shaders->reset,
+                        "prores_dec_reset", "main", desc_set, 1,
+                        ff_source_prores_reset_comp, 0x080801, i));
+
+        /* VLD shader: reads the slice offset table, writes the output planes */
+        desc_set = (FFVulkanDescriptorSetBinding []) {
+            {
+                .name        = "slice_offsets_buf",
+                .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+                .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+                .mem_quali   = "readonly",
+                .buf_content = "uint32_t slice_offsets",
+                .buf_elems   = max_num_slices + 1,
+            },
+            {
+                .name       = "dst",
+                .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+                .dimensions = 2,
+                .mem_layout = ff_vk_shader_rep_fmt(out_frames_ctx->sw_format,
+                                                   FF_VK_REP_NATIVE),
+                .mem_quali  = "writeonly",
+                .elems      = av_pix_fmt_count_planes(out_frames_ctx->sw_format),
+                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+            },
+        };
+        RET(init_shader(avctx, &ctx->s, &ctx->exec_pool, spv, &shaders->vld,
+                        "prores_dec_vld", "main", desc_set, 2,
+                        ff_source_prores_vld_comp, 0x080801, i));
+
+        /* IDCT shader: no "writeonly" qualifier on the output planes */
+        desc_set = (FFVulkanDescriptorSetBinding []) {
+            {
+                .name       = "dst",
+                .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+                .dimensions = 2,
+                .mem_layout = ff_vk_shader_rep_fmt(out_frames_ctx->sw_format,
+                                                   FF_VK_REP_NATIVE),
+                .elems      = av_pix_fmt_count_planes(out_frames_ctx->sw_format),
+                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+            },
+        };
+        RET(init_shader(avctx, &ctx->s, &ctx->exec_pool, spv, &shaders->idct,
+                        "prores_dec_idct", "main", desc_set, 1,
+                        ff_source_prores_idct_comp, 0x200201, i));
+    }
+
+    err = 0;
+
+fail:
+    /* The compiler is only needed while building the shaders */
+    spv->uninit(&spv);
+
+    return err;
+}
+
+/*
+ * Destructor for the per-frame hwaccel private data: hands the embedded
+ * FFVulkanDecodePicture to ff_vk_decode_free_frame() together with the
+ * device context carried in the opaque.
+ */
+static void vk_prores_free_frame_priv(AVRefStructOpaque _hwctx, void *data)
+{
+    ProresVulkanDecodePicture *pic = data;
+    AVHWDeviceContext       *hwdev = _hwctx.nc;
+
+    ff_vk_decode_free_frame(hwdev, &pic->vp);
+}
+
+/* Hwaccel descriptor wiring the ProRes decoder to the Vulkan compute path. */
+const FFHWAccel ff_prores_vulkan_hwaccel = {
+    /* Identification */
+    .p.name                = "prores_vulkan",
+    .p.type                = AVMEDIA_TYPE_VIDEO,
+    .p.id                  = AV_CODEC_ID_PRORES,
+    .p.pix_fmt             = AV_PIX_FMT_VULKAN,
+
+    /* Private data sizes and capabilities */
+    .priv_data_size        = sizeof(FFVulkanDecodeContext),
+    .frame_priv_data_size  = sizeof(ProresVulkanDecodePicture),
+    .caps_internal         = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE,
+
+    /* Codec-specific callbacks defined in this file */
+    .init                  = vk_decode_prores_init,
+    .start_frame           = vk_prores_start_frame,
+    .decode_slice          = vk_prores_decode_slice,
+    .end_frame             = vk_prores_end_frame,
+    .free_frame_priv       = vk_prores_free_frame_priv,
+
+    /* Generic Vulkan decode helpers */
+    .frame_params          = ff_vk_frame_params,
+    .decode_params         = ff_vk_params_invalidate,
+    .update_thread_context = ff_vk_update_thread_context,
+    .flush                 = ff_vk_decode_flush,
+    .uninit                = ff_vk_decode_uninit,
+};