1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-10-30 23:18:11 +02:00

lavc: add a ProRes Vulkan hwaccel

Add a shader-based Apple ProRes decoder.
It supports all codec features for profiles up to
the 4444 XQ profile, i.e.:
- 4:2:2 and 4:4:4 chroma subsampling
- 10- and 12-bit component depth
- Interlacing
- Alpha

The implementation consists of two shaders: the
VLD kernel does entropy decoding for color/alpha,
and the IDCT kernel performs the inverse transform
on color components.

Benchmarks for a 4k yuv422p10 sample:
- AMD Radeon 6700XT:   178 fps
- Intel i7 Tiger Lake: 37 fps
- NVidia Orin Nano:    70 fps
This commit is contained in:
averne
2025-06-02 21:31:59 +02:00
committed by Lynne
parent 3fd55d952e
commit 98412edfed
10 changed files with 1044 additions and 3 deletions

2
configure vendored
View File

@@ -3343,6 +3343,8 @@ prores_videotoolbox_hwaccel_deps="videotoolbox"
prores_videotoolbox_hwaccel_select="prores_decoder"
prores_raw_vulkan_hwaccel_deps="vulkan spirv_compiler"
prores_raw_vulkan_hwaccel_select="prores_raw_decoder"
prores_vulkan_hwaccel_deps="vulkan spirv_compiler"
prores_vulkan_hwaccel_select="prores_decoder"
vc1_d3d11va_hwaccel_deps="d3d11va"
vc1_d3d11va_hwaccel_select="vc1_decoder"
vc1_d3d11va2_hwaccel_deps="d3d11va"

View File

@@ -1106,6 +1106,7 @@ OBJS-$(CONFIG_VP9_VULKAN_HWACCEL) += vulkan_decode.o vulkan_vp9.o
OBJS-$(CONFIG_VP8_QSV_HWACCEL) += qsvdec.o
OBJS-$(CONFIG_VVC_VAAPI_HWACCEL) += vaapi_vvc.o
OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL) += vulkan_decode.o vulkan_prores_raw.o
OBJS-$(CONFIG_PRORES_VULKAN_HWACCEL) += vulkan_decode.o vulkan_prores.o
# Objects duplicated from other libraries for shared builds
SHLIBOBJS += log2_tab.o reverse.o
@@ -1350,7 +1351,7 @@ SKIPHEADERS-$(CONFIG_QSVENC) += qsvenc.h
SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_decode.h vaapi_hevc.h vaapi_encode.h
SKIPHEADERS-$(CONFIG_VDPAU) += vdpau.h vdpau_internal.h
SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.h vt_internal.h
SKIPHEADERS-$(CONFIG_VULKAN) += ffv1_vulkan.h vulkan_video.h \
SKIPHEADERS-$(CONFIG_VULKAN) += ffv1_vulkan.h prores_vulkan.h vulkan_video.h \
vulkan_encode.h vulkan_decode.h
SKIPHEADERS-$(CONFIG_V4L2_M2M) += v4l2_buffers.h v4l2_context.h v4l2_m2m.h
SKIPHEADERS-$(CONFIG_ZLIB) += zlib_wrapper.h

View File

@@ -68,6 +68,7 @@ extern const struct FFHWAccel ff_mpeg4_vdpau_hwaccel;
extern const struct FFHWAccel ff_mpeg4_videotoolbox_hwaccel;
extern const struct FFHWAccel ff_prores_videotoolbox_hwaccel;
extern const struct FFHWAccel ff_prores_raw_vulkan_hwaccel;
extern const struct FFHWAccel ff_prores_vulkan_hwaccel;
extern const struct FFHWAccel ff_vc1_d3d11va_hwaccel;
extern const struct FFHWAccel ff_vc1_d3d11va2_hwaccel;
extern const struct FFHWAccel ff_vc1_d3d12va_hwaccel;

View File

@@ -251,7 +251,7 @@ static int decode_frame_header(ProresContext *ctx, const uint8_t *buf,
}
if (pix_fmt != ctx->pix_fmt) {
#define HWACCEL_MAX (CONFIG_PRORES_VIDEOTOOLBOX_HWACCEL)
#define HWACCEL_MAX (CONFIG_PRORES_VIDEOTOOLBOX_HWACCEL + CONFIG_PRORES_VULKAN_HWACCEL)
#if HWACCEL_MAX
enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
int ret;
@@ -260,6 +260,9 @@ static int decode_frame_header(ProresContext *ctx, const uint8_t *buf,
#if CONFIG_PRORES_VIDEOTOOLBOX_HWACCEL
*fmtp++ = AV_PIX_FMT_VIDEOTOOLBOX;
#endif
#if CONFIG_PRORES_VULKAN_HWACCEL
*fmtp++ = AV_PIX_FMT_VULKAN;
#endif
*fmtp++ = ctx->pix_fmt;
*fmtp = AV_PIX_FMT_NONE;
@@ -872,6 +875,9 @@ const FFCodec ff_prores_decoder = {
.hw_configs = (const AVCodecHWConfigInternal *const []) {
#if CONFIG_PRORES_VIDEOTOOLBOX_HWACCEL
HWACCEL_VIDEOTOOLBOX(prores),
#endif
#if CONFIG_PRORES_VULKAN_HWACCEL
HWACCEL_VULKAN(prores),
#endif
NULL
},

View File

@@ -17,6 +17,11 @@ OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/common.o \
OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL) += vulkan/common.o \
vulkan/prores_raw.o
OBJS-$(CONFIG_PRORES_VULKAN_HWACCEL) += vulkan/common.o \
vulkan/prores_reset.o \
vulkan/prores_vld.o \
vulkan/prores_idct.o
VULKAN = $(subst $(SRC_PATH)/,,$(wildcard $(SRC_PATH)/libavcodec/vulkan/*.comp))
.SECONDARY: $(VULKAN:.comp=.c)
libavcodec/vulkan/%.c: TAG = VULKAN

View File

@@ -0,0 +1,123 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* Two macroblocks, padded to avoid bank conflicts */
/*
 * Workgroup-shared scratch for the inverse transform: 4*2 blocks, each
 * holding 8 rows of coefficients. Rows are stored with a stride of 9 floats
 * (8 values plus one padding slot), which is why main() indexes an element
 * as [row * 9 + column].
 */
shared float blocks[4*2][8*(8+1)];
/*
 * Read the first channel of one texel from plane tex_idx.
 * In INTERLACED builds pos.y counts lines inside the current field, so it is
 * doubled and offset by bottom_field to address the matching frame line.
 */
uint get_px(uint tex_idx, ivec2 pos)
{
#ifdef INTERLACED
    ivec2 coord = ivec2(pos.x, (pos.y << 1) + bottom_field);
#else
    ivec2 coord = pos;
#endif
    return imageLoad(dst[tex_idx], coord).x;
}
/*
 * Store v (replicated to all channels) into one texel of plane tex_idx.
 * In INTERLACED builds pos.y counts lines inside the current field, so it is
 * doubled and offset by bottom_field to address the matching frame line.
 */
void put_px(uint tex_idx, ivec2 pos, uint v)
{
#ifdef INTERLACED
    ivec2 coord = ivec2(pos.x, (pos.y << 1) + bottom_field);
#else
    ivec2 coord = pos;
#endif
    imageStore(dst[tex_idx], coord, uvec4(v));
}
/* 7.4 Inverse Transform */
/*
 * In-place 8-point 1-D inverse transform on shared memory.
 *
 * Reads the eight values blocks[block][k*stride + offset] for k = 0..7 and
 * overwrites the same eight slots with the transformed result.
 * main() calls it with (offset = idx*9, stride = 1) for one row and with
 * (offset = idx, stride = 9) for one column of the padded 8x8 block.
 *
 * The exact grouping of the floating-point expressions is kept as-is; do not
 * regroup them without re-validating the output.
 */
void idct(uint block, uint offset, uint stride)
{
/* Load the eight inputs of this row/column */
float c0 = blocks[block][0*stride + offset];
float c1 = blocks[block][1*stride + offset];
float c2 = blocks[block][2*stride + offset];
float c3 = blocks[block][3*stride + offset];
float c4 = blocks[block][4*stride + offset];
float c5 = blocks[block][5*stride + offset];
float c6 = blocks[block][6*stride + offset];
float c7 = blocks[block][7*stride + offset];
/* Even-indexed inputs (c0, c2, c4, c6) are folded into a1..a4 */
float tmp1 = c6 * 1.4142134189605712891 + (c2 - c6);
float tmp2 = c6 * 1.4142134189605712891 - (c2 - c6);
float a1 = (c0 + c4) * 0.35355341434478759766 + tmp1 * 0.46193981170654296875;
float a4 = (c0 + c4) * 0.35355341434478759766 - tmp1 * 0.46193981170654296875;
float a3 = (c0 - c4) * 0.35355341434478759766 + tmp2 * 0.19134169816970825195;
float a2 = (c0 - c4) * 0.35355341434478759766 - tmp2 * 0.19134169816970825195;
/* Odd-indexed inputs (c1, c3, c5, c7) are folded into m1..m4 */
float tmp3 = (c3 - c5) * 0.70710682868957519531 + c7;
float tmp4 = (c3 - c5) * 0.70710682868957519531 - c7;
float tmp5 = (c5 - c7) * 1.4142134189605712891 + (c5 - c7) + (c1 - c3);
float tmp6 = (c5 - c7) * -1.4142134189605712891 + (c5 - c7) + (c1 - c3);
float m1 = tmp3 * 2.6131260395050048828 + tmp5;
float m4 = tmp3 * -2.6131260395050048828 + tmp5;
float m2 = tmp4 * 1.0823919773101806641 + tmp6;
float m3 = tmp4 * -1.0823919773101806641 + tmp6;
/* Outputs come in mirrored pairs (0/7, 1/6, 2/5, 3/4): a_k +/- scaled m_k */
blocks[block][0*stride + offset] = m1 * 0.49039259552955627441 + a1;
blocks[block][7*stride + offset] = m1 * -0.49039259552955627441 + a1;
blocks[block][1*stride + offset] = m2 * 0.41573479771614074707 + a2;
blocks[block][6*stride + offset] = m2 * -0.41573479771614074707 + a2;
blocks[block][2*stride + offset] = m3 * 0.27778509259223937988 + a3;
blocks[block][5*stride + offset] = m3 * -0.27778509259223937988 + a3;
blocks[block][3*stride + offset] = m4 * 0.097545139491558074951 + a4;
blocks[block][4*stride + offset] = m4 * -0.097545139491558074951 + a4;
}
/*
 * IDCT kernel entry point.
 *
 * Each invocation owns one column (blk_col) of one 8x8 block (blk) of the
 * component selected by gid.z. It loads its column of coefficients from the
 * image, scales them by the quantization matrix, takes part in the row and
 * column transform passes through shared memory, then rescales, clamps and
 * writes its column of samples back to the image.
 */
void main(void)
{
    uvec3 gid = gl_GlobalInvocationID;
    uvec3 lid = gl_LocalInvocationID;

    uint comp    = gid.z;
    uint blk     = (lid.y << 2) | (lid.x >> 3);
    uint blk_col = lid.x & 0x7;

    uint chroma_shift = comp != 0 ? log2_chroma_w : 0;

    /* Invocations past the right edge of the plane skip all image access */
    bool in_plane = gid.x < mb_width << (4 - chroma_shift);

    /* Coalesced load of DCT coeffs in shared memory, second part of inverse quantization */
    if (in_plane) {
        /**
         * According to spec indexing an array in push constant memory with
         * a non-dynamically uniform value is illegal ($15.9.1 in v1.4.326),
         * so copy the whole matrix locally.
         */
        uint8_t[64] qmat = comp == 0 ? qmat_luma : qmat_chroma;

        [[unroll]] for (uint row = 0; row < 8; ++row) {
            ivec2 px = ivec2(gid.x, (gid.y << 3) | row);
            int coeff = sign_extend(int(get_px(comp, px)), 16);
            blocks[blk][row * 9 + blk_col] = float(coeff * int(qmat[(row << 3) + blk_col]));
        }
    }

    /* Row-wise iDCT */
    barrier();
    idct(blk, blk_col * 9, 1);

    /* Column-wise iDCT */
    barrier();
    idct(blk, blk_col, 9);

    float fact = 1.0f / (1 << (12 - depth));
    float off  = 1 << (depth - 1);
    int   maxv = (1 << depth) - 1;

    /* 7.5.1 Color Component Samples. Rescale, clamp and write back to global memory */
    barrier();
    if (in_plane) {
        [[unroll]] for (uint row = 0; row < 8; ++row) {
            ivec2 px = ivec2(gid.x, (gid.y << 3) | row);
            float sample_val = blocks[blk][row * 9 + blk_col] * fact + off;
            put_px(comp, px, clamp(int(sample_val), 0, maxv));
        }
    }
}

View File

@@ -0,0 +1,38 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
 * Reset kernel entry point: every invocation zeroes one texel position in the
 * luma plane and, when that position also lies inside the chroma planes, in
 * both chroma planes.
 */
void main(void)
{
    uvec3 gid = gl_GlobalInvocationID;

#ifdef INTERLACED
    /* Address only the lines belonging to the current field */
    ivec2 pos = ivec2(gid.x, (gid.y << 1) + bottom_field);
#else
    ivec2 pos = ivec2(gid);
#endif

    bool inside_chroma = gid.x < mb_width << (4 - log2_chroma_w);

    /* Clear luma plane */
    imageStore(dst[0], pos, uvec4(0));

    /* Clear chroma plane */
    if (inside_chroma) {
        imageStore(dst[1], pos, uvec4(0));
        imageStore(dst[2], pos, uvec4(0));
    }

    /* Alpha plane doesn't need a clear because it is not sparsely encoded */
}

View File

@@ -0,0 +1,317 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* Shorthand constructors used to build the 8-bit and 16-bit constant tables below */
#define U8(x) (uint8_t (x))
#define U16(x) (uint16_t(x))
/*
 * Store v (replicated to all channels) into one texel of plane tex_idx.
 * In INTERLACED builds pos.y counts lines inside the current field, so it is
 * doubled and offset by bottom_field to address the matching frame line.
 */
void put_px(uint tex_idx, ivec2 pos, uint v)
{
#ifdef INTERLACED
    ivec2 coord = ivec2(pos.x, (pos.y << 1) + bottom_field);
#else
    ivec2 coord = pos;
#endif
    imageStore(dst[tex_idx], coord, uvec4(v));
}
/* 7.5.3 Pixel Arrangement */
/*
 * Map a block index inside a slice to the pixel offset of that block's
 * top-left corner (in units of 8 pixels, hence the final << 3).
 *
 * luma == 1: blocks run (0,0) (1,0) (0,1) (1,1), then the next macroblock.
 * luma == 0: blocks run (0,0) (0,1) (1,0) (1,1), ...
 */
ivec2 pos_to_block(uint pos, uint luma)
{
    /* -luma - 2 is ~2 for luma and ~1 for chroma: drops the bit that selects the row */
    uint row_bit_mask = -luma - 2;
    uint col = ((pos & row_bit_mask) + luma) >> 1;
    uint row = (pos >> luma) & 1;

    return ivec2(col, row) << 3;
}
/* 7.1.1.2 Signed Golomb Combination Codes */
/*
 * Undo the sign folding of a codeword: bit 0 carries the sign and the
 * remaining bits the magnitude (0, 1, 2, 3, 4 -> 0, -1, 1, -2, 2).
 * The result is the two's complement value held in a uint.
 */
uint to_signed(uint x)
{
    uint half      = x >> 1;
    uint sign_mask = -(x & 1);

    return half ^ sign_mask;
}
/* 7.1.1.1 Golomb Combination Codes */
/*
 * Read one codeword from gb using the given codebook descriptor.
 *
 * The codebook packs three 4-bit fields: last_rice_q in bits 0..3, krice in
 * bits 4..7 and kexp in bits 8..11. The number of leading zero bits in the
 * next 32 bits of the stream (q) selects which of the two codes is in use.
 */
uint decode_codeword(inout GetBitContext gb, int codebook)
{
    int last_rice_q = bitfieldExtract(codebook, 0, 4);
    int krice       = bitfieldExtract(codebook, 4, 4);
    int kexp        = bitfieldExtract(codebook, 8, 4);

    /* Count of leading zeros in the upcoming bits */
    int q = 31 - findMSB(show_bits(gb, 32));

    if (q > last_rice_q) {
        /* exp-Golomb encoding */
        int exp_len = (q << 1) + kexp - last_rice_q;
        return get_bits(gb, exp_len) - (1 << kexp) + ((last_rice_q + 1) << krice);
    }

    /* Golomb-Rice encoding */
    int rice_len = krice + q + 1;
    return (get_bits(gb, rice_len) & ~(1 << krice)) + (q << krice);
}
/*
 * Entropy-decode one color component (selected by gl_GlobalInvocationID.z)
 * of one slice and scatter the coefficients, multiplied by qscale, into the
 * destination image.
 *
 * gb        bit reader positioned at the start of the component's data
 * mb_pos    position of the slice's first macroblock, in macroblocks
 * mb_count  number of macroblocks in the slice
 * qscale    slice quantization scale (first part of inverse quantization)
 *
 * Coefficients are stored truncated to 16 bits; the IDCT kernel sign-extends
 * them when reading back.
 */
void decode_comp(in GetBitContext gb, uvec2 mb_pos, uint mb_count, uint qscale)
{
    uvec3 gid = gl_GlobalInvocationID;
    uint is_luma = uint(gid.z == 0);
    uint chroma_shift = bool(is_luma) ? 0 : log2_chroma_w;
    uint num_blocks = mb_count << (2 - chroma_shift);
    ivec2 base_pos = ivec2(mb_pos.x << (4 - chroma_shift), mb_pos.y << 4);

    /* 7.1.1.3 DC Coefficients */
    {
        /* First coeff */
        uint c = to_signed(decode_codeword(gb, 0x650));
        put_px(gid.z, base_pos, c * qscale & 0xffff);

        /**
         * Table 9, encoded as (last_rice_q << 0) | (krice or kexp << 4) | ((kexp or kexp + 1) << 8)
         * According to the SMPTE document, abs(prev_dc_diff) should be used
         * to index the table, duplicating the entries removes the abs operation.
         */
        const uint16_t dc_codebook[] = { U16(0x100),
                                         U16(0x210), U16(0x210),
                                         U16(0x321), U16(0x321),
                                         U16(0x430), U16(0x430), };

        uint cw = 5, prev_dc_diff = 0;
        for (int i = 1; i < num_blocks; ++i) {
            cw = decode_codeword(gb, dc_codebook[min(cw, 6)]);

            /* The sign of the previous difference flips the sign of this one */
            int s = int(prev_dc_diff) >> 31;
            c += prev_dc_diff = (to_signed(cw) ^ s) - s;

            put_px(gid.z, base_pos + pos_to_block(i, is_luma), c * qscale & 0xffff);
        }
    }

    /* 7.1.1.4 AC Coefficients */
    {
        /* Table 10 */
        const uint16_t ac_run_codebook  [] = { U16(0x102), U16(0x102), U16(0x101), U16(0x101),
                                               U16(0x100), U16(0x211), U16(0x211), U16(0x211),
                                               U16(0x211), U16(0x210), U16(0x210), U16(0x210),
                                               U16(0x210), U16(0x210), U16(0x210), U16(0x320), };

        /* Table 11 */
        const uint16_t ac_level_codebook[] = { U16(0x202), U16(0x101), U16(0x102), U16(0x100),
                                               U16(0x210), U16(0x210), U16(0x210), U16(0x210),
                                               U16(0x320) };

#ifndef INTERLACED
        /* Figure 4, encoded as (x << 0) | (y << 4) */
        const uint8_t scan_tbl[] = {
            U8(0x00), U8(0x01), U8(0x10), U8(0x11), U8(0x02), U8(0x03), U8(0x12), U8(0x13),
            U8(0x20), U8(0x21), U8(0x30), U8(0x31), U8(0x22), U8(0x23), U8(0x32), U8(0x33),
            U8(0x04), U8(0x05), U8(0x14), U8(0x24), U8(0x15), U8(0x06), U8(0x07), U8(0x16),
            U8(0x25), U8(0x34), U8(0x35), U8(0x26), U8(0x17), U8(0x27), U8(0x36), U8(0x37),
            U8(0x40), U8(0x41), U8(0x50), U8(0x60), U8(0x51), U8(0x42), U8(0x43), U8(0x52),
            U8(0x61), U8(0x70), U8(0x71), U8(0x62), U8(0x53), U8(0x44), U8(0x45), U8(0x54),
            U8(0x63), U8(0x72), U8(0x73), U8(0x64), U8(0x55), U8(0x46), U8(0x47), U8(0x56),
            U8(0x65), U8(0x74), U8(0x75), U8(0x66), U8(0x57), U8(0x67), U8(0x76), U8(0x77),
        };
#else
        /* Figure 5 */
        const uint8_t scan_tbl[] = {
            U8(0x00), U8(0x10), U8(0x01), U8(0x11), U8(0x20), U8(0x30), U8(0x21), U8(0x31),
            U8(0x02), U8(0x12), U8(0x03), U8(0x13), U8(0x22), U8(0x32), U8(0x23), U8(0x33),
            U8(0x40), U8(0x50), U8(0x41), U8(0x42), U8(0x51), U8(0x60), U8(0x70), U8(0x61),
            U8(0x52), U8(0x43), U8(0x53), U8(0x62), U8(0x71), U8(0x72), U8(0x63), U8(0x73),
            U8(0x04), U8(0x14), U8(0x05), U8(0x06), U8(0x15), U8(0x24), U8(0x34), U8(0x25),
            U8(0x16), U8(0x07), U8(0x17), U8(0x26), U8(0x35), U8(0x44), U8(0x54), U8(0x45),
            U8(0x36), U8(0x27), U8(0x37), U8(0x46), U8(0x55), U8(0x64), U8(0x74), U8(0x65),
            U8(0x56), U8(0x47), U8(0x57), U8(0x66), U8(0x75), U8(0x76), U8(0x67), U8(0x77),
        };
#endif

        /* num_blocks is a power of two: low bits of pos pick the block, high bits the scan index */
        uint block_mask  = num_blocks - 1;
        uint block_shift = findLSB(num_blocks);

        uint pos = num_blocks - 1, run = 4, level = 1, s;
        while (pos < num_blocks << 6) {
            /* Stop at the end of the data, or when only zero padding bits remain */
            int left = left_bits(gb);
            if (left <= 0 || (left < 32 && show_bits(gb, left) == 0))
                break;

            run   = decode_codeword(gb, ac_run_codebook  [min(run,   15)]);
            level = decode_codeword(gb, ac_level_codebook[min(level, 8 )]);
            s     = get_bits(gb, 1);

            pos += run + 1;

            /**
             * A damaged stream can push pos past the last coefficient of the
             * slice. The loop condition only checks pos before the increment,
             * so without this test scan_tbl would be indexed out of range.
             * The shader cannot report an error; stop decoding the component.
             */
            if (pos >= num_blocks << 6)
                break;

            uint bidx = pos & block_mask, scan = scan_tbl[pos >> block_shift];
            ivec2 spos = pos_to_block(bidx, is_luma);
            ivec2 bpos = ivec2(scan & 0xf, scan >> 4);

            /* Apply the sign bit to (level + 1) */
            uint c = ((level + 1) ^ -s) + s;
            put_px(gid.z, base_pos + spos + bpos, c * qscale & 0xffff);
        }
    }
}
/* 7.1.2 Scanned Alpha */
/*
 * Decode the alpha plane of one slice and write it to plane 3.
 *
 * gb        bit reader positioned at the start of the slice's alpha data
 * mb_pos    position of the slice's first macroblock, in macroblocks
 * mb_count  number of macroblocks in the slice
 *
 * The data is a sequence of (alpha difference, run length) pairs; each pair
 * updates the running alpha value and repeats it over `run` consecutive
 * positions, which are laid out row by row across the slice.
 */
void decode_alpha(in GetBitContext gb, uvec2 mb_pos, uint mb_count)
{
uvec3 gid = gl_GlobalInvocationID;
ivec2 base_pos = ivec2(mb_pos) << 4;
/* The slice is (mb_count * 16) pixels wide: split pos into x (low bits) and y (high bits) */
uint block_shift = findMSB(mb_count) + 4, block_mask = (1 << block_shift) - 1;
/* alpha_info 1 gives an 8-bit mask, alpha_info 2 a 16-bit mask */
uint mask = (1 << (4 << alpha_info)) - 1;
/* The last row of slices may be cut short by the picture height */
uint num_values = (mb_count << 4) * min(height - (gid.y << 4), 16);
/* Short difference codeword length, and length of the escape code including its leading 1 bit */
int num_cw_bits = alpha_info == 1 ? 5 : 8,
num_flc_bits = alpha_info == 1 ? 9 : 17;
uint alpha_rescale_lshift = alpha_info == 1 ? depth - 8 : 16,
alpha_rescale_rshift = 16 - depth;
/* Starts at all ones; masked to the alpha bit width on first use */
uint alpha = -1;
for (uint pos = 0; pos < num_values;) {
uint diff, run;
/* Decode run value */
{
uint bits = show_bits(gb, num_cw_bits), q = num_cw_bits - 1 - findMSB(bits);
/* Tables 13/14 */
if (q != 0) {
/* Leading bit is 0: short signed difference, sign in the lowest bit */
uint m = (bits >> 1) + 1, s = bits & 1;
diff = (m ^ -s) + s;
skip_bits(gb, num_cw_bits);
} else {
/* Leading bit is 1: full-width value follows; the extra top bit is removed by the mask below */
diff = get_bits(gb, num_flc_bits);
}
alpha = alpha + diff & mask;
}
/* Decode run length */
{
uint bits = show_bits(gb, 5), q = 4 - findMSB(bits);
/* Table 12 */
if (q == 0) {
/* Leading bit is 1: run of one */
run = 1;
skip_bits(gb, 1);
} else if (q <= 4) {
/* 0 followed by a non-zero 4-bit value */
run = bits + 1;
skip_bits(gb, 5);
} else {
/* Five zero bits: the run is taken from a 16-bit read that includes them */
run = get_bits(gb, 16) + 1;
}
/* Never write past the end of the slice */
run = min(run, num_values - pos);
}
/**
* FFmpeg doesn't support color and alpha with different precision,
* so we need to rescale to the color range.
*/
uint val = (alpha << alpha_rescale_lshift) | (alpha >> alpha_rescale_rshift);
for (uint end = pos + run; pos < end; ++pos)
put_px(3, base_pos + ivec2(pos & block_mask, pos >> block_shift), val & 0xffff);
}
}
/*
 * VLD kernel entry point.
 *
 * One invocation handles one component of one slice: gid.x/gid.y select the
 * slice in the slice grid and gid.z the component (0..2 color, 3 alpha).
 * It parses the slice header, positions a bit reader on the component's data,
 * derives the slice's macroblock position and width, and runs the matching
 * entropy decoder.
 */
void main(void)
{
uvec3 gid = gl_GlobalInvocationID;
/* The dispatch is rounded up to whole workgroups; drop the excess invocations */
if (gid.x >= slice_width || gid.y >= slice_height)
return;
uint slice_idx = gid.y * slice_width + gid.x;
/* Consecutive entries of slice_offsets delimit this slice inside slice_data */
uint slice_off = slice_offsets[slice_idx],
slice_size = slice_offsets[slice_idx + 1] - slice_off;
u8buf bs = u8buf(slice_data + slice_off);
/* Decode slice header */
uint hdr_size, y_size, u_size, v_size, a_size;
hdr_size = bs[0].v >> 3;
/* Table 15 */
uint qidx = clamp(bs[1].v, 1, 224),
qscale = qidx > 128 ? (qidx - 96) << 2 : qidx;
/* Component sizes are 16-bit big-endian fields */
y_size = (uint(bs[2].v) << 8) | bs[3].v;
u_size = (uint(bs[4].v) << 8) | bs[5].v;
/**
* The alpha_info field can be 0 even when an alpha plane is present,
* if skip_alpha is enabled, so use the header size instead.
*/
if (hdr_size > 6)
v_size = (uint(bs[6].v) << 8) | bs[7].v;
else
v_size = slice_size - hdr_size - y_size - u_size;
/* Whatever remains after the three color components is alpha data */
a_size = slice_size - hdr_size - y_size - u_size - v_size;
/* Point the bit reader at this invocation's component */
GetBitContext gb;
switch (gid.z) {
case 0:
init_get_bits(gb, u8buf(bs + hdr_size), int(y_size));
break;
case 1:
init_get_bits(gb, u8buf(bs + hdr_size + y_size), int(u_size));
break;
case 2:
init_get_bits(gb, u8buf(bs + hdr_size + y_size + u_size), int(v_size));
break;
case 3:
init_get_bits(gb, u8buf(bs + hdr_size + y_size + u_size + v_size), int(a_size));
break;
}
/**
* Support for the grayscale "extension" in the prores_aw encoder.
* According to the spec, entropy coded data should never be empty,
* and instead contain at least the DC coefficients.
* This avoids undefined behavior.
*/
if (left_bits(gb) == 0)
return;
/**
* 4 ProRes Frame Structure
* ProRes tiles pictures into a grid of slices, whose size is determined
* by the log2_slice_width parameter (height is always 1 MB).
* Each slice has a width of (1 << log2_slice_width) MBs, until the picture
* cannot accommodate a full one. At this point, the remaining space
* is recursively completed using the first smaller power of two that fits
* (see Figure 1).
* The maximum number of extra slices is 3, when log2_slice_width is 3,
* with sizes 4, 2 and 1 MBs.
* The mb_width parameter therefore also represents the number of full slices,
* when interpreted as a fixed-point number with log2_slice_width fractional bits.
*/
/* frac: macroblocks left over after the full-width slices; one extra slice per set bit */
uint frac = bitfieldExtract(uint(mb_width), 0, log2_slice_width),
num_extra = bitCount(frac);
/* diff: how many slices follow this one in the row */
uint diff = slice_width - gid.x - 1,
off = max(int(diff - num_extra + 1) << 2, 0);
/* log2 of this slice's width in macroblocks, capped at the full slice width */
uint log2_width = min(findLSB(frac - diff >> diff) + diff + off, log2_slice_width);
/* Horizontal position of the slice's first macroblock */
uint mb_x = (min(gid.x, slice_width - num_extra) << log2_slice_width) +
(frac & (0xf << log2_width + 1)),
mb_y = gid.y;
uint mb_count = 1 << log2_width;
if (gid.z < 3) {
/* Color entropy decoding, inverse scanning, first part of inverse quantization */
decode_comp(gb, uvec2(mb_x, mb_y), mb_count, qscale);
} else {
/* Alpha entropy decoding */
decode_alpha(gb, uvec2(mb_x, mb_y), mb_count);
}
}

View File

@@ -26,7 +26,8 @@
#define DECODER_IS_SDR(codec_id) \
(((codec_id) == AV_CODEC_ID_FFV1) || \
((codec_id) == AV_CODEC_ID_PRORES_RAW))
((codec_id) == AV_CODEC_ID_PRORES_RAW) || \
((codec_id) == AV_CODEC_ID_PRORES))
#if CONFIG_H264_VULKAN_HWACCEL
extern const FFVulkanDecodeDescriptor ff_vk_dec_h264_desc;
@@ -46,6 +47,9 @@ extern const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc;
#if CONFIG_PRORES_RAW_VULKAN_HWACCEL
extern const FFVulkanDecodeDescriptor ff_vk_dec_prores_raw_desc;
#endif
#if CONFIG_PRORES_VULKAN_HWACCEL
extern const FFVulkanDecodeDescriptor ff_vk_dec_prores_desc;
#endif
static const FFVulkanDecodeDescriptor *dec_descs[] = {
#if CONFIG_H264_VULKAN_HWACCEL
@@ -66,6 +70,9 @@ static const FFVulkanDecodeDescriptor *dec_descs[] = {
#if CONFIG_PRORES_RAW_VULKAN_HWACCEL
&ff_vk_dec_prores_raw_desc,
#endif
#if CONFIG_PRORES_VULKAN_HWACCEL
&ff_vk_dec_prores_desc,
#endif
};
typedef struct FFVulkanDecodeProfileData {

541
libavcodec/vulkan_prores.c Normal file
View File

@@ -0,0 +1,541 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "proresdec.h"
#include "vulkan_decode.h"
#include "hwaccel_internal.h"
#include "libavutil/mem.h"
#include "libavutil/vulkan.h"
#include "libavutil/vulkan_loader.h"
#include "libavutil/vulkan_spirv.h"
/* GLSL source strings appended to the shaders in init_shader() and vk_decode_prores_init() */
extern const char *ff_source_common_comp;
extern const char *ff_source_prores_reset_comp;
extern const char *ff_source_prores_vld_comp;
extern const char *ff_source_prores_idct_comp;
/* Decoder descriptor for ProRes: the decoder runs as compute shaders, so it asks for a compute queue */
const FFVulkanDecodeDescriptor ff_vk_dec_prores_desc = {
.codec_id = AV_CODEC_ID_PRORES,
.queue_flags = VK_QUEUE_COMPUTE_BIT,
};
/* Per-picture hwaccel state (allocated with frame_priv_data_size) */
typedef struct ProresVulkanDecodePicture {
/* Generic Vulkan decode picture state, including the slice data buffer */
FFVulkanDecodePicture vp;
/* Buffer of 32-bit offsets delimiting each slice; slice i spans entries [i, i + 1] */
AVBufferRef *slice_offset_buf;
/* Number of slices recorded so far for this picture */
uint32_t slice_num;
/* Reset to 0 in start_frame; not otherwise used in this file */
uint32_t bitstream_start;
/* Running end offset of the slice data recorded so far */
uint32_t bitstream_size;
} ProresVulkanDecodePicture;
/* Decoder-wide state stored in FFVulkanDecodeShared.sd_ctx */
typedef struct ProresVulkanDecodeContext {
/* The three compute shaders, built once without and once with INTERLACED defined */
struct ProresVulkanShaderVariants {
FFVulkanShader reset;
FFVulkanShader vld;
FFVulkanShader idct;
} shaders[2]; /* Progressive/interlaced */
/* Pool backing the per-picture slice offset buffers */
AVBufferPool *slice_offset_pool;
} ProresVulkanDecodeContext;
/*
 * Push constants shared by all three shaders.
 * The field order and types mirror the GLSL block emitted by add_push_data()
 * (declared with the scalar layout); keep the two in sync.
 */
typedef struct ProresVkParameters {
/* Device address of the slice data buffer */
VkDeviceAddress slice_data;
uint32_t bitstream_size;
uint16_t width;
uint16_t height;
/* Picture size in macroblocks */
uint16_t mb_width;
uint16_t mb_height;
/* Slice grid size: slices per row, and rows of slices */
uint16_t slice_width;
uint16_t slice_height;
uint8_t log2_slice_width;
uint8_t log2_chroma_w;
uint8_t depth;
uint8_t alpha_info;
/* Added to the doubled line index by the INTERLACED shader variants */
uint8_t bottom_field;
uint8_t qmat_luma [64];
uint8_t qmat_chroma[64];
} ProresVkParameters;
/**
 * hwaccel start_frame callback.
 *
 * Tries to host-map the packet buffer as the slice data buffer (when the
 * external host memory extension is available and no buffer is set yet),
 * fetches a slice offsets buffer from the pool, prepares the output frame
 * and resets the per-picture slice bookkeeping.
 *
 * @return a non-negative value on success, a negative error code otherwise
 */
static int vk_prores_start_frame(AVCodecContext *avctx,
                                 const AVBufferRef *buffer_ref,
                                 av_unused const uint8_t *buffer,
                                 av_unused uint32_t size)
{
    ProresContext             *pr     = avctx->priv_data;
    ProresVulkanDecodePicture *pp     = pr->hwaccel_picture_private;
    FFVulkanDecodePicture     *vp     = &pp->vp;
    FFVulkanDecodeContext     *dec    = avctx->internal->hwaccel_priv_data;
    FFVulkanDecodeShared      *shared = dec->shared_ctx;
    ProresVulkanDecodeContext *pv     = shared->sd_ctx;

    /* One 32-bit offset per slice, plus the end offset of the last slice */
    size_t offsets_size = (pr->slice_count + 1) * sizeof(uint32_t);
    int host_map_supported =
        (shared->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) != 0;
    int err;

    /* Host map the input slices data if supported */
    if (!vp->slices_buf && host_map_supported) {
        RET(ff_vk_host_map_buffer(&shared->s, &vp->slices_buf,
                                  buffer_ref->data, buffer_ref,
                                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
                                  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT));
    }

    /* Allocate slice offsets buffer */
    RET(ff_vk_get_pooled_buffer(&shared->s, &pv->slice_offset_pool,
                                &pp->slice_offset_buf,
                                VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
                                VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
                                NULL, offsets_size,
                                VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                                VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));

    /* Prepare frame to be used */
    RET(ff_vk_decode_prepare_frame_sdr(dec, pr->frame, vp, 1,
                                       FF_VK_REP_NATIVE, 0));

    pp->slice_num       = 0;
    pp->bitstream_start = 0;
    pp->bitstream_size  = 0;

fail:
    return err;
}
/**
 * hwaccel decode_slice callback.
 *
 * Records the [start, end) byte range of the slice in the slice offsets
 * buffer. When the packet is host-mapped the data is already visible to the
 * device and only the slice counter is advanced; otherwise the slice bytes
 * are appended to the slice data buffer through ff_vk_decode_add_slice().
 *
 * @return 0 on success, a negative error code otherwise
 */
static int vk_prores_decode_slice(AVCodecContext *avctx,
                                  const uint8_t *data,
                                  uint32_t size)
{
    ProresContext             *pr = avctx->priv_data;
    ProresVulkanDecodePicture *pp = pr->hwaccel_picture_private;
    FFVulkanDecodePicture     *vp = &pp->vp;

    FFVkBuffer *offsets = (FFVkBuffer *)pp->slice_offset_buf->data;
    FFVkBuffer *slices  = vp->slices_buf ? (FFVkBuffer *)vp->slices_buf->data : NULL;
    const int host_mapped = slices && slices->host_ref;

    /* Table entry holding the start offset of the current slice */
    uint8_t *entry = offsets->mapped_mem + pp->slice_num * sizeof(uint32_t);
    int err;

    /* Skip picture header */
    if (host_mapped && !pp->slice_num)
        pp->bitstream_size = data - slices->mapped_mem;

    /* The start of this slice is the end of everything recorded so far */
    AV_WN32(entry, pp->bitstream_size);
    pp->bitstream_size += size;
    AV_WN32(entry + sizeof(uint32_t), pp->bitstream_size);

    if (host_mapped) {
        pp->slice_num++;
        return 0;
    }

    err = ff_vk_decode_add_slice(avctx, vp, data, size, 0,
                                 &pp->slice_num, NULL);
    return err < 0 ? err : 0;
}
/**
 * hwaccel end_frame callback.
 *
 * Records and submits the GPU work for one picture: a barrier moving the
 * output frame to the general layout, the reset shader, the entropy decoding
 * (VLD) shader and the inverse transform (IDCT) shader, with a frame barrier
 * between consecutive dispatches.
 *
 * Returns 0 without doing anything when no slice was recorded; otherwise
 * returns the result of the last helper call, negative on error.
 */
static int vk_prores_end_frame(AVCodecContext *avctx)
{
ProresContext *pr = avctx->priv_data;
FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
FFVulkanDecodeShared *ctx = dec->shared_ctx;
FFVulkanFunctions *vk = &ctx->s.vkfn;
ProresVulkanDecodeContext *pv = ctx->sd_ctx;
ProresVulkanDecodePicture *pp = pr->hwaccel_picture_private;
FFVulkanDecodePicture *vp = &pp->vp;
ProresVkParameters pd;
FFVkBuffer *slice_data, *slice_offsets;
struct ProresVulkanShaderVariants *shaders;
VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS];
VkBufferMemoryBarrier2 buf_bar[2];
int nb_img_bar = 0, nb_buf_bar = 0, err;
const AVPixFmtDescriptor *pix_desc;
/* Nothing to decode */
if (!pp->slice_num)
return 0;
pix_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
if (!pix_desc)
return AVERROR(EINVAL);
slice_data = (FFVkBuffer *)vp->slices_buf->data;
slice_offsets = (FFVkBuffer *)pp->slice_offset_buf->data;
/* Index 0 holds the progressive shaders, index 1 the INTERLACED variants */
shaders = &pv->shaders[pr->frame_type != 0];
/* Push constants; the same block is pushed to all three shaders */
pd = (ProresVkParameters) {
.slice_data = slice_data->address,
.bitstream_size = pp->bitstream_size,
.width = avctx->width,
.height = avctx->height,
.mb_width = pr->mb_width,
.mb_height = pr->mb_height,
.slice_width = pr->slice_count / pr->mb_height,
.slice_height = pr->mb_height,
.log2_slice_width = av_log2(pr->slice_mb_width),
.log2_chroma_w = pix_desc->log2_chroma_w,
.depth = avctx->bits_per_raw_sample,
.alpha_info = pr->alpha_info,
.bottom_field = pr->first_field ^ (pr->frame_type == 1),
};
memcpy(pd.qmat_luma, pr->qmat_luma, sizeof(pd.qmat_luma ));
memcpy(pd.qmat_chroma, pr->qmat_chroma, sizeof(pd.qmat_chroma));
FFVkExecContext *exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool);
RET(ff_vk_exec_start(&ctx->s, exec));
/* Prepare deps */
RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, pr->frame,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
RET(ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value,
pr->frame));
RET(ff_vk_exec_add_dep_buf(&ctx->s, exec,
(AVBufferRef *[]){ vp->slices_buf, pp->slice_offset_buf },
2, 0));
/* Transfer ownership to the exec context */
vp->slices_buf = pp->slice_offset_buf = NULL;
/* Input frame barrier */
ff_vk_frame_barrier(&ctx->s, exec, pr->frame, img_bar, &nb_img_bar,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
VK_IMAGE_LAYOUT_GENERAL,
VK_QUEUE_FAMILY_IGNORED);
/* nb_buf_bar is never incremented in this function, so only image barriers are submitted */
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pBufferMemoryBarriers = buf_bar,
.bufferMemoryBarrierCount = nb_buf_bar,
.pImageMemoryBarriers = img_bar,
.imageMemoryBarrierCount = nb_img_bar,
});
nb_img_bar = nb_buf_bar = 0;
/* Reset */
ff_vk_shader_update_img_array(&ctx->s, exec, &shaders->reset,
pr->frame, vp->view.out,
0, 0,
VK_IMAGE_LAYOUT_GENERAL,
VK_NULL_HANDLE);
ff_vk_shader_update_push_const(&ctx->s, exec, &shaders->reset,
VK_SHADER_STAGE_COMPUTE_BIT,
0, sizeof(pd), &pd);
ff_vk_exec_bind_shader(&ctx->s, exec, &shaders->reset);
/* The reset shader is built with local size code 0x080801: two groups per macroblock in x and y */
vk->CmdDispatch(exec->buf, pr->mb_width << 1, pr->mb_height << 1, 1);
/* Input frame barrier after reset */
ff_vk_frame_barrier(&ctx->s, exec, pr->frame, img_bar, &nb_img_bar,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_SHADER_WRITE_BIT,
VK_IMAGE_LAYOUT_GENERAL,
VK_QUEUE_FAMILY_IGNORED);
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pBufferMemoryBarriers = buf_bar,
.bufferMemoryBarrierCount = nb_buf_bar,
.pImageMemoryBarriers = img_bar,
.imageMemoryBarrierCount = nb_img_bar,
});
nb_img_bar = nb_buf_bar = 0;
/* Entropy decode */
/* Binding 0: slice offsets table, one entry per slice plus the final end offset */
ff_vk_shader_update_desc_buffer(&ctx->s, exec, &shaders->vld,
0, 0, 0,
slice_offsets,
0, (pp->slice_num + 1) * sizeof(uint32_t),
VK_FORMAT_UNDEFINED);
/* Binding 1: output planes */
ff_vk_shader_update_img_array(&ctx->s, exec, &shaders->vld,
pr->frame, vp->view.out,
0, 1,
VK_IMAGE_LAYOUT_GENERAL,
VK_NULL_HANDLE);
ff_vk_shader_update_push_const(&ctx->s, exec, &shaders->vld,
VK_SHADER_STAGE_COMPUTE_BIT,
0, sizeof(pd), &pd);
ff_vk_exec_bind_shader(&ctx->s, exec, &shaders->vld);
/* One invocation per slice and component: x/y cover the slice grid in groups of 8, z covers the 3 color planes plus alpha if present */
vk->CmdDispatch(exec->buf, AV_CEIL_RSHIFT(pr->slice_count / pr->mb_height, 3), AV_CEIL_RSHIFT(pr->mb_height, 3),
3 + !!pr->alpha_info);
/* Synchronize vld and idct shaders */
nb_img_bar = 0;
ff_vk_frame_barrier(&ctx->s, exec, pr->frame, img_bar, &nb_img_bar,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
VK_IMAGE_LAYOUT_GENERAL,
VK_QUEUE_FAMILY_IGNORED);
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pBufferMemoryBarriers = buf_bar,
.bufferMemoryBarrierCount = nb_buf_bar,
.pImageMemoryBarriers = img_bar,
.imageMemoryBarrierCount = nb_img_bar,
});
nb_img_bar = nb_buf_bar = 0;
/* Inverse transform */
ff_vk_shader_update_img_array(&ctx->s, exec, &shaders->idct,
pr->frame, vp->view.out,
0, 0,
VK_IMAGE_LAYOUT_GENERAL,
VK_NULL_HANDLE);
ff_vk_exec_bind_shader(&ctx->s, exec, &shaders->idct);
ff_vk_shader_update_push_const(&ctx->s, exec, &shaders->idct,
VK_SHADER_STAGE_COMPUTE_BIT,
0, sizeof(pd), &pd);
/* The idct shader is built with local size code 0x200201; each group covers two macroblocks in x. Only the 3 color planes are transformed */
vk->CmdDispatch(exec->buf, AV_CEIL_RSHIFT(pr->mb_width, 1), pr->mb_height, 3);
RET(ff_vk_exec_submit(&ctx->s, exec));
fail:
return err;
}
/**
 * Emit the GLSL push constant block into the shader source and register the
 * matching push constant range on the shader.
 *
 * The members declared here must stay in the same order and have the same
 * sizes as the fields of ProresVkParameters, whose size is used for the range.
 *
 * @return the result of ff_vk_shader_add_push_const()
 */
static int add_push_data(FFVulkanShader *shd)
{
GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
GLSLC(1, u8buf slice_data; );
GLSLC(1, uint bitstream_size; );
GLSLC(0, );
GLSLC(1, uint16_t width; );
GLSLC(1, uint16_t height; );
GLSLC(1, uint16_t mb_width; );
GLSLC(1, uint16_t mb_height; );
GLSLC(1, uint16_t slice_width; );
GLSLC(1, uint16_t slice_height; );
GLSLC(1, uint8_t log2_slice_width; );
GLSLC(1, uint8_t log2_chroma_w; );
GLSLC(1, uint8_t depth; );
GLSLC(1, uint8_t alpha_info; );
GLSLC(1, uint8_t bottom_field; );
GLSLC(0, );
GLSLC(1, uint8_t qmat_luma [8*8]; );
GLSLC(1, uint8_t qmat_chroma[8*8]; );
GLSLC(0, }; );
return ff_vk_shader_add_push_const(shd, 0, sizeof(ProresVkParameters),
VK_SHADER_STAGE_COMPUTE_BIT);
}
/**
 * Build one compute shader: assemble its GLSL source, compile it to SPIR-V,
 * link it and register it with the execution pool.
 *
 * @param shd         shader to initialize
 * @param name        shader name passed to ff_vk_shader_init()
 * @param entrypoint  entry point name used for compiling and linking
 * @param descs       descriptor set bindings (set 0)
 * @param num_descs   number of entries in descs
 * @param source      GLSL body appended after the common code, the push
 *                    constant block and the descriptor declarations
 * @param local_size  workgroup size packed as three bytes, highest byte first
 *                    (bits 16..23, 8..15, 0..7), passed in that order to
 *                    ff_vk_shader_init()
 * @param interlaced  when non-zero, "#define INTERLACED" is emitted before
 *                    the shader body
 *
 * @return a non-negative value on success, a negative error code otherwise
 */
static int init_shader(AVCodecContext *avctx, FFVulkanContext *s,
                       FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
                       FFVulkanShader *shd, const char *name, const char *entrypoint,
                       FFVulkanDescriptorSetBinding *descs, int num_descs,
                       const char *source, int local_size, int interlaced)
{
    uint8_t *spv_data;
    size_t spv_len;
    void *spv_opaque = NULL;
    int err;

    RET(ff_vk_shader_init(s, shd, name,
                          VK_SHADER_STAGE_COMPUTE_BIT,
                          (const char *[]) { "GL_EXT_buffer_reference",
                                             "GL_EXT_buffer_reference2" }, 2,
                          local_size >> 16 & 0xff, local_size >> 8 & 0xff, local_size >> 0 & 0xff,
                          0));

    /* Common code */
    GLSLD(ff_source_common_comp);

    /* Push constants layout */
    RET(add_push_data(shd));

    RET(ff_vk_shader_add_descriptor_set(s, shd, descs, num_descs, 0, 0));

    if (interlaced)
        av_bprintf(&shd->src, "#define INTERLACED\n");

    /* Main code */
    GLSLD(source);

    RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, entrypoint,
                            &spv_opaque));
    RET(ff_vk_shader_link(s, shd, spv_data, spv_len, entrypoint));

    RET(ff_vk_shader_register_exec(s, pool, shd));

fail:
    if (spv_opaque)
        spv->free_shader(spv, &spv_opaque);

    /* Propagate the failure: returning 0 here would hide a broken shader from the caller */
    return err;
}
/**
 * Free the decoder-wide ProRes state: every shader of both variants, the
 * slice offsets buffer pool and the context structure itself.
 */
static void vk_decode_prores_uninit(FFVulkanDecodeShared *ctx)
{
    ProresVulkanDecodeContext *pv = ctx->sd_ctx;

    for (int n = 0; n < FF_ARRAY_ELEMS(pv->shaders); ++n) {
        struct ProresVulkanShaderVariants *variant = &pv->shaders[n];

        ff_vk_shader_free(&ctx->s, &variant->reset);
        ff_vk_shader_free(&ctx->s, &variant->vld);
        ff_vk_shader_free(&ctx->s, &variant->idct);
    }

    av_buffer_pool_uninit(&pv->slice_offset_pool);

    av_freep(&pv);
}
/**
 * hwaccel init callback.
 *
 * Initializes the generic Vulkan decode context, allocates the ProRes
 * specific context and builds the reset, VLD and IDCT shaders twice: once
 * for progressive content and once with INTERLACED defined.
 *
 * The SPIR-V compiler is only needed while building the shaders and is
 * released on every exit path once it has been created.
 *
 * @return 0 on success, a negative error code otherwise
 */
static int vk_decode_prores_init(AVCodecContext *avctx)
{
    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
    FFVulkanDecodeShared *ctx = NULL;

    AVHWFramesContext *out_frames_ctx;
    ProresVulkanDecodeContext *pv;
    FFVkSPIRVCompiler *spv;
    FFVulkanDescriptorSetBinding *desc_set;
    int max_num_slices, i, err;

    /* Upper bound used to size the slice offsets array declared in the VLD shader */
    max_num_slices = (avctx->coded_width >> 4) * (avctx->coded_height >> 4);

    spv = ff_vk_spirv_init();
    if (!spv) {
        av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
        return AVERROR_EXTERNAL;
    }

    /* Go through the common exit so the compiler is not leaked on failure */
    err = ff_vk_decode_init(avctx);
    if (err < 0)
        goto fail;
    ctx = dec->shared_ctx;

    pv = ctx->sd_ctx = av_mallocz(sizeof(*pv));
    if (!pv) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    out_frames_ctx = (AVHWFramesContext *)avctx->hw_frames_ctx->data;

    ctx->sd_ctx_free = vk_decode_prores_uninit;

    for (i = 0; i < FF_ARRAY_ELEMS(pv->shaders); ++i) { /* Progressive/interlaced */
        struct ProresVulkanShaderVariants *shaders = &pv->shaders[i];

        /* Reset shader: only writes to the output planes */
        desc_set = (FFVulkanDescriptorSetBinding []) {
            {
                .name       = "dst",
                .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                .dimensions = 2,
                .mem_layout = ff_vk_shader_rep_fmt(out_frames_ctx->sw_format,
                                                   FF_VK_REP_NATIVE),
                .mem_quali  = "writeonly",
                .elems      = av_pix_fmt_count_planes(out_frames_ctx->sw_format),
                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
            },
        };
        RET(init_shader(avctx, &ctx->s, &ctx->exec_pool, spv, &shaders->reset,
                        "prores_dec_reset", "main", desc_set, 1,
                        ff_source_prores_reset_comp, 0x080801, i));

        /* VLD shader: reads the slice offsets, writes coefficients to the output planes */
        desc_set = (FFVulkanDescriptorSetBinding []) {
            {
                .name        = "slice_offsets_buf",
                .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
                .mem_quali   = "readonly",
                .buf_content = "uint32_t slice_offsets",
                .buf_elems   = max_num_slices + 1,
            },
            {
                .name       = "dst",
                .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                .dimensions = 2,
                .mem_layout = ff_vk_shader_rep_fmt(out_frames_ctx->sw_format,
                                                   FF_VK_REP_NATIVE),
                .mem_quali  = "writeonly",
                .elems      = av_pix_fmt_count_planes(out_frames_ctx->sw_format),
                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
            },
        };
        RET(init_shader(avctx, &ctx->s, &ctx->exec_pool, spv, &shaders->vld,
                        "prores_dec_vld", "main", desc_set, 2,
                        ff_source_prores_vld_comp, 0x080801, i));

        /* IDCT shader: reads and writes the output planes, so no memory qualifier */
        desc_set = (FFVulkanDescriptorSetBinding []) {
            {
                .name       = "dst",
                .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                .dimensions = 2,
                .mem_layout = ff_vk_shader_rep_fmt(out_frames_ctx->sw_format,
                                                   FF_VK_REP_NATIVE),
                .elems      = av_pix_fmt_count_planes(out_frames_ctx->sw_format),
                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
            },
        };
        RET(init_shader(avctx, &ctx->s, &ctx->exec_pool, spv, &shaders->idct,
                        "prores_dec_idct", "main", desc_set, 1,
                        ff_source_prores_idct_comp, 0x200201, i));
    }

    err = 0;

fail:
    spv->uninit(&spv);

    return err;
}
/**
 * free_frame_priv callback: release the Vulkan resources held by a picture's
 * private data. The opaque carries the hardware device context.
 */
static void vk_prores_free_frame_priv(AVRefStructOpaque _hwctx, void *data)
{
    ProresVulkanDecodePicture *pic = data;
    AVHWDeviceContext *device = _hwctx.nc;

    ff_vk_decode_free_frame(device, &pic->vp);
}
/*
 * ProRes Vulkan hwaccel definition. The per-frame callbacks and init are
 * implemented in this file; the remaining callbacks are the shared ff_vk_*
 * helpers.
 */
const FFHWAccel ff_prores_vulkan_hwaccel = {
.p.name = "prores_vulkan",
.p.type = AVMEDIA_TYPE_VIDEO,
.p.id = AV_CODEC_ID_PRORES,
.p.pix_fmt = AV_PIX_FMT_VULKAN,
.start_frame = &vk_prores_start_frame,
.decode_slice = &vk_prores_decode_slice,
.end_frame = &vk_prores_end_frame,
.free_frame_priv = &vk_prores_free_frame_priv,
.frame_priv_data_size = sizeof(ProresVulkanDecodePicture),
.init = &vk_decode_prores_init,
.update_thread_context = &ff_vk_update_thread_context,
.decode_params = &ff_vk_params_invalidate,
.flush = &ff_vk_decode_flush,
.uninit = &ff_vk_decode_uninit,
.frame_params = &ff_vk_frame_params,
.priv_data_size = sizeof(FFVulkanDecodeContext),
.caps_internal = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE,
};