You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-12-25 22:17:24 +02:00
dpxdec: add a Vulkan hwaccel
This commit is contained in:
2
configure
vendored
2
configure
vendored
@@ -3262,6 +3262,8 @@ av1_videotoolbox_hwaccel_deps="videotoolbox"
|
||||
av1_videotoolbox_hwaccel_select="av1_decoder"
|
||||
av1_vulkan_hwaccel_deps="vulkan"
|
||||
av1_vulkan_hwaccel_select="av1_decoder"
|
||||
dpx_vulkan_hwaccel_deps="vulkan spirv_compiler"
|
||||
dpx_vulkan_hwaccel_select="dpx_decoder"
|
||||
ffv1_vulkan_hwaccel_deps="vulkan spirv_compiler"
|
||||
ffv1_vulkan_hwaccel_select="ffv1_decoder"
|
||||
h263_vaapi_hwaccel_deps="vaapi"
|
||||
|
||||
@@ -1050,6 +1050,7 @@ OBJS-$(CONFIG_AV1_VAAPI_HWACCEL) += vaapi_av1.o
|
||||
OBJS-$(CONFIG_AV1_VDPAU_HWACCEL) += vdpau_av1.o
|
||||
OBJS-$(CONFIG_AV1_VIDEOTOOLBOX_HWACCEL) += videotoolbox_av1.o
|
||||
OBJS-$(CONFIG_AV1_VULKAN_HWACCEL) += vulkan_decode.o vulkan_av1.o
|
||||
OBJS-$(CONFIG_DPX_VULKAN_HWACCEL) += vulkan_decode.o vulkan_dpx.o
|
||||
OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan_decode.o ffv1_vulkan.o vulkan_ffv1.o
|
||||
OBJS-$(CONFIG_H263_VAAPI_HWACCEL) += vaapi_mpeg4.o
|
||||
OBJS-$(CONFIG_H263_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o
|
||||
|
||||
@@ -837,7 +837,12 @@ const FFCodec ff_dpx_decoder = {
|
||||
.close = decode_end,
|
||||
UPDATE_THREAD_CONTEXT(update_thread_context),
|
||||
.p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
|
||||
.caps_internal = FF_CODEC_CAP_INIT_CLEANUP |
|
||||
FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM,
|
||||
.hw_configs = (const AVCodecHWConfigInternal *const []) {
|
||||
#if CONFIG_DPX_VULKAN_HWACCEL
|
||||
HWACCEL_VULKAN(dpx),
|
||||
#endif
|
||||
NULL
|
||||
},
|
||||
};
|
||||
|
||||
@@ -28,6 +28,7 @@ extern const struct FFHWAccel ff_av1_vaapi_hwaccel;
|
||||
extern const struct FFHWAccel ff_av1_vdpau_hwaccel;
|
||||
extern const struct FFHWAccel ff_av1_videotoolbox_hwaccel;
|
||||
extern const struct FFHWAccel ff_av1_vulkan_hwaccel;
|
||||
extern const struct FFHWAccel ff_dpx_vulkan_hwaccel;
|
||||
extern const struct FFHWAccel ff_ffv1_vulkan_hwaccel;
|
||||
extern const struct FFHWAccel ff_h263_vaapi_hwaccel;
|
||||
extern const struct FFHWAccel ff_h263_videotoolbox_hwaccel;
|
||||
|
||||
@@ -23,6 +23,10 @@ OBJS-$(CONFIG_PRORES_VULKAN_HWACCEL) += vulkan/common.o \
|
||||
vulkan/prores_vld.o \
|
||||
vulkan/prores_idct.o
|
||||
|
||||
OBJS-$(CONFIG_DPX_VULKAN_HWACCEL) += vulkan/common.o \
|
||||
vulkan/dpx_unpack.o \
|
||||
vulkan/dpx_copy.o
|
||||
|
||||
VULKAN = $(subst $(SRC_PATH)/,,$(wildcard $(SRC_PATH)/libavcodec/vulkan/*.comp))
|
||||
.SECONDARY: $(VULKAN:.comp=.c)
|
||||
libavcodec/vulkan/%.c: TAG = VULKAN
|
||||
|
||||
51
libavcodec/vulkan/dpx_copy.comp
Normal file
51
libavcodec/vulkan/dpx_copy.comp
Normal file
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
* Copyright (c) 2025 Lynne <dev@lynne.ee>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
TYPE read_data(uint off)
|
||||
{
|
||||
#ifdef BIG_ENDIAN
|
||||
return TYPE_REVERSE(data[off]);
|
||||
#else
|
||||
return data[off];
|
||||
#endif
|
||||
}
|
||||
|
||||
void main(void)
|
||||
{
|
||||
ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
|
||||
if (!IS_WITHIN(pos, imageSize(dst[0])))
|
||||
return;
|
||||
|
||||
uint offs = (pos.y*imageSize(dst[0]).x + pos.x)*COMPONENTS;
|
||||
#if NB_IMAGES == 1
|
||||
TYPE_VEC val;
|
||||
for (int i = 0; i < COMPONENTS; i++)
|
||||
val[i] = read_data(offs + i);
|
||||
val >>= SHIFT;
|
||||
imageStore(dst[0], pos, val);
|
||||
#else
|
||||
const ivec4 fmt_lut = ivec4(2, 0, 1, 3);
|
||||
for (int i = 0; i < COMPONENTS; i++) {
|
||||
TYPE val = read_data(offs + i);
|
||||
val >>= SHIFT;
|
||||
imageStore(dst[fmt_lut[i]], pos, TYPE_VEC(val));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
83
libavcodec/vulkan/dpx_unpack.comp
Normal file
83
libavcodec/vulkan/dpx_unpack.comp
Normal file
@@ -0,0 +1,83 @@
|
||||
/*
|
||||
* Copyright (c) 2025 Lynne <dev@lynne.ee>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
uint32_t read_data(uint off)
|
||||
{
|
||||
#ifdef BIG_ENDIAN
|
||||
return reverse4(data[off]);
|
||||
#else
|
||||
return data[off];
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef PACKED_10BIT
|
||||
i16vec4 parse_packed_in_32(ivec2 pos, int stride)
|
||||
{
|
||||
uint32_t d = read_data(pos.y*stride + pos.x);
|
||||
i16vec4 v;
|
||||
d = d << 10 | d >> 22 & 0x3FFFFF;
|
||||
v[0] = int16_t(d & 0x3FF);
|
||||
d = d << 10 | d >> 22 & 0x3FFFFF;
|
||||
v[1] = int16_t(d & 0x3FF);
|
||||
d = d << 10 | d >> 22 & 0x3FFFFF;
|
||||
v[2] = int16_t(d & 0x3FF);
|
||||
v[3] = int16_t(0);
|
||||
return v;
|
||||
}
|
||||
#else
|
||||
i16vec4 parse_packed_in_32(ivec2 pos, int stride)
|
||||
{
|
||||
uint line_off = pos.y*(stride*BITS_PER_COMP*COMPONENTS +
|
||||
(need_align << 3));
|
||||
uint pix_off = pos.x*BITS_PER_COMP*COMPONENTS;
|
||||
|
||||
uint off = (line_off + pix_off >> 5);
|
||||
uint bit = pix_off & 0x1f;
|
||||
|
||||
uint32_t d0 = read_data(off + 0);
|
||||
uint32_t d1 = read_data(off + 1);
|
||||
|
||||
uint64_t combined = (uint64_t(d1) << 32) | d0;
|
||||
combined >>= bit;
|
||||
|
||||
return i16vec4(combined,
|
||||
combined >> (BITS_PER_COMP*1),
|
||||
combined >> (BITS_PER_COMP*2),
|
||||
combined >> (BITS_PER_COMP*3)) &
|
||||
int16_t((1 << BITS_PER_COMP) - 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
void main(void)
|
||||
{
|
||||
ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
|
||||
if (!IS_WITHIN(pos, imageSize(dst[0])))
|
||||
return;
|
||||
|
||||
i16vec4 p = parse_packed_in_32(pos, imageSize(dst[0]).x);
|
||||
|
||||
#if NB_IMAGES == 1
|
||||
imageStore(dst[0], pos, p);
|
||||
#else
|
||||
const ivec4 fmt_lut = COMPONENTS == 1 ? ivec4(0) : ivec4(2, 0, 1, 3);
|
||||
for (uint i = 0; i < COMPONENTS; i++)
|
||||
imageStore(dst[fmt_lut[i]], pos, i16vec4(p[i]));
|
||||
#endif
|
||||
}
|
||||
@@ -26,6 +26,7 @@
|
||||
|
||||
#define DECODER_IS_SDR(codec_id) \
|
||||
(((codec_id) == AV_CODEC_ID_FFV1) || \
|
||||
((codec_id) == AV_CODEC_ID_DPX) || \
|
||||
((codec_id) == AV_CODEC_ID_PRORES_RAW) || \
|
||||
((codec_id) == AV_CODEC_ID_PRORES))
|
||||
|
||||
@@ -50,6 +51,9 @@ extern const FFVulkanDecodeDescriptor ff_vk_dec_prores_raw_desc;
|
||||
#if CONFIG_PRORES_VULKAN_HWACCEL
|
||||
extern const FFVulkanDecodeDescriptor ff_vk_dec_prores_desc;
|
||||
#endif
|
||||
#if CONFIG_DPX_VULKAN_HWACCEL
|
||||
extern const FFVulkanDecodeDescriptor ff_vk_dec_dpx_desc;
|
||||
#endif
|
||||
|
||||
static const FFVulkanDecodeDescriptor *dec_descs[] = {
|
||||
#if CONFIG_H264_VULKAN_HWACCEL
|
||||
@@ -73,6 +77,9 @@ static const FFVulkanDecodeDescriptor *dec_descs[] = {
|
||||
#if CONFIG_PRORES_VULKAN_HWACCEL
|
||||
&ff_vk_dec_prores_desc,
|
||||
#endif
|
||||
#if CONFIG_DPX_VULKAN_HWACCEL
|
||||
&ff_vk_dec_dpx_desc,
|
||||
#endif
|
||||
};
|
||||
|
||||
typedef struct FFVulkanDecodeProfileData {
|
||||
@@ -1117,10 +1124,19 @@ int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx)
|
||||
/* This should be more efficient for downloading and using */
|
||||
frames_ctx->sw_format = AV_PIX_FMT_RGBA64;
|
||||
break;
|
||||
case AV_PIX_FMT_RGB48LE:
|
||||
case AV_PIX_FMT_RGB48BE: /* DPX outputs RGB48BE, so we need both */
|
||||
/* Almost nothing supports native 3-component RGB */
|
||||
frames_ctx->sw_format = AV_PIX_FMT_GBRP16;
|
||||
break;
|
||||
case AV_PIX_FMT_RGBA64BE: /* DPX again, fix for little-endian systems */
|
||||
frames_ctx->sw_format = AV_PIX_FMT_RGBA64;
|
||||
break;
|
||||
case AV_PIX_FMT_GBRP10:
|
||||
/* This saves memory bandwidth when downloading */
|
||||
frames_ctx->sw_format = AV_PIX_FMT_X2BGR10;
|
||||
break;
|
||||
case AV_PIX_FMT_RGB24:
|
||||
case AV_PIX_FMT_BGR0:
|
||||
/* mpv has issues with bgr0 mapping, so just remap it */
|
||||
frames_ctx->sw_format = AV_PIX_FMT_RGB0;
|
||||
|
||||
475
libavcodec/vulkan_dpx.c
Normal file
475
libavcodec/vulkan_dpx.c
Normal file
@@ -0,0 +1,475 @@
|
||||
/*
|
||||
* Copyright (c) 2025 Lynne <dev@lynne.ee>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "vulkan_decode.h"
|
||||
#include "hwaccel_internal.h"
|
||||
|
||||
#include "dpx.h"
|
||||
#include "libavutil/vulkan_spirv.h"
|
||||
#include "libavutil/mem.h"
|
||||
|
||||
extern const char *ff_source_common_comp;
|
||||
extern const char *ff_source_dpx_unpack_comp;
|
||||
extern const char *ff_source_dpx_copy_comp;
|
||||
|
||||
const FFVulkanDecodeDescriptor ff_vk_dec_dpx_desc = {
|
||||
.codec_id = AV_CODEC_ID_DPX,
|
||||
.decode_extension = FF_VK_EXT_PUSH_DESCRIPTOR,
|
||||
.queue_flags = VK_QUEUE_COMPUTE_BIT,
|
||||
};
|
||||
|
||||
typedef struct DPXVulkanDecodePicture {
|
||||
FFVulkanDecodePicture vp;
|
||||
} DPXVulkanDecodePicture;
|
||||
|
||||
typedef struct DPXVulkanDecodeContext {
|
||||
FFVulkanShader shader;
|
||||
AVBufferPool *frame_data_pool;
|
||||
} DPXVulkanDecodeContext;
|
||||
|
||||
typedef struct DecodePushData {
|
||||
int stride;
|
||||
int need_align;
|
||||
int padded_10bit;
|
||||
} DecodePushData;
|
||||
|
||||
static int host_upoad_image(AVCodecContext *avctx,
|
||||
FFVulkanDecodeContext *dec, DPXDecContext *dpx,
|
||||
const uint8_t *src, uint32_t size)
|
||||
{
|
||||
int err;
|
||||
VkImage temp;
|
||||
|
||||
FFVulkanDecodeShared *ctx = dec->shared_ctx;
|
||||
DPXVulkanDecodeContext *dxv = ctx->sd_ctx;
|
||||
VkPhysicalDeviceLimits *limits = &ctx->s.props.properties.limits;
|
||||
FFVulkanFunctions *vk = &ctx->s.vkfn;
|
||||
|
||||
DPXVulkanDecodePicture *pp = dpx->hwaccel_picture_private;
|
||||
FFVulkanDecodePicture *vp = &pp->vp;
|
||||
|
||||
int unpack = (avctx->bits_per_raw_sample == 12 && !dpx->packing) ||
|
||||
avctx->bits_per_raw_sample == 10;
|
||||
if (unpack)
|
||||
return 0;
|
||||
|
||||
VkImageCreateInfo create_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
|
||||
.imageType = VK_IMAGE_TYPE_2D,
|
||||
.format = avctx->bits_per_raw_sample == 8 ? VK_FORMAT_R8_UINT :
|
||||
avctx->bits_per_raw_sample == 32 ? VK_FORMAT_R32_UINT :
|
||||
VK_FORMAT_R16_UINT,
|
||||
.extent.width = dpx->frame->width*dpx->components,
|
||||
.extent.height = dpx->frame->height,
|
||||
.extent.depth = 1,
|
||||
.mipLevels = 1,
|
||||
.arrayLayers = 1,
|
||||
.tiling = VK_IMAGE_TILING_LINEAR,
|
||||
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
|
||||
.usage = VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT,
|
||||
.samples = VK_SAMPLE_COUNT_1_BIT,
|
||||
.pQueueFamilyIndices = &ctx->qf[0].idx,
|
||||
.queueFamilyIndexCount = 1,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
};
|
||||
|
||||
if (create_info.extent.width >= limits->maxImageDimension2D ||
|
||||
create_info.extent.height >= limits->maxImageDimension2D)
|
||||
return 0;
|
||||
|
||||
vk->CreateImage(ctx->s.hwctx->act_dev, &create_info, ctx->s.hwctx->alloc,
|
||||
&temp);
|
||||
|
||||
err = ff_vk_get_pooled_buffer(&ctx->s, &dxv->frame_data_pool,
|
||||
&vp->slices_buf,
|
||||
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
|
||||
NULL, size,
|
||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
FFVkBuffer *vkb = (FFVkBuffer *)vp->slices_buf->data;
|
||||
VkBindImageMemoryInfo bind_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO,
|
||||
.image = temp,
|
||||
.memory = vkb->mem,
|
||||
};
|
||||
vk->BindImageMemory2(ctx->s.hwctx->act_dev, 1, &bind_info);
|
||||
|
||||
VkHostImageLayoutTransitionInfo layout_change = {
|
||||
.sType = VK_STRUCTURE_TYPE_HOST_IMAGE_LAYOUT_TRANSITION_INFO,
|
||||
.image = temp,
|
||||
.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
|
||||
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.subresourceRange.layerCount = 1,
|
||||
.subresourceRange.levelCount = 1,
|
||||
};
|
||||
vk->TransitionImageLayoutEXT(ctx->s.hwctx->act_dev, 1, &layout_change);
|
||||
|
||||
VkMemoryToImageCopy copy_region = {
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_TO_IMAGE_COPY,
|
||||
.pHostPointer = src,
|
||||
.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.imageSubresource.layerCount = 1,
|
||||
.imageExtent = (VkExtent3D){ dpx->frame->width*dpx->components,
|
||||
dpx->frame->height,
|
||||
1 },
|
||||
};
|
||||
VkCopyMemoryToImageInfo copy_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_COPY_MEMORY_TO_IMAGE_INFO,
|
||||
.flags = VK_HOST_IMAGE_COPY_MEMCPY_BIT_EXT,
|
||||
.dstImage = temp,
|
||||
.dstImageLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.regionCount = 1,
|
||||
.pRegions = ©_region,
|
||||
};
|
||||
vk->CopyMemoryToImageEXT(ctx->s.hwctx->act_dev, ©_info);
|
||||
|
||||
vk->DestroyImage(ctx->s.hwctx->act_dev, temp, ctx->s.hwctx->alloc);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vk_dpx_start_frame(AVCodecContext *avctx,
|
||||
const AVBufferRef *buffer_ref,
|
||||
av_unused const uint8_t *buffer,
|
||||
av_unused uint32_t size)
|
||||
{
|
||||
int err;
|
||||
FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
|
||||
FFVulkanDecodeShared *ctx = dec->shared_ctx;
|
||||
DPXDecContext *dpx = avctx->priv_data;
|
||||
|
||||
DPXVulkanDecodePicture *pp = dpx->hwaccel_picture_private;
|
||||
FFVulkanDecodePicture *vp = &pp->vp;
|
||||
|
||||
if (ctx->s.extensions & FF_VK_EXT_HOST_IMAGE_COPY)
|
||||
host_upoad_image(avctx, dec, dpx, buffer, size);
|
||||
|
||||
/* Host map the frame data if supported */
|
||||
if (!vp->slices_buf &&
|
||||
ctx->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY)
|
||||
ff_vk_host_map_buffer(&ctx->s, &vp->slices_buf, (uint8_t *)buffer,
|
||||
buffer_ref,
|
||||
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
|
||||
|
||||
/* Prepare frame to be used */
|
||||
err = ff_vk_decode_prepare_frame_sdr(dec, dpx->frame, vp, 1,
|
||||
FF_VK_REP_NATIVE, 0);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vk_dpx_decode_slice(AVCodecContext *avctx,
|
||||
const uint8_t *data,
|
||||
uint32_t size)
|
||||
{
|
||||
DPXDecContext *dpx = avctx->priv_data;
|
||||
|
||||
DPXVulkanDecodePicture *pp = dpx->hwaccel_picture_private;
|
||||
FFVulkanDecodePicture *vp = &pp->vp;
|
||||
|
||||
if (!vp->slices_buf) {
|
||||
int err = ff_vk_decode_add_slice(avctx, vp, data, size, 0,
|
||||
NULL, NULL);
|
||||
if (err < 0)
|
||||
return err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vk_dpx_end_frame(AVCodecContext *avctx)
|
||||
{
|
||||
int err;
|
||||
FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
|
||||
FFVulkanDecodeShared *ctx = dec->shared_ctx;
|
||||
FFVulkanFunctions *vk = &ctx->s.vkfn;
|
||||
|
||||
DPXDecContext *dpx = avctx->priv_data;
|
||||
DPXVulkanDecodeContext *dxv = ctx->sd_ctx;
|
||||
|
||||
DPXVulkanDecodePicture *pp = dpx->hwaccel_picture_private;
|
||||
FFVulkanDecodePicture *vp = &pp->vp;
|
||||
|
||||
FFVkBuffer *slices_buf = (FFVkBuffer *)vp->slices_buf->data;
|
||||
|
||||
VkImageMemoryBarrier2 img_bar[8];
|
||||
int nb_img_bar = 0;
|
||||
|
||||
FFVkExecContext *exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool);
|
||||
ff_vk_exec_start(&ctx->s, exec);
|
||||
|
||||
/* Prepare deps */
|
||||
RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, dpx->frame,
|
||||
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
||||
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
|
||||
|
||||
err = ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value,
|
||||
dpx->frame);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->slices_buf, 1, 0));
|
||||
vp->slices_buf = NULL;
|
||||
|
||||
AVVkFrame *vkf = (AVVkFrame *)dpx->frame->data[0];
|
||||
for (int i = 0; i < 4; i++) {
|
||||
vkf->layout[i] = VK_IMAGE_LAYOUT_UNDEFINED;
|
||||
vkf->access[i] = VK_ACCESS_2_NONE;
|
||||
}
|
||||
|
||||
ff_vk_frame_barrier(&ctx->s, exec, dpx->frame, img_bar, &nb_img_bar,
|
||||
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
||||
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
|
||||
VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
|
||||
VK_IMAGE_LAYOUT_GENERAL,
|
||||
VK_QUEUE_FAMILY_IGNORED);
|
||||
|
||||
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
|
||||
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
|
||||
.pImageMemoryBarriers = img_bar,
|
||||
.imageMemoryBarrierCount = nb_img_bar,
|
||||
});
|
||||
nb_img_bar = 0;
|
||||
|
||||
FFVulkanShader *shd = &dxv->shader;
|
||||
ff_vk_shader_update_img_array(&ctx->s, exec, shd,
|
||||
dpx->frame, vp->view.out,
|
||||
0, 0,
|
||||
VK_IMAGE_LAYOUT_GENERAL,
|
||||
VK_NULL_HANDLE);
|
||||
ff_vk_shader_update_desc_buffer(&ctx->s, exec, shd,
|
||||
0, 1, 0,
|
||||
slices_buf,
|
||||
0, slices_buf->size,
|
||||
VK_FORMAT_UNDEFINED);
|
||||
|
||||
ff_vk_exec_bind_shader(&ctx->s, exec, shd);
|
||||
|
||||
/* Update push data */
|
||||
DecodePushData pd = (DecodePushData) {
|
||||
.stride = dpx->stride,
|
||||
.need_align = dpx->need_align,
|
||||
.padded_10bit = !dpx->unpadded_10bit,
|
||||
};
|
||||
|
||||
ff_vk_shader_update_push_const(&ctx->s, exec, shd,
|
||||
VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
0, sizeof(pd), &pd);
|
||||
|
||||
vk->CmdDispatch(exec->buf,
|
||||
FFALIGN(dpx->frame->width, shd->lg_size[0])/shd->lg_size[0],
|
||||
FFALIGN(dpx->frame->height, shd->lg_size[1])/shd->lg_size[1],
|
||||
1);
|
||||
|
||||
err = ff_vk_exec_submit(&ctx->s, exec);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
fail:
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int init_shader(AVCodecContext *avctx, FFVulkanContext *s,
|
||||
FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
|
||||
FFVulkanShader *shd, int bits)
|
||||
{
|
||||
int err;
|
||||
DPXDecContext *dpx = avctx->priv_data;
|
||||
FFVulkanDescriptorSetBinding *desc_set;
|
||||
AVHWFramesContext *dec_frames_ctx;
|
||||
dec_frames_ctx = (AVHWFramesContext *)avctx->hw_frames_ctx->data;
|
||||
int planes = av_pix_fmt_count_planes(dec_frames_ctx->sw_format);
|
||||
|
||||
uint8_t *spv_data;
|
||||
size_t spv_len;
|
||||
void *spv_opaque = NULL;
|
||||
|
||||
RET(ff_vk_shader_init(s, shd, "dpx",
|
||||
VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
(const char *[]) { "GL_EXT_buffer_reference",
|
||||
"GL_EXT_buffer_reference2" }, 2,
|
||||
512, 1, 1,
|
||||
0));
|
||||
|
||||
/* Common codec header */
|
||||
GLSLD(ff_source_common_comp);
|
||||
|
||||
GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
|
||||
GLSLC(1, int stride; );
|
||||
GLSLC(1, int need_align; );
|
||||
GLSLC(1, int padded_10bit; );
|
||||
GLSLC(0, }; );
|
||||
GLSLC(0, );
|
||||
ff_vk_shader_add_push_const(shd, 0, sizeof(DecodePushData),
|
||||
VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
|
||||
int unpack = (avctx->bits_per_raw_sample == 12 && !dpx->packing) ||
|
||||
avctx->bits_per_raw_sample == 10;
|
||||
|
||||
desc_set = (FFVulkanDescriptorSetBinding []) {
|
||||
{
|
||||
.name = "dst",
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||
.dimensions = 2,
|
||||
.mem_quali = "writeonly",
|
||||
.mem_layout = ff_vk_shader_rep_fmt(dec_frames_ctx->sw_format,
|
||||
FF_VK_REP_NATIVE),
|
||||
.elems = planes,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
},
|
||||
{
|
||||
.name = "data_buf",
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.mem_quali = "readonly",
|
||||
.buf_content = (unpack || bits == 32) ? "uint32_t data[];" :
|
||||
bits == 8 ? "uint8_t data[];" : "uint16_t data[];",
|
||||
},
|
||||
};
|
||||
RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 0, 0));
|
||||
|
||||
if (dpx->endian)
|
||||
GLSLC(0, #define BIG_ENDIAN );
|
||||
GLSLF(0, #define COMPONENTS (%i) ,dpx->components);
|
||||
GLSLF(0, #define BITS_PER_COMP (%i) ,bits);
|
||||
GLSLF(0, #define NB_IMAGES (%i) ,planes);
|
||||
if (unpack) {
|
||||
if (bits == 10)
|
||||
GLSLC(0, #define PACKED_10BIT );
|
||||
GLSLD(ff_source_dpx_unpack_comp);
|
||||
} else {
|
||||
GLSLF(0, #define SHIFT (%i) ,FFALIGN(bits, 8) - bits);
|
||||
GLSLF(0, #define TYPE uint%i_t ,FFALIGN(bits, 8));
|
||||
GLSLF(0, #define TYPE_VEC u%ivec4 ,FFALIGN(bits, 8));
|
||||
GLSLF(0, #define TYPE_REVERSE(x) (reverse%i(x)), FFALIGN(bits, 8)/8);
|
||||
GLSLD(ff_source_dpx_copy_comp);
|
||||
}
|
||||
|
||||
RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main",
|
||||
&spv_opaque));
|
||||
RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main"));
|
||||
|
||||
RET(ff_vk_shader_register_exec(s, pool, shd));
|
||||
|
||||
fail:
|
||||
if (spv_opaque)
|
||||
spv->free_shader(spv, &spv_opaque);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static void vk_decode_dpx_uninit(FFVulkanDecodeShared *ctx)
|
||||
{
|
||||
DPXVulkanDecodeContext *fv = ctx->sd_ctx;
|
||||
|
||||
ff_vk_shader_free(&ctx->s, &fv->shader);
|
||||
|
||||
av_buffer_pool_uninit(&fv->frame_data_pool);
|
||||
|
||||
av_freep(&fv);
|
||||
}
|
||||
|
||||
static int vk_decode_dpx_init(AVCodecContext *avctx)
|
||||
{
|
||||
int err;
|
||||
DPXDecContext *dpx = avctx->priv_data;
|
||||
FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
|
||||
|
||||
switch (dpx->pix_fmt) {
|
||||
case AV_PIX_FMT_GRAY10:
|
||||
case AV_PIX_FMT_GRAY12:
|
||||
case AV_PIX_FMT_GBRAP10:
|
||||
case AV_PIX_FMT_GBRAP12:
|
||||
case AV_PIX_FMT_UYVY422:
|
||||
case AV_PIX_FMT_YUV444P:
|
||||
case AV_PIX_FMT_YUVA444P:
|
||||
return AVERROR(ENOTSUP);
|
||||
case AV_PIX_FMT_GBRP10:
|
||||
if (dpx->unpadded_10bit)
|
||||
return AVERROR(ENOTSUP);
|
||||
/* fallthrough */
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
FFVkSPIRVCompiler *spv = ff_vk_spirv_init();
|
||||
if (!spv) {
|
||||
av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
|
||||
return AVERROR_EXTERNAL;
|
||||
}
|
||||
|
||||
err = ff_vk_decode_init(avctx);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
FFVulkanDecodeShared *ctx = dec->shared_ctx;
|
||||
DPXVulkanDecodeContext *dxv = ctx->sd_ctx = av_mallocz(sizeof(*dxv));
|
||||
if (!dxv) {
|
||||
err = AVERROR(ENOMEM);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
ctx->sd_ctx_free = &vk_decode_dpx_uninit;
|
||||
|
||||
RET(init_shader(avctx, &ctx->s, &ctx->exec_pool,
|
||||
spv, &dxv->shader, avctx->bits_per_raw_sample));
|
||||
|
||||
fail:
|
||||
spv->uninit(&spv);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static void vk_dpx_free_frame_priv(AVRefStructOpaque _hwctx, void *data)
|
||||
{
|
||||
AVHWDeviceContext *dev_ctx = _hwctx.nc;
|
||||
|
||||
DPXVulkanDecodePicture *pp = data;
|
||||
FFVulkanDecodePicture *vp = &pp->vp;
|
||||
|
||||
ff_vk_decode_free_frame(dev_ctx, vp);
|
||||
}
|
||||
|
||||
const FFHWAccel ff_dpx_vulkan_hwaccel = {
|
||||
.p.name = "dpx_vulkan",
|
||||
.p.type = AVMEDIA_TYPE_VIDEO,
|
||||
.p.id = AV_CODEC_ID_DPX,
|
||||
.p.pix_fmt = AV_PIX_FMT_VULKAN,
|
||||
.start_frame = &vk_dpx_start_frame,
|
||||
.decode_slice = &vk_dpx_decode_slice,
|
||||
.end_frame = &vk_dpx_end_frame,
|
||||
.free_frame_priv = &vk_dpx_free_frame_priv,
|
||||
.frame_priv_data_size = sizeof(DPXVulkanDecodePicture),
|
||||
.init = &vk_decode_dpx_init,
|
||||
.update_thread_context = &ff_vk_update_thread_context,
|
||||
.decode_params = &ff_vk_params_invalidate,
|
||||
.flush = &ff_vk_decode_flush,
|
||||
.uninit = &ff_vk_decode_uninit,
|
||||
.frame_params = &ff_vk_frame_params,
|
||||
.priv_data_size = sizeof(FFVulkanDecodeContext),
|
||||
.caps_internal = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE,
|
||||
};
|
||||
Reference in New Issue
Block a user