1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2024-12-28 20:53:54 +02:00
FFmpeg/libavfilter/vulkan_filter.c
Lynne 4b128de44a
vulkan: enable selecting a compatible representation of format
When using **integer** images inside shaders, it turns out
that conversion doesn't automatically happen, but we need to
explicitly use the imageviews to get the image exposed as
a suitable representation for the shader.

Finally enables bitexact image representations.
2024-10-16 12:48:15 +02:00

475 lines
19 KiB
C

/*
* Copyright (c) Lynne
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "filters.h"
#include "vulkan_filter.h"
#include "libavutil/vulkan_loader.h"
int ff_vk_filter_init_context(AVFilterContext *avctx, FFVulkanContext *s,
AVBufferRef *frames_ref,
int width, int height, enum AVPixelFormat sw_format)
{
int err;
AVHWFramesContext *frames_ctx;
AVHWDeviceContext *device_ctx;
AVVulkanFramesContext *vk_frames;
AVVulkanDeviceContext *vk_dev;
AVBufferRef *device_ref = avctx->hw_device_ctx;
/* Check if context is reusable as-is */
if (frames_ref) {
int no_storage = 0;
FFVulkanFunctions *vk;
VkImageUsageFlagBits usage_req;
const VkFormat *sub = av_vkfmt_from_pixfmt(sw_format);
frames_ctx = (AVHWFramesContext *)frames_ref->data;
device_ctx = (AVHWDeviceContext *)frames_ctx->device_ref->data;
vk_frames = frames_ctx->hwctx;
vk_dev = device_ctx->hwctx;
/* Width and height mismatch */
if (width != frames_ctx->width ||
height != frames_ctx->height)
goto skip;
/* Format mismatch */
if (sw_format != frames_ctx->sw_format)
goto skip;
/* Don't let linear through. */
if (vk_frames->tiling == VK_IMAGE_TILING_LINEAR)
goto skip;
s->extensions = ff_vk_extensions_to_mask(vk_dev->enabled_dev_extensions,
vk_dev->nb_enabled_dev_extensions);
/* More advanced format checks */
err = ff_vk_load_functions(device_ctx, &s->vkfn, s->extensions, 1, 1);
if (err < 0)
return err;
vk = &s->vkfn;
/* Usage mismatch */
usage_req = VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_STORAGE_BIT;
/* If format supports hardware encoding, make sure
* the context includes it. */
if (vk_frames->format[1] == VK_FORMAT_UNDEFINED &&
(s->extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE |
FF_VK_EXT_VIDEO_MAINTENANCE_1))) {
VkFormatProperties3 fprops = {
.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3,
};
VkFormatProperties2 prop = {
.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
.pNext = &fprops,
};
vk->GetPhysicalDeviceFormatProperties2(vk_dev->phys_dev,
vk_frames->format[0],
&prop);
if (fprops.optimalTilingFeatures & VK_FORMAT_FEATURE_2_VIDEO_ENCODE_INPUT_BIT_KHR)
usage_req |= VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR;
}
if ((vk_frames->usage & usage_req) != usage_req)
goto skip;
/* Check if the subformats can do storage */
for (int i = 0; sub[i] != VK_FORMAT_UNDEFINED; i++) {
VkFormatProperties2 prop = {
.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
};
vk->GetPhysicalDeviceFormatProperties2(vk_dev->phys_dev, sub[i],
&prop);
no_storage |= !(prop.formatProperties.optimalTilingFeatures &
VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT);
}
/* Check if it's usable */
if (no_storage) {
skip:
av_log(avctx, AV_LOG_VERBOSE, "Cannot reuse context, creating a new one\n");
device_ref = frames_ctx->device_ref;
frames_ref = NULL;
} else {
av_log(avctx, AV_LOG_VERBOSE, "Reusing existing frames context\n");
frames_ref = av_buffer_ref(frames_ref);
if (!frames_ref)
return AVERROR(ENOMEM);
}
}
if (!frames_ref) {
if (!device_ref) {
av_log(avctx, AV_LOG_ERROR,
"Vulkan filtering requires a device context!\n");
return AVERROR(EINVAL);
}
frames_ref = av_hwframe_ctx_alloc(device_ref);
frames_ctx = (AVHWFramesContext *)frames_ref->data;
frames_ctx->format = AV_PIX_FMT_VULKAN;
frames_ctx->sw_format = sw_format;
frames_ctx->width = width;
frames_ctx->height = height;
vk_frames = frames_ctx->hwctx;
vk_frames->tiling = VK_IMAGE_TILING_OPTIMAL;
vk_frames->usage = VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_STORAGE_BIT |
VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
err = av_hwframe_ctx_init(frames_ref);
if (err < 0) {
av_buffer_unref(&frames_ref);
return err;
}
device_ctx = (AVHWDeviceContext *)frames_ctx->device_ref->data;
vk_dev = device_ctx->hwctx;
}
s->extensions = ff_vk_extensions_to_mask(vk_dev->enabled_dev_extensions,
vk_dev->nb_enabled_dev_extensions);
s->extensions |= ff_vk_extensions_to_mask(vk_dev->enabled_inst_extensions,
vk_dev->nb_enabled_inst_extensions);
err = ff_vk_load_functions(device_ctx, &s->vkfn, s->extensions, 1, 1);
if (err < 0) {
av_buffer_unref(&frames_ref);
return err;
}
s->frames_ref = frames_ref;
s->frames = frames_ctx;
s->hwfc = vk_frames;
s->device = device_ctx;
s->hwctx = device_ctx->hwctx;
err = ff_vk_load_props(s);
if (err < 0)
av_buffer_unref(&s->frames_ref);
return err;
}
int ff_vk_filter_config_input(AVFilterLink *inlink)
{
FilterLink *l = ff_filter_link(inlink);
AVHWFramesContext *input_frames;
AVFilterContext *avctx = inlink->dst;
FFVulkanContext *s = inlink->dst->priv;
if (!l->hw_frames_ctx) {
av_log(inlink->dst, AV_LOG_ERROR, "Vulkan filtering requires a "
"hardware frames context on the input.\n");
return AVERROR(EINVAL);
}
input_frames = (AVHWFramesContext *)l->hw_frames_ctx->data;
if (input_frames->format != AV_PIX_FMT_VULKAN)
return AVERROR(EINVAL);
/* Extract the device and default output format from the first input. */
if (avctx->inputs[0] != inlink)
return 0;
/* Save the ref, without reffing it */
s->input_frames_ref = l->hw_frames_ctx;
/* Defaults */
s->input_format = input_frames->sw_format;
s->output_format = input_frames->sw_format;
s->output_width = inlink->w;
s->output_height = inlink->h;
return 0;
}
int ff_vk_filter_config_output(AVFilterLink *outlink)
{
int err;
FilterLink *l = ff_filter_link(outlink);
FFVulkanContext *s = outlink->src->priv;
av_buffer_unref(&l->hw_frames_ctx);
err = ff_vk_filter_init_context(outlink->src, s, s->input_frames_ref,
s->output_width, s->output_height,
s->output_format);
if (err < 0)
return err;
l->hw_frames_ctx = av_buffer_ref(s->frames_ref);
if (!l->hw_frames_ctx)
return AVERROR(ENOMEM);
outlink->w = s->output_width;
outlink->h = s->output_height;
return err;
}
int ff_vk_filter_init(AVFilterContext *avctx)
{
FFVulkanContext *s = avctx->priv;
s->output_format = AV_PIX_FMT_NONE;
return 0;
}
int ff_vk_filter_process_simple(FFVulkanContext *vkctx, FFVkExecPool *e,
FFVulkanShader *shd, AVFrame *out_f, AVFrame *in_f,
VkSampler sampler, void *push_src, size_t push_size)
{
int err = 0;
FFVulkanFunctions *vk = &vkctx->vkfn;
VkImageView in_views[AV_NUM_DATA_POINTERS];
VkImageView out_views[AV_NUM_DATA_POINTERS];
VkImageMemoryBarrier2 img_bar[37];
int nb_img_bar = 0;
/* Update descriptors and init the exec context */
FFVkExecContext *exec = ff_vk_exec_get(vkctx, e);
ff_vk_exec_start(vkctx, exec);
RET(ff_vk_exec_add_dep_frame(vkctx, exec, out_f,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
RET(ff_vk_create_imageviews(vkctx, exec, out_views, out_f, FF_VK_REP_FLOAT));
ff_vk_shader_update_img_array(vkctx, exec, shd, out_f, out_views, 0, !!in_f,
VK_IMAGE_LAYOUT_GENERAL,
VK_NULL_HANDLE);
if (in_f) {
RET(ff_vk_exec_add_dep_frame(vkctx, exec, in_f,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
RET(ff_vk_create_imageviews(vkctx, exec, in_views, in_f, FF_VK_REP_FLOAT));
ff_vk_shader_update_img_array(vkctx, exec, shd, in_f, in_views, 0, 0,
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
sampler);
}
/* Bind pipeline, update push data */
ff_vk_exec_bind_shader(vkctx, exec, shd);
if (push_src)
ff_vk_shader_update_push_const(vkctx, exec, shd, VK_SHADER_STAGE_COMPUTE_BIT,
0, push_size, push_src);
/* Add data sync barriers */
ff_vk_frame_barrier(vkctx, exec, out_f, img_bar, &nb_img_bar,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_SHADER_WRITE_BIT,
VK_IMAGE_LAYOUT_GENERAL,
VK_QUEUE_FAMILY_IGNORED);
if (in_f)
ff_vk_frame_barrier(vkctx, exec, in_f, img_bar, &nb_img_bar,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_SHADER_READ_BIT,
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
VK_QUEUE_FAMILY_IGNORED);
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pImageMemoryBarriers = img_bar,
.imageMemoryBarrierCount = nb_img_bar,
});
vk->CmdDispatch(exec->buf,
FFALIGN(vkctx->output_width, shd->lg_size[0])/shd->lg_size[0],
FFALIGN(vkctx->output_height, shd->lg_size[1])/shd->lg_size[1],
shd->lg_size[2]);
return ff_vk_exec_submit(vkctx, exec);
fail:
ff_vk_exec_discard_deps(vkctx, exec);
return err;
}
int ff_vk_filter_process_2pass(FFVulkanContext *vkctx, FFVkExecPool *e,
FFVulkanShader *shd_list[2],
AVFrame *out, AVFrame *tmp, AVFrame *in,
VkSampler sampler, void *push_src, size_t push_size)
{
int err = 0;
FFVulkanFunctions *vk = &vkctx->vkfn;
VkImageView in_views[AV_NUM_DATA_POINTERS];
VkImageView tmp_views[AV_NUM_DATA_POINTERS];
VkImageView out_views[AV_NUM_DATA_POINTERS];
VkImageMemoryBarrier2 img_bar[37];
int nb_img_bar = 0;
/* Update descriptors and init the exec context */
FFVkExecContext *exec = ff_vk_exec_get(vkctx, e);
ff_vk_exec_start(vkctx, exec);
RET(ff_vk_exec_add_dep_frame(vkctx, exec, in,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
RET(ff_vk_exec_add_dep_frame(vkctx, exec, tmp,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
RET(ff_vk_exec_add_dep_frame(vkctx, exec, out,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
RET(ff_vk_create_imageviews(vkctx, exec, in_views, in, FF_VK_REP_FLOAT));
RET(ff_vk_create_imageviews(vkctx, exec, tmp_views, tmp, FF_VK_REP_FLOAT));
RET(ff_vk_create_imageviews(vkctx, exec, out_views, out, FF_VK_REP_FLOAT));
ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_SHADER_READ_BIT,
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
VK_QUEUE_FAMILY_IGNORED);
ff_vk_frame_barrier(vkctx, exec, tmp, img_bar, &nb_img_bar,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
VK_IMAGE_LAYOUT_GENERAL,
VK_QUEUE_FAMILY_IGNORED);
ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_SHADER_WRITE_BIT,
VK_IMAGE_LAYOUT_GENERAL,
VK_QUEUE_FAMILY_IGNORED);
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pImageMemoryBarriers = img_bar,
.imageMemoryBarrierCount = nb_img_bar,
});
for (int i = 0; i < 2; i++) {
FFVulkanShader *shd = shd_list[i];
AVFrame *src_f = !i ? in : tmp;
AVFrame *dst_f = !i ? tmp : out;
VkImageView *src_views = !i ? in_views : tmp_views;
VkImageView *dst_views = !i ? tmp_views : out_views;
ff_vk_shader_update_img_array(vkctx, exec, shd, src_f, src_views, 0, 0,
!i ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL :
VK_IMAGE_LAYOUT_GENERAL,
sampler);
ff_vk_shader_update_img_array(vkctx, exec, shd, dst_f, dst_views, 0, 1,
VK_IMAGE_LAYOUT_GENERAL,
VK_NULL_HANDLE);
/* Bind pipeline, update push data */
ff_vk_exec_bind_shader(vkctx, exec, shd);
if (push_src)
ff_vk_shader_update_push_const(vkctx, exec, shd, VK_SHADER_STAGE_COMPUTE_BIT,
0, push_size, push_src);
vk->CmdDispatch(exec->buf,
FFALIGN(vkctx->output_width, shd->lg_size[0])/shd->lg_size[0],
FFALIGN(vkctx->output_height, shd->lg_size[1])/shd->lg_size[1],
shd->lg_size[2]);
}
return ff_vk_exec_submit(vkctx, exec);
fail:
ff_vk_exec_discard_deps(vkctx, exec);
return err;
}
int ff_vk_filter_process_Nin(FFVulkanContext *vkctx, FFVkExecPool *e,
FFVulkanShader *shd,
AVFrame *out, AVFrame *in[], int nb_in,
VkSampler sampler, void *push_src, size_t push_size)
{
int err = 0;
FFVulkanFunctions *vk = &vkctx->vkfn;
VkImageView in_views[16][AV_NUM_DATA_POINTERS];
VkImageView out_views[AV_NUM_DATA_POINTERS];
VkImageMemoryBarrier2 img_bar[128];
int nb_img_bar = 0;
/* Update descriptors and init the exec context */
FFVkExecContext *exec = ff_vk_exec_get(vkctx, e);
ff_vk_exec_start(vkctx, exec);
/* Add deps and create temporary imageviews */
RET(ff_vk_exec_add_dep_frame(vkctx, exec, out,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
RET(ff_vk_create_imageviews(vkctx, exec, out_views, out, FF_VK_REP_FLOAT));
for (int i = 0; i < nb_in; i++) {
RET(ff_vk_exec_add_dep_frame(vkctx, exec, in[i],
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
RET(ff_vk_create_imageviews(vkctx, exec, in_views[i], in[i], FF_VK_REP_FLOAT));
}
/* Update descriptor sets */
ff_vk_shader_update_img_array(vkctx, exec, shd, out, out_views, 0, nb_in,
VK_IMAGE_LAYOUT_GENERAL,
VK_NULL_HANDLE);
for (int i = 0; i < nb_in; i++)
ff_vk_shader_update_img_array(vkctx, exec, shd, in[i], in_views[i], 0, i,
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
sampler);
/* Bind pipeline, update push data */
ff_vk_exec_bind_shader(vkctx, exec, shd);
if (push_src)
ff_vk_shader_update_push_const(vkctx, exec, shd, VK_SHADER_STAGE_COMPUTE_BIT,
0, push_size, push_src);
/* Add data sync barriers */
ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_SHADER_WRITE_BIT,
VK_IMAGE_LAYOUT_GENERAL,
VK_QUEUE_FAMILY_IGNORED);
for (int i = 0; i < nb_in; i++)
ff_vk_frame_barrier(vkctx, exec, in[i], img_bar, &nb_img_bar,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_SHADER_READ_BIT,
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
VK_QUEUE_FAMILY_IGNORED);
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pImageMemoryBarriers = img_bar,
.imageMemoryBarrierCount = nb_img_bar,
});
vk->CmdDispatch(exec->buf,
FFALIGN(vkctx->output_width, shd->lg_size[0])/shd->lg_size[0],
FFALIGN(vkctx->output_height, shd->lg_size[1])/shd->lg_size[1],
shd->lg_size[2]);
return ff_vk_exec_submit(vkctx, exec);
fail:
ff_vk_exec_discard_deps(vkctx, exec);
return err;
}