1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-08-04 22:03:09 +02:00

avfilter: add scale_d3d11 filter

This commit introduces a new hardware-accelerated video filter, scale_d3d11,
which performs scaling and format conversion using Direct3D 11. The filter enables
efficient GPU-based scaling and pixel format conversion (p010 to nv12), reducing
CPU overhead and latency in video pipelines.
This commit is contained in:
Dash Santosh
2025-07-25 08:58:38 +05:30
committed by dwbuiten
parent 2ddc3cbd98
commit 96821211c2
6 changed files with 472 additions and 0 deletions

View File

@ -25,6 +25,7 @@ version <next>:
- APV in MP4/ISOBMFF muxing and demuxing - APV in MP4/ISOBMFF muxing and demuxing
- OpenHarmony hardware decoder/encoder - OpenHarmony hardware decoder/encoder
- Colordetect filter - Colordetect filter
- Add vf_scale_d3d11 filter
version 7.1: version 7.1:

1
configure vendored
View File

@ -3370,6 +3370,7 @@ pad_cuda_filter_deps_any="cuda_nvcc cuda_llvm"
sharpen_npp_filter_deps="ffnvcodec libnpp" sharpen_npp_filter_deps="ffnvcodec libnpp"
ddagrab_filter_deps="d3d11va IDXGIOutput1 DXGI_OUTDUPL_FRAME_INFO" ddagrab_filter_deps="d3d11va IDXGIOutput1 DXGI_OUTDUPL_FRAME_INFO"
scale_d3d11_filter_deps="d3d11va"
amf_deps_any="libdl LoadLibrary" amf_deps_any="libdl LoadLibrary"
nvenc_deps="ffnvcodec" nvenc_deps="ffnvcodec"

View File

@ -464,6 +464,7 @@ OBJS-$(CONFIG_ROBERTS_OPENCL_FILTER) += vf_convolution_opencl.o opencl.o
OBJS-$(CONFIG_ROTATE_FILTER) += vf_rotate.o OBJS-$(CONFIG_ROTATE_FILTER) += vf_rotate.o
OBJS-$(CONFIG_SAB_FILTER) += vf_sab.o OBJS-$(CONFIG_SAB_FILTER) += vf_sab.o
OBJS-$(CONFIG_SCALE_FILTER) += vf_scale.o scale_eval.o framesync.o OBJS-$(CONFIG_SCALE_FILTER) += vf_scale.o scale_eval.o framesync.o
OBJS-$(CONFIG_SCALE_D3D11_FILTER) += vf_scale_d3d11.o scale_eval.o
OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o scale_eval.o \ OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o scale_eval.o \
vf_scale_cuda.ptx.o cuda/load_helper.o vf_scale_cuda.ptx.o cuda/load_helper.o
OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o scale_eval.o OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o scale_eval.o

View File

@ -438,6 +438,7 @@ extern const FFFilter ff_vf_scale;
extern const FFFilter ff_vf_vpp_amf; extern const FFFilter ff_vf_vpp_amf;
extern const FFFilter ff_vf_sr_amf; extern const FFFilter ff_vf_sr_amf;
extern const FFFilter ff_vf_scale_cuda; extern const FFFilter ff_vf_scale_cuda;
extern const FFFilter ff_vf_scale_d3d11;
extern const FFFilter ff_vf_scale_npp; extern const FFFilter ff_vf_scale_npp;
extern const FFFilter ff_vf_scale_qsv; extern const FFFilter ff_vf_scale_qsv;
extern const FFFilter ff_vf_scale_vaapi; extern const FFFilter ff_vf_scale_vaapi;

View File

@ -0,0 +1,464 @@
/*
* Copyright (C) 2025 MultiCoreWare, Inc.
*
* Authors: Dash Santosh <dash.sathanatayanan@multicorewareinc.com>
* Sachin <sachin.prakash@multicorewareinc.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "compat/w32dlfcn.h"
#include "libavutil/hwcontext.h"
#include "libavutil/hwcontext_d3d11va.h"
#include "filters.h"
#include "scale_eval.h"
#include "video.h"
/**
 * Per-instance state for the scale_d3d11 filter.
 *
 * The D3D11 device/context pointers are borrowed from the
 * AVD3D11VADeviceContext and must not be Released by this filter;
 * the video-processor objects are created (lazily, on the first
 * frame) and owned by the filter.
 */
typedef struct ScaleD3D11Context {
    const AVClass *classCtx;                    ///< AVOptions class context (must be first)
    char *w_expr;                               ///< output width expression (e.g. "iw/2")
    char *h_expr;                               ///< output height expression
    enum AVPixelFormat format;                  ///< requested output sw format (nv12/p010)

    ///< D3D11 objects
    ID3D11Device *device;                       ///< borrowed from hw device ctx, not owned
    ID3D11DeviceContext *context;               ///< borrowed immediate context, not owned
    ID3D11VideoDevice *videoDevice;             ///< owned; created in configure_processor
    ID3D11VideoProcessor *processor;            ///< owned; created lazily on first frame
    ID3D11VideoProcessorEnumerator *enumerator; ///< owned
    ID3D11VideoProcessorOutputView *outputView; ///< owned; recreated per output frame
    ID3D11VideoProcessorInputView *inputView;   ///< owned if set; per-frame input view

    ///< Buffer references
    AVBufferRef *hw_device_ctx;                 ///< ref to the shared D3D11VA device
    AVBufferRef *hw_frames_ctx_out;             ///< output frame pool created in config_props

    ///< Dimensions and formats
    int width, height;                          ///< evaluated output dimensions
    int inputWidth, inputHeight;                ///< taken from the first input texture
    DXGI_FORMAT input_format;                   ///< DXGI format of the input texture
    DXGI_FORMAT output_format;                  ///< DXGI format derived from s->format
} ScaleD3D11Context;
static av_cold int scale_d3d11_init(AVFilterContext *ctx)
{
    /* Intentionally empty: the device is picked up in config_props and
     * the video processor is created lazily in filter_frame, once the
     * input texture dimensions are known. */
    return 0;
}
/**
 * Release all COM objects owned by the filter and NULL the pointers.
 *
 * Note: s->device and s->context are borrowed from the
 * AVD3D11VADeviceContext and are deliberately NOT released here.
 *
 * Fix: also release s->inputView — the context declares it but the
 * original cleanup never freed it, leaking the view if it is ever
 * stored in the context.
 */
static void release_d3d11_resources(ScaleD3D11Context *s) {
    if (s->inputView) {
        s->inputView->lpVtbl->Release(s->inputView);
        s->inputView = NULL;
    }
    if (s->outputView) {
        s->outputView->lpVtbl->Release(s->outputView);
        s->outputView = NULL;
    }
    if (s->processor) {
        s->processor->lpVtbl->Release(s->processor);
        s->processor = NULL;
    }
    if (s->enumerator) {
        s->enumerator->lpVtbl->Release(s->enumerator);
        s->enumerator = NULL;
    }
    if (s->videoDevice) {
        s->videoDevice->lpVtbl->Release(s->videoDevice);
        s->videoDevice = NULL;
    }
}
/**
 * Create the D3D11 video processor pipeline (video device interface,
 * processor enumerator, processor) for the current input/output
 * dimensions. Called once, from filter_frame, after the first input
 * texture has been inspected.
 *
 * @return 0 on success, AVERROR(EINVAL) for an unsupported output
 *         format, AVERROR_EXTERNAL on D3D11 API failure.
 */
static int scale_d3d11_configure_processor(ScaleD3D11Context *s, AVFilterContext *ctx) {
    HRESULT hr;

    /* Map the requested output pixel format to its DXGI equivalent;
     * only NV12 and P010 are supported. */
    switch (s->format) {
        case AV_PIX_FMT_NV12:
            s->output_format = DXGI_FORMAT_NV12;
            break;
        case AV_PIX_FMT_P010:
            s->output_format = DXGI_FORMAT_P010;
            break;
        default:
            av_log(ctx, AV_LOG_ERROR, "Invalid output format specified\n");
            return AVERROR(EINVAL);
    }

    /* Get D3D11 device and immediate context from the hardware device
     * context. These are borrowed references — not released by the
     * filter. */
    AVHWDeviceContext *hwctx = (AVHWDeviceContext *)s->hw_device_ctx->data;
    AVD3D11VADeviceContext *d3d11_hwctx = (AVD3D11VADeviceContext *)hwctx->hwctx;
    s->device = d3d11_hwctx->device;
    s->context = d3d11_hwctx->device_context;

    av_log(ctx, AV_LOG_VERBOSE, "Configuring D3D11 video processor: %dx%d -> %dx%d\n",
           s->inputWidth, s->inputHeight, s->width, s->height);

    /* Describe the processing job: progressive input, scaling from the
     * first frame's texture size to the evaluated output size. */
    D3D11_VIDEO_PROCESSOR_CONTENT_DESC contentDesc = {
        .InputFrameFormat = D3D11_VIDEO_FRAME_FORMAT_PROGRESSIVE,
        .InputWidth = s->inputWidth,
        .InputHeight = s->inputHeight,
        .OutputWidth = s->width,
        .OutputHeight = s->height,
        .Usage = D3D11_VIDEO_USAGE_PLAYBACK_NORMAL,
    };

    /* Query the video device interface from the D3D11 device. */
    hr = s->device->lpVtbl->QueryInterface(s->device, &IID_ID3D11VideoDevice, (void **)&s->videoDevice);
    if (FAILED(hr)) {
        av_log(ctx, AV_LOG_ERROR, "Failed to get D3D11 video device interface: HRESULT 0x%lX\n", hr);
        return AVERROR_EXTERNAL;
    }

    /* Create the video processor enumerator for this content description. */
    hr = s->videoDevice->lpVtbl->CreateVideoProcessorEnumerator(s->videoDevice, &contentDesc, &s->enumerator);
    if (FAILED(hr)) {
        av_log(ctx, AV_LOG_ERROR, "Failed to create video processor enumerator: HRESULT 0x%lX\n", hr);
        return AVERROR_EXTERNAL;
    }

    /* Create the video processor itself (rate-conversion caps index 0). */
    hr = s->videoDevice->lpVtbl->CreateVideoProcessor(s->videoDevice, s->enumerator, 0, &s->processor);
    if (FAILED(hr)) {
        av_log(ctx, AV_LOG_ERROR, "Failed to create video processor: HRESULT 0x%lX\n", hr);
        return AVERROR_EXTERNAL;
    }

    av_log(ctx, AV_LOG_VERBOSE, "D3D11 video processor successfully configured\n");
    return 0;
}
/**
 * Scale/convert one hardware frame with the D3D11 video processor.
 *
 * Fixes over the original:
 * - The output view now targets the texture-array slice handed out by
 *   av_hwframe_get_buffer() (out->data[1]); previously the view always
 *   bound slice 0 and out->data[1] was then clobbered to 0, so with the
 *   10-deep array pool every output frame aliased slice 0.
 * - out->data[0]/data[1] are no longer overwritten after
 *   av_hwframe_get_buffer() — the pool already filled them in.
 * - Error paths use a single goto-cleanup, fixing leaks of inputView
 *   (when output-view creation failed) and of s->outputView (on every
 *   failure after its creation).
 */
static int scale_d3d11_filter_frame(AVFilterLink *inlink, AVFrame *in)
{
    AVFilterContext *ctx = inlink->dst;
    ScaleD3D11Context *s = ctx->priv;
    AVFilterLink *outlink = ctx->outputs[0];
    ID3D11VideoProcessorInputView *inputView = NULL;
    ID3D11VideoContext *videoContext = NULL;
    AVFrame *out = NULL;
    int ret = 0;
    HRESULT hr;

    if (!in) {
        av_log(ctx, AV_LOG_ERROR, "Null input frame\n");
        return AVERROR(EINVAL);
    }

    if (!in->hw_frames_ctx) {
        av_log(ctx, AV_LOG_ERROR, "No hardware frames context in input frame\n");
        ret = AVERROR(EINVAL);
        goto fail;
    }

    if (!s->hw_device_ctx) {
        av_log(ctx, AV_LOG_ERROR, "Filter hardware device context is uninitialized\n");
        ret = AVERROR(EINVAL);
        goto fail;
    }

    /* The input frame must come from the same device type the filter
     * was configured with. */
    AVHWFramesContext *frames_ctx = (AVHWFramesContext *)in->hw_frames_ctx->data;
    AVHWDeviceContext *input_device_ctx = (AVHWDeviceContext *)frames_ctx->device_ref->data;
    AVHWDeviceContext *filter_device_ctx = (AVHWDeviceContext *)s->hw_device_ctx->data;
    if (input_device_ctx->type != filter_device_ctx->type) {
        av_log(ctx, AV_LOG_ERROR, "Mismatch between input and filter hardware device types\n");
        ret = AVERROR(EINVAL);
        goto fail;
    }

    out = av_frame_alloc();
    if (!out) {
        av_log(ctx, AV_LOG_ERROR, "Failed to allocate output frame\n");
        ret = AVERROR(ENOMEM);
        goto fail;
    }

    /* Grab a surface from the output pool; this sets out->data[0]
     * (texture) and out->data[1] (array slice index), as well as
     * width/height/format. */
    ret = av_hwframe_get_buffer(s->hw_frames_ctx_out, out, 0);
    if (ret < 0) {
        av_log(ctx, AV_LOG_ERROR, "Failed to get output frame from pool\n");
        goto fail;
    }

    /* Lazily create the processor once the real input texture size and
     * format are known. */
    if (!s->processor) {
        D3D11_TEXTURE2D_DESC textureDesc;
        ID3D11Texture2D *input_texture = (ID3D11Texture2D *)in->data[0];
        input_texture->lpVtbl->GetDesc(input_texture, &textureDesc);
        s->inputWidth = textureDesc.Width;
        s->inputHeight = textureDesc.Height;
        s->input_format = textureDesc.Format;
        ret = scale_d3d11_configure_processor(s, ctx);
        if (ret < 0) {
            av_log(ctx, AV_LOG_ERROR, "Failed to configure processor\n");
            goto fail;
        }
    }

    /* Input view over the source texture's array slice.
     * NOTE(review): FourCC is filled with a DXGI_FORMAT value; per the
     * D3D11 docs FourCC is a four-character code (0 = use the resource
     * format) — confirm drivers accept this or set it to 0. */
    ID3D11Texture2D *d3d11_texture = (ID3D11Texture2D *)in->data[0];
    int subIdx = (int)(intptr_t)in->data[1];
    D3D11_VIDEO_PROCESSOR_INPUT_VIEW_DESC inputViewDesc = {
        .FourCC = s->input_format,
        .ViewDimension = D3D11_VPIV_DIMENSION_TEXTURE2D,
        .Texture2D.ArraySlice = subIdx
    };
    hr = s->videoDevice->lpVtbl->CreateVideoProcessorInputView(
        s->videoDevice, (ID3D11Resource *)d3d11_texture, s->enumerator, &inputViewDesc, &inputView);
    if (FAILED(hr)) {
        av_log(ctx, AV_LOG_ERROR, "Failed to create input view: HRESULT 0x%lX\n", hr);
        ret = AVERROR_EXTERNAL;
        goto fail;
    }

    /* Output view: must target the slice the pool gave us, otherwise
     * every frame would be rendered into slice 0 of the array texture. */
    ID3D11Texture2D *output_texture = (ID3D11Texture2D *)out->data[0];
    int outIdx = (int)(intptr_t)out->data[1];
    D3D11_TEXTURE2D_DESC outTexDesc;
    output_texture->lpVtbl->GetDesc(output_texture, &outTexDesc);
    D3D11_VIDEO_PROCESSOR_OUTPUT_VIEW_DESC outputViewDesc;
    if (outTexDesc.ArraySize > 1) {
        outputViewDesc = (D3D11_VIDEO_PROCESSOR_OUTPUT_VIEW_DESC) {
            .ViewDimension = D3D11_VPOV_DIMENSION_TEXTURE2DARRAY,
            .Texture2DArray = { .MipSlice = 0, .FirstArraySlice = outIdx, .ArraySize = 1 },
        };
    } else {
        outputViewDesc = (D3D11_VIDEO_PROCESSOR_OUTPUT_VIEW_DESC) {
            .ViewDimension = D3D11_VPOV_DIMENSION_TEXTURE2D,
            .Texture2D = { .MipSlice = 0 },
        };
    }
    hr = s->videoDevice->lpVtbl->CreateVideoProcessorOutputView(
        s->videoDevice, (ID3D11Resource *)output_texture, s->enumerator, &outputViewDesc, &s->outputView);
    if (FAILED(hr)) {
        av_log(ctx, AV_LOG_ERROR, "Failed to create output view: HRESULT 0x%lX\n", hr);
        ret = AVERROR_EXTERNAL;
        goto fail;
    }

    /* Single enabled input stream feeding the processor. */
    D3D11_VIDEO_PROCESSOR_STREAM stream = {
        .Enable = TRUE,
        .pInputSurface = inputView,
        .OutputIndex = 0
    };

    hr = s->context->lpVtbl->QueryInterface(s->context, &IID_ID3D11VideoContext, (void **)&videoContext);
    if (FAILED(hr)) {
        av_log(ctx, AV_LOG_ERROR, "Failed to get video context: HRESULT 0x%lX\n", hr);
        ret = AVERROR_EXTERNAL;
        goto fail;
    }

    /* Run the scale/convert blit. */
    hr = videoContext->lpVtbl->VideoProcessorBlt(videoContext, s->processor, s->outputView, 0, 1, &stream);
    if (FAILED(hr)) {
        av_log(ctx, AV_LOG_ERROR, "VideoProcessorBlt failed: HRESULT 0x%lX\n", hr);
        ret = AVERROR_EXTERNAL;
        goto fail;
    }

    ret = av_frame_copy_props(out, in);
    if (ret < 0) {
        av_log(ctx, AV_LOG_ERROR, "Failed to copy frame properties\n");
        goto fail;
    }

    /* data[0]/data[1] were set by av_hwframe_get_buffer() and must not
     * be touched; just (re)assert the logical dimensions and format. */
    out->width = s->width;
    out->height = s->height;
    out->format = AV_PIX_FMT_D3D11;

    /* Release per-frame COM objects before forwarding. */
    inputView->lpVtbl->Release(inputView);
    videoContext->lpVtbl->Release(videoContext);
    s->outputView->lpVtbl->Release(s->outputView);
    s->outputView = NULL;
    av_frame_free(&in);

    return ff_filter_frame(outlink, out);

fail:
    if (inputView)
        inputView->lpVtbl->Release(inputView);
    if (videoContext)
        videoContext->lpVtbl->Release(videoContext);
    if (s->outputView) {
        s->outputView->lpVtbl->Release(s->outputView);
        s->outputView = NULL;
    }
    av_frame_free(&in);
    av_frame_free(&out);
    return ret;
}
/**
 * Negotiate the output link: evaluate the width/height expressions and
 * create the output AVHWFramesContext (D3D11 texture-array pool).
 *
 * Fixes over the original:
 * - outl->hw_frames_ctx was assigned twice — first a ref of the INPUT
 *   frames context, then overwritten with the new output context
 *   without unref, leaking one AVBufferRef per (re)configure. The
 *   early assignment is dropped (it was always overwritten) and the
 *   final assignment unrefs any previous value.
 * - s->hw_frames_ctx_out is unref'd before being re-allocated, so a
 *   reconfiguration no longer leaks the previous pool.
 * - Removed an unused local (shadowing in_frames_ctx).
 */
static int scale_d3d11_config_props(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    ScaleD3D11Context *s = ctx->priv;
    AVFilterLink *inlink = ctx->inputs[0];
    FilterLink *inl = ff_filter_link(inlink);
    FilterLink *outl = ff_filter_link(outlink);
    int ret;

    /* Clean up any previous resources (reconfiguration). */
    release_d3d11_resources(s);

    /* Evaluate output dimensions from the w/h expressions. */
    ret = ff_scale_eval_dimensions(s, s->w_expr, s->h_expr, inlink, outlink, &s->width, &s->height);
    if (ret < 0) {
        av_log(ctx, AV_LOG_ERROR, "Failed to evaluate dimensions\n");
        return ret;
    }
    outlink->w = s->width;
    outlink->h = s->height;

    if (!inl->hw_frames_ctx) {
        av_log(ctx, AV_LOG_ERROR, "No hw_frames_ctx available on input link\n");
        return AVERROR(EINVAL);
    }

    /* Adopt the input's device as the filter's device (once). */
    if (!s->hw_device_ctx) {
        AVHWFramesContext *in_frames_ctx = (AVHWFramesContext *)inl->hw_frames_ctx->data;
        s->hw_device_ctx = av_buffer_ref(in_frames_ctx->device_ref);
        if (!s->hw_device_ctx) {
            av_log(ctx, AV_LOG_ERROR, "Failed to initialize filter hardware device context\n");
            return AVERROR(ENOMEM);
        }
    }

    /* Borrow the D3D11 device/context; the processor itself is created
     * lazily in filter_frame. */
    AVHWDeviceContext *hwctx = (AVHWDeviceContext *)s->hw_device_ctx->data;
    AVD3D11VADeviceContext *d3d11_hwctx = (AVD3D11VADeviceContext *)hwctx->hwctx;
    s->device = d3d11_hwctx->device;
    s->context = d3d11_hwctx->device_context;
    if (!s->device || !s->context) {
        av_log(ctx, AV_LOG_ERROR, "Failed to get valid D3D11 device or context\n");
        return AVERROR(EINVAL);
    }

    /* (Re)create the hardware frames context for the output pool. */
    av_buffer_unref(&s->hw_frames_ctx_out);
    s->hw_frames_ctx_out = av_hwframe_ctx_alloc(s->hw_device_ctx);
    if (!s->hw_frames_ctx_out)
        return AVERROR(ENOMEM);

    AVHWFramesContext *frames_ctx = (AVHWFramesContext *)s->hw_frames_ctx_out->data;
    frames_ctx->format = AV_PIX_FMT_D3D11;
    frames_ctx->sw_format = s->format;
    frames_ctx->width = s->width;
    frames_ctx->height = s->height;
    frames_ctx->initial_pool_size = 10;
    if (ctx->extra_hw_frames > 0)
        frames_ctx->initial_pool_size += ctx->extra_hw_frames;

    /* Output surfaces must be renderable (video processor output) and
     * encoder-bindable so the result can feed a D3D11 encoder. */
    AVD3D11VAFramesContext *frames_hwctx = frames_ctx->hwctx;
    frames_hwctx->MiscFlags = 0;
    frames_hwctx->BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_VIDEO_ENCODER;

    ret = av_hwframe_ctx_init(s->hw_frames_ctx_out);
    if (ret < 0) {
        av_buffer_unref(&s->hw_frames_ctx_out);
        return ret;
    }

    /* Publish the output frames context on the link, dropping any
     * previous reference first. */
    av_buffer_unref(&outl->hw_frames_ctx);
    outl->hw_frames_ctx = av_buffer_ref(s->hw_frames_ctx_out);
    if (!outl->hw_frames_ctx)
        return AVERROR(ENOMEM);

    av_log(ctx, AV_LOG_VERBOSE, "D3D11 scale config: %dx%d -> %dx%d\n",
           inlink->w, inlink->h, outlink->w, outlink->h);
    return 0;
}
static av_cold void scale_d3d11_uninit(AVFilterContext *ctx)
{
    ScaleD3D11Context *s = ctx->priv;

    /* Drop the owned COM objects first, then the buffer references
     * that keep the device and frame pool alive. */
    release_d3d11_resources(s);
    av_buffer_unref(&s->hw_frames_ctx_out);
    av_buffer_unref(&s->hw_device_ctx);

    /* Free the option strings; av_freep() NULLs the pointers so the
     * generic AVOption cleanup won't double-free them. */
    av_freep(&s->w_expr);
    av_freep(&s->h_expr);
}
/* Single video input; frames are processed as they arrive. */
static const AVFilterPad scale_d3d11_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .filter_frame = scale_d3d11_filter_frame,
    },
};
/* Single video output; dimensions and the hw frame pool are set up in
 * config_props. */
static const AVFilterPad scale_d3d11_outputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .config_props = scale_d3d11_config_props,
    },
};
#define OFFSET(x) offsetof(ScaleD3D11Context, x)
#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)

/* Filter options. Width/height accept scale_eval expressions ("iw",
 * "ih", etc.).
 * NOTE(review): "format" defaults to AV_PIX_FMT_NONE, which
 * config_props passes straight into sw_format of the output frames
 * context — confirm an unset format is rejected with a clear error. */
static const AVOption scale_d3d11_options[] = {
    { "width",  "Output video width",        OFFSET(w_expr), AV_OPT_TYPE_STRING, {.str = "iw"}, .flags = FLAGS },
    { "height", "Output video height",       OFFSET(h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, .flags = FLAGS },
    { "format", "Output video pixel format", OFFSET(format), AV_OPT_TYPE_PIXEL_FMT, { .i64 = AV_PIX_FMT_NONE }, INT_MIN, INT_MAX, .flags=FLAGS },
    { NULL }
};
AVFILTER_DEFINE_CLASS(scale_d3d11);

/* scale_d3d11: GPU scaling and NV12/P010 format conversion via the
 * Direct3D 11 video processor. Operates purely on AV_PIX_FMT_D3D11
 * hardware frames. */
const FFFilter ff_vf_scale_d3d11 = {
    .p.name         = "scale_d3d11",
    .p.description  = NULL_IF_CONFIG_SMALL("Scale video using Direct3D11"),
    .priv_size      = sizeof(ScaleD3D11Context),
    .p.priv_class   = &scale_d3d11_class,
    .init           = scale_d3d11_init,
    .uninit         = scale_d3d11_uninit,
    FILTER_INPUTS(scale_d3d11_inputs),
    FILTER_OUTPUTS(scale_d3d11_outputs),
    FILTER_SINGLE_PIXFMT(AV_PIX_FMT_D3D11),
    .p.flags        = AVFILTER_FLAG_HWDEVICE,
    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
};

View File

@ -42,6 +42,8 @@
#include "thread.h" #include "thread.h"
#include "compat/w32dlfcn.h" #include "compat/w32dlfcn.h"
#define MAX_ARRAY_SIZE 64 // Driver specification limits ArraySize to 64 for decoder-bound resources
typedef HRESULT(WINAPI *PFN_CREATE_DXGI_FACTORY)(REFIID riid, void **ppFactory); typedef HRESULT(WINAPI *PFN_CREATE_DXGI_FACTORY)(REFIID riid, void **ppFactory);
static AVOnce functions_loaded = AV_ONCE_INIT; static AVOnce functions_loaded = AV_ONCE_INIT;
@ -288,6 +290,8 @@ static int d3d11va_frames_init(AVHWFramesContext *ctx)
return AVERROR(EINVAL); return AVERROR(EINVAL);
} }
ctx->initial_pool_size = FFMIN(ctx->initial_pool_size, MAX_ARRAY_SIZE);
texDesc = (D3D11_TEXTURE2D_DESC){ texDesc = (D3D11_TEXTURE2D_DESC){
.Width = ctx->width, .Width = ctx->width,
.Height = ctx->height, .Height = ctx->height,