mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-13 21:28:01 +02:00
avfilter/scale_cuda: expose optional algorithm parameter
This commit is contained in:
parent
cfdddec0c8
commit
9a0b702078
@ -20,6 +20,7 @@
|
|||||||
* DEALINGS IN THE SOFTWARE.
|
* DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <float.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
@ -38,6 +39,8 @@
|
|||||||
#include "scale_eval.h"
|
#include "scale_eval.h"
|
||||||
#include "video.h"
|
#include "video.h"
|
||||||
|
|
||||||
|
#include "vf_scale_cuda.h"
|
||||||
|
|
||||||
static const enum AVPixelFormat supported_formats[] = {
|
static const enum AVPixelFormat supported_formats[] = {
|
||||||
AV_PIX_FMT_YUV420P,
|
AV_PIX_FMT_YUV420P,
|
||||||
AV_PIX_FMT_NV12,
|
AV_PIX_FMT_NV12,
|
||||||
@ -106,6 +109,8 @@ typedef struct CUDAScaleContext {
|
|||||||
int interp_algo;
|
int interp_algo;
|
||||||
int interp_use_linear;
|
int interp_use_linear;
|
||||||
int interp_as_integer;
|
int interp_as_integer;
|
||||||
|
|
||||||
|
float param;
|
||||||
} CUDAScaleContext;
|
} CUDAScaleContext;
|
||||||
|
|
||||||
static av_cold int cudascale_init(AVFilterContext *ctx)
|
static av_cold int cudascale_init(AVFilterContext *ctx)
|
||||||
@ -395,7 +400,8 @@ static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, int channel
|
|||||||
CudaFunctions *cu = s->hwctx->internal->cuda_dl;
|
CudaFunctions *cu = s->hwctx->internal->cuda_dl;
|
||||||
CUdeviceptr dst_devptr = (CUdeviceptr)dst_dptr;
|
CUdeviceptr dst_devptr = (CUdeviceptr)dst_dptr;
|
||||||
CUtexObject tex = 0;
|
CUtexObject tex = 0;
|
||||||
void *args_uchar[] = { &tex, &dst_devptr, &dst_width, &dst_height, &dst_pitch, &src_width, &src_height, &bit_depth };
|
void *args_uchar[] = { &tex, &dst_devptr, &dst_width, &dst_height, &dst_pitch,
|
||||||
|
&src_width, &src_height, &bit_depth, &s->param };
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
CUDA_TEXTURE_DESC tex_desc = {
|
CUDA_TEXTURE_DESC tex_desc = {
|
||||||
@ -602,19 +608,20 @@ static AVFrame *cudascale_get_video_buffer(AVFilterLink *inlink, int w, int h)
|
|||||||
#define OFFSET(x) offsetof(CUDAScaleContext, x)
|
#define OFFSET(x) offsetof(CUDAScaleContext, x)
|
||||||
#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
|
#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
|
||||||
static const AVOption options[] = {
|
static const AVOption options[] = {
|
||||||
{ "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS },
|
{ "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS },
|
||||||
{ "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS },
|
{ "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS },
|
||||||
{ "interp_algo", "Interpolation algorithm used for resizing", OFFSET(interp_algo), AV_OPT_TYPE_INT, { .i64 = INTERP_ALGO_DEFAULT }, 0, INTERP_ALGO_COUNT - 1, FLAGS, "interp_algo" },
|
{ "interp_algo", "Interpolation algorithm used for resizing", OFFSET(interp_algo), AV_OPT_TYPE_INT, { .i64 = INTERP_ALGO_DEFAULT }, 0, INTERP_ALGO_COUNT - 1, FLAGS, "interp_algo" },
|
||||||
{ "nearest", "nearest neighbour", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_NEAREST }, 0, 0, FLAGS, "interp_algo" },
|
{ "nearest", "nearest neighbour", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_NEAREST }, 0, 0, FLAGS, "interp_algo" },
|
||||||
{ "bilinear", "bilinear", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_BILINEAR }, 0, 0, FLAGS, "interp_algo" },
|
{ "bilinear", "bilinear", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_BILINEAR }, 0, 0, FLAGS, "interp_algo" },
|
||||||
{ "bicubic", "bicubic", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_BICUBIC }, 0, 0, FLAGS, "interp_algo" },
|
{ "bicubic", "bicubic", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_BICUBIC }, 0, 0, FLAGS, "interp_algo" },
|
||||||
{ "lanczos", "lanczos", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_LANCZOS }, 0, 0, FLAGS, "interp_algo" },
|
{ "lanczos", "lanczos", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_LANCZOS }, 0, 0, FLAGS, "interp_algo" },
|
||||||
{ "passthrough", "Do not process frames at all if parameters match", OFFSET(passthrough), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS },
|
{ "passthrough", "Do not process frames at all if parameters match", OFFSET(passthrough), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS },
|
||||||
|
{ "param", "Algorithm-Specific parameter", OFFSET(param), AV_OPT_TYPE_FLOAT, { .dbl = SCALE_CUDA_PARAM_DEFAULT }, -FLT_MAX, FLT_MAX, FLAGS },
|
||||||
{ "force_original_aspect_ratio", "decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 2, FLAGS, "force_oar" },
|
{ "force_original_aspect_ratio", "decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 2, FLAGS, "force_oar" },
|
||||||
{ "disable", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, 0, 0, FLAGS, "force_oar" },
|
{ "disable", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, 0, 0, FLAGS, "force_oar" },
|
||||||
{ "decrease", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, 0, 0, FLAGS, "force_oar" },
|
{ "decrease", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, 0, 0, FLAGS, "force_oar" },
|
||||||
{ "increase", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 2 }, 0, 0, FLAGS, "force_oar" },
|
{ "increase", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 2 }, 0, 0, FLAGS, "force_oar" },
|
||||||
{ "force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1}, 1, 256, FLAGS },
|
{ "force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 256, FLAGS },
|
||||||
{ NULL },
|
{ NULL },
|
||||||
};
|
};
|
||||||
|
|
||||||
|
28
libavfilter/vf_scale_cuda.h
Normal file
28
libavfilter/vf_scale_cuda.h
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
/*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||||
|
* DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef AVFILTER_SCALE_CUDA_H
|
||||||
|
#define AVFILTER_SCALE_CUDA_H
|
||||||
|
|
||||||
|
#define SCALE_CUDA_PARAM_DEFAULT 999999.0f
|
||||||
|
|
||||||
|
#endif
|
@ -21,10 +21,11 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "cuda/vector_helpers.cuh"
|
#include "cuda/vector_helpers.cuh"
|
||||||
|
#include "vf_scale_cuda.h"
|
||||||
|
|
||||||
typedef float4 (*coeffs_function_t)(float);
|
typedef float4 (*coeffs_function_t)(float, float);
|
||||||
|
|
||||||
__device__ inline float4 lanczos_coeffs(float x)
|
__device__ inline float4 lanczos_coeffs(float x, float param)
|
||||||
{
|
{
|
||||||
const float pi = 3.141592654f;
|
const float pi = 3.141592654f;
|
||||||
|
|
||||||
@ -46,9 +47,9 @@ __device__ inline float4 lanczos_coeffs(float x)
|
|||||||
return res / (res.x + res.y + res.z + res.w);
|
return res / (res.x + res.y + res.z + res.w);
|
||||||
}
|
}
|
||||||
|
|
||||||
__device__ inline float4 bicubic_coeffs(float x)
|
__device__ inline float4 bicubic_coeffs(float x, float param)
|
||||||
{
|
{
|
||||||
const float A = -0.75f;
|
const float A = param == SCALE_CUDA_PARAM_DEFAULT ? 0.0f : -param;
|
||||||
|
|
||||||
float4 res;
|
float4 res;
|
||||||
res.x = ((A * (x + 1) - 5 * A) * (x + 1) + 8 * A) * (x + 1) - 4 * A;
|
res.x = ((A * (x + 1) - 5 * A) * (x + 1) + 8 * A) * (x + 1) - 4 * A;
|
||||||
@ -86,7 +87,7 @@ __device__ inline void Subsample_Bicubic(coeffs_function_t coeffs_function,
|
|||||||
T *dst,
|
T *dst,
|
||||||
int dst_width, int dst_height, int dst_pitch,
|
int dst_width, int dst_height, int dst_pitch,
|
||||||
int src_width, int src_height,
|
int src_width, int src_height,
|
||||||
int bit_depth)
|
int bit_depth, float param)
|
||||||
{
|
{
|
||||||
int xo = blockIdx.x * blockDim.x + threadIdx.x;
|
int xo = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
int yo = blockIdx.y * blockDim.y + threadIdx.y;
|
int yo = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -104,8 +105,8 @@ __device__ inline void Subsample_Bicubic(coeffs_function_t coeffs_function,
|
|||||||
|
|
||||||
float factor = bit_depth > 8 ? 0xFFFF : 0xFF;
|
float factor = bit_depth > 8 ? 0xFFFF : 0xFF;
|
||||||
|
|
||||||
float4 coeffsX = coeffs_function(fx);
|
float4 coeffsX = coeffs_function(fx, param);
|
||||||
float4 coeffsY = coeffs_function(fy);
|
float4 coeffsY = coeffs_function(fy, param);
|
||||||
|
|
||||||
#define PIX(x, y) tex2D<floatT>(src_tex, (x), (y))
|
#define PIX(x, y) tex2D<floatT>(src_tex, (x), (y))
|
||||||
|
|
||||||
@ -129,7 +130,7 @@ __device__ inline void Subsample_FastBicubic(coeffs_function_t coeffs_function,
|
|||||||
T *dst,
|
T *dst,
|
||||||
int dst_width, int dst_height, int dst_pitch,
|
int dst_width, int dst_height, int dst_pitch,
|
||||||
int src_width, int src_height,
|
int src_width, int src_height,
|
||||||
int bit_depth)
|
int bit_depth, float param)
|
||||||
{
|
{
|
||||||
int xo = blockIdx.x * blockDim.x + threadIdx.x;
|
int xo = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
int yo = blockIdx.y * blockDim.y + threadIdx.y;
|
int yo = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -147,8 +148,8 @@ __device__ inline void Subsample_FastBicubic(coeffs_function_t coeffs_function,
|
|||||||
|
|
||||||
float factor = bit_depth > 8 ? 0xFFFF : 0xFF;
|
float factor = bit_depth > 8 ? 0xFFFF : 0xFF;
|
||||||
|
|
||||||
float4 coeffsX = coeffs_function(fx);
|
float4 coeffsX = coeffs_function(fx, param);
|
||||||
float4 coeffsY = coeffs_function(fy);
|
float4 coeffsY = coeffs_function(fy, param);
|
||||||
|
|
||||||
float h0x, h1x, sx;
|
float h0x, h1x, sx;
|
||||||
float h0y, h1y, sy;
|
float h0y, h1y, sy;
|
||||||
@ -182,12 +183,12 @@ extern "C" {
|
|||||||
T *dst, \
|
T *dst, \
|
||||||
int dst_width, int dst_height, int dst_pitch, \
|
int dst_width, int dst_height, int dst_pitch, \
|
||||||
int src_width, int src_height, \
|
int src_width, int src_height, \
|
||||||
int bit_depth) \
|
int bit_depth, float param) \
|
||||||
{ \
|
{ \
|
||||||
Subsample_Bicubic<T>(&bicubic_coeffs, src_tex, dst, \
|
Subsample_Bicubic<T>(&bicubic_coeffs, src_tex, dst, \
|
||||||
dst_width, dst_height, dst_pitch, \
|
dst_width, dst_height, dst_pitch, \
|
||||||
src_width, src_height, \
|
src_width, src_height, \
|
||||||
bit_depth); \
|
bit_depth, param); \
|
||||||
}
|
}
|
||||||
|
|
||||||
BICUBIC_KERNEL(uchar)
|
BICUBIC_KERNEL(uchar)
|
||||||
@ -204,12 +205,12 @@ BICUBIC_KERNEL(ushort4)
|
|||||||
T *dst, \
|
T *dst, \
|
||||||
int dst_width, int dst_height, int dst_pitch, \
|
int dst_width, int dst_height, int dst_pitch, \
|
||||||
int src_width, int src_height, \
|
int src_width, int src_height, \
|
||||||
int bit_depth) \
|
int bit_depth, float param) \
|
||||||
{ \
|
{ \
|
||||||
Subsample_Bicubic<T>(&lanczos_coeffs, src_tex, dst, \
|
Subsample_Bicubic<T>(&lanczos_coeffs, src_tex, dst, \
|
||||||
dst_width, dst_height, dst_pitch, \
|
dst_width, dst_height, dst_pitch, \
|
||||||
src_width, src_height, \
|
src_width, src_height, \
|
||||||
bit_depth); \
|
bit_depth, param); \
|
||||||
}
|
}
|
||||||
|
|
||||||
LANCZOS_KERNEL(uchar)
|
LANCZOS_KERNEL(uchar)
|
||||||
|
Loading…
Reference in New Issue
Block a user