diff --git a/libavfilter/vf_scale_cuda.c b/libavfilter/vf_scale_cuda.c index f6401b35b0..5405e6a4ed 100644 --- a/libavfilter/vf_scale_cuda.c +++ b/libavfilter/vf_scale_cuda.c @@ -20,6 +20,7 @@ * DEALINGS IN THE SOFTWARE. */ +#include #include #include @@ -38,6 +39,8 @@ #include "scale_eval.h" #include "video.h" +#include "vf_scale_cuda.h" + static const enum AVPixelFormat supported_formats[] = { AV_PIX_FMT_YUV420P, AV_PIX_FMT_NV12, @@ -106,6 +109,8 @@ typedef struct CUDAScaleContext { int interp_algo; int interp_use_linear; int interp_as_integer; + + float param; } CUDAScaleContext; static av_cold int cudascale_init(AVFilterContext *ctx) @@ -395,7 +400,8 @@ static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, int channel CudaFunctions *cu = s->hwctx->internal->cuda_dl; CUdeviceptr dst_devptr = (CUdeviceptr)dst_dptr; CUtexObject tex = 0; - void *args_uchar[] = { &tex, &dst_devptr, &dst_width, &dst_height, &dst_pitch, &src_width, &src_height, &bit_depth }; + void *args_uchar[] = { &tex, &dst_devptr, &dst_width, &dst_height, &dst_pitch, + &src_width, &src_height, &bit_depth, &s->param }; int ret; CUDA_TEXTURE_DESC tex_desc = { @@ -602,19 +608,20 @@ static AVFrame *cudascale_get_video_buffer(AVFilterLink *inlink, int w, int h) #define OFFSET(x) offsetof(CUDAScaleContext, x) #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM) static const AVOption options[] = { - { "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS }, - { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS }, + { "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS }, + { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS }, { "interp_algo", "Interpolation algorithm used for resizing", OFFSET(interp_algo), AV_OPT_TYPE_INT, { .i64 = INTERP_ALGO_DEFAULT }, 0, INTERP_ALGO_COUNT - 1, FLAGS, "interp_algo" }, { "nearest", "nearest neighbour", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_NEAREST }, 0, 0, FLAGS, "interp_algo" }, { "bilinear", "bilinear", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_BILINEAR }, 0, 0, FLAGS, "interp_algo" }, { "bicubic", "bicubic", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_BICUBIC }, 0, 0, FLAGS, "interp_algo" }, { "lanczos", "lanczos", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_LANCZOS }, 0, 0, FLAGS, "interp_algo" }, { "passthrough", "Do not process frames at all if parameters match", OFFSET(passthrough), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS }, + { "param", "Algorithm-Specific parameter", OFFSET(param), AV_OPT_TYPE_FLOAT, { .dbl = SCALE_CUDA_PARAM_DEFAULT }, -FLT_MAX, FLT_MAX, FLAGS }, { "force_original_aspect_ratio", "decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 2, FLAGS, "force_oar" }, { "disable", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, 0, 0, FLAGS, "force_oar" }, { "decrease", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, 0, 0, FLAGS, "force_oar" }, { "increase", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 2 }, 0, 0, FLAGS, "force_oar" }, - { "force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1}, 1, 256, FLAGS }, + { "force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 256, FLAGS }, { NULL }, }; diff --git a/libavfilter/vf_scale_cuda.h b/libavfilter/vf_scale_cuda.h new file mode 100644 index 0000000000..40d5b9cfac --- /dev/null +++ b/libavfilter/vf_scale_cuda.h @@ -0,0 +1,28 @@ +/* + * This file is part of FFmpeg. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef AVFILTER_SCALE_CUDA_H +#define AVFILTER_SCALE_CUDA_H + +#define SCALE_CUDA_PARAM_DEFAULT 999999.0f + +#endif diff --git a/libavfilter/vf_scale_cuda_bicubic.cu b/libavfilter/vf_scale_cuda_bicubic.cu index fe451ec54b..554667383a 100644 --- a/libavfilter/vf_scale_cuda_bicubic.cu +++ b/libavfilter/vf_scale_cuda_bicubic.cu @@ -21,10 +21,11 @@ */ #include "cuda/vector_helpers.cuh" +#include "vf_scale_cuda.h" -typedef float4 (*coeffs_function_t)(float); +typedef float4 (*coeffs_function_t)(float, float); -__device__ inline float4 lanczos_coeffs(float x) +__device__ inline float4 lanczos_coeffs(float x, float param) { const float pi = 3.141592654f; @@ -46,9 +47,9 @@ __device__ inline float4 lanczos_coeffs(float x) return res / (res.x + res.y + res.z + res.w); } -__device__ inline float4 bicubic_coeffs(float x) +__device__ inline float4 bicubic_coeffs(float x, float param) { - const float A = -0.75f; + const float A = param == SCALE_CUDA_PARAM_DEFAULT ? 0.0f : -param; float4 res; res.x = ((A * (x + 1) - 5 * A) * (x + 1) + 8 * A) * (x + 1) - 4 * A; @@ -86,7 +87,7 @@ __device__ inline void Subsample_Bicubic(coeffs_function_t coeffs_function, T *dst, int dst_width, int dst_height, int dst_pitch, int src_width, int src_height, - int bit_depth) + int bit_depth, float param) { int xo = blockIdx.x * blockDim.x + threadIdx.x; int yo = blockIdx.y * blockDim.y + threadIdx.y; @@ -104,8 +105,8 @@ __device__ inline void Subsample_Bicubic(coeffs_function_t coeffs_function, float factor = bit_depth > 8 ? 0xFFFF : 0xFF; - float4 coeffsX = coeffs_function(fx); - float4 coeffsY = coeffs_function(fy); + float4 coeffsX = coeffs_function(fx, param); + float4 coeffsY = coeffs_function(fy, param); #define PIX(x, y) tex2D(src_tex, (x), (y)) @@ -129,7 +130,7 @@ __device__ inline void Subsample_FastBicubic(coeffs_function_t coeffs_function, T *dst, int dst_width, int dst_height, int dst_pitch, int src_width, int src_height, - int bit_depth) + int bit_depth, float param) { int xo = blockIdx.x * blockDim.x + threadIdx.x; int yo = blockIdx.y * blockDim.y + threadIdx.y; @@ -147,8 +148,8 @@ __device__ inline void Subsample_FastBicubic(coeffs_function_t coeffs_function, float factor = bit_depth > 8 ? 0xFFFF : 0xFF; - float4 coeffsX = coeffs_function(fx); - float4 coeffsY = coeffs_function(fy); + float4 coeffsX = coeffs_function(fx, param); + float4 coeffsY = coeffs_function(fy, param); float h0x, h1x, sx; float h0y, h1y, sy; @@ -182,12 +183,12 @@ extern "C" { T *dst, \ int dst_width, int dst_height, int dst_pitch, \ int src_width, int src_height, \ - int bit_depth) \ + int bit_depth, float param) \ { \ Subsample_Bicubic(&bicubic_coeffs, src_tex, dst, \ dst_width, dst_height, dst_pitch, \ src_width, src_height, \ - bit_depth); \ + bit_depth, param); \ } BICUBIC_KERNEL(uchar) @@ -204,12 +205,12 @@ BICUBIC_KERNEL(ushort4) T *dst, \ int dst_width, int dst_height, int dst_pitch, \ int src_width, int src_height, \ - int bit_depth) \ + int bit_depth, float param) \ { \ Subsample_Bicubic(&lanczos_coeffs, src_tex, dst, \ dst_width, dst_height, dst_pitch, \ src_width, src_height, \ - bit_depth); \ + bit_depth, param); \ } LANCZOS_KERNEL(uchar)