scale_cuda: add a nearest-neighbour interpolation mode

vf_scale_cuda.c
  * adds INTERP_ALGO_NEAREST to the interpolation enum,
  * adds a "nearest" named constant for the interp_algo option,
  * adds a config case that selects the "_Nearest" kernels from
    vf_scale_cuda_ptx with interp_use_linear = 0 and interp_as_integer = 1.

vf_scale_cuda.cu
  * adds the Subsample_Nearest<T> device template: each thread computes its
    destination pixel (xo, yo) from the block/thread indices, bounds-checks it
    against dst_width/dst_height, maps the pixel centre (+0.5) into source
    coordinates using src/dst size ratios, and stores one tex2D<T> fetch at
    dst[yo*dst_pitch+xo]. bit_depth is accepted but not used by this function.
  * adds NEAREST_KERNEL(T), which emits an extern "C" __global__ wrapper named
    Subsample_Nearest_<T>, instantiated for uchar, uchar2, uchar4, ushort,
    ushort2 and ushort4.

NOTE(review): INTERP_ALGO_NEAREST is inserted before INTERP_ALGO_BILINEAR, so
the numeric values of BILINEAR and BICUBIC each increase by one. interp_algo is
an AV_OPT_TYPE_INT option, so callers passing raw numbers are affected.

NOTE(review): this patch text was reconstructed from a whitespace-collapsed
copy. Line breaks, indentation, column alignment and blank context lines were
restored by convention and from the hunk line counts, and the stripped
template argument lists (<typename T>, <T>) were put back. Verify the context
lines against the target tree (or apply with whitespace tolerance).

diff --git a/libavfilter/vf_scale_cuda.c b/libavfilter/vf_scale_cuda.c
index dc565cda89..dfa638dbf7 100644
--- a/libavfilter/vf_scale_cuda.c
+++ b/libavfilter/vf_scale_cuda.c
@@ -56,6 +56,7 @@ static const enum AVPixelFormat supported_formats[] = {
 enum {
     INTERP_ALGO_DEFAULT,
 
+    INTERP_ALGO_NEAREST,
     INTERP_ALGO_BILINEAR,
     INTERP_ALGO_BICUBIC,
 
@@ -273,6 +274,12 @@ static av_cold int cudascale_config_props(AVFilterLink *outlink)
     extern char vf_scale_cuda_bicubic_ptx[];
 
     switch(s->interp_algo) {
+    case INTERP_ALGO_NEAREST:
+        scaler_ptx = vf_scale_cuda_ptx;
+        function_infix = "_Nearest";
+        s->interp_use_linear = 0;
+        s->interp_as_integer = 1;
+        break;
     case INTERP_ALGO_BILINEAR:
         scaler_ptx = vf_scale_cuda_ptx;
         function_infix = "_Bilinear";
@@ -591,6 +598,7 @@ static const AVOption options[] = {
     { "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS },
     { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS },
     { "interp_algo", "Interpolation algorithm used for resizing", OFFSET(interp_algo), AV_OPT_TYPE_INT, { .i64 = INTERP_ALGO_DEFAULT }, 0, INTERP_ALGO_COUNT - 1, FLAGS, "interp_algo" },
+        { "nearest", "nearest neighbour", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_NEAREST }, 0, 0, FLAGS, "interp_algo" },
         { "bilinear", "bilinear", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_BILINEAR }, 0, 0, FLAGS, "interp_algo" },
         { "bicubic", "bicubic", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_BICUBIC }, 0, 0, FLAGS, "interp_algo" },
     { "passthrough", "Do not process frames at all if parameters match", OFFSET(passthrough), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS },
diff --git a/libavfilter/vf_scale_cuda.cu b/libavfilter/vf_scale_cuda.cu
index 24b1151215..44eef535fd 100644
--- a/libavfilter/vf_scale_cuda.cu
+++ b/libavfilter/vf_scale_cuda.cu
@@ -22,6 +22,27 @@
 
 #include "cuda/vector_helpers.cuh"
 
+template<typename T>
+__device__ inline void Subsample_Nearest(cudaTextureObject_t tex,
+                                         T *dst,
+                                         int dst_width, int dst_height, int dst_pitch,
+                                         int src_width, int src_height,
+                                         int bit_depth)
+{
+    int xo = blockIdx.x * blockDim.x + threadIdx.x;
+    int yo = blockIdx.y * blockDim.y + threadIdx.y;
+
+    if (yo < dst_height && xo < dst_width)
+    {
+        float hscale = (float)src_width / (float)dst_width;
+        float vscale = (float)src_height / (float)dst_height;
+        float xi = (xo + 0.5f) * hscale;
+        float yi = (yo + 0.5f) * vscale;
+
+        dst[yo*dst_pitch+xo] = tex2D<T>(tex, xi, yi);
+    }
+}
+
 template<typename T>
 __device__ inline void Subsample_Bilinear(cudaTextureObject_t tex,
                                           T *dst,
@@ -57,6 +78,27 @@ __device__ inline void Subsample_Bilinear(cudaTextureObject_t tex,
 
 extern "C" {
 
+#define NEAREST_KERNEL(T) \
+    __global__ void Subsample_Nearest_ ## T(cudaTextureObject_t src_tex, \
+                                            T *dst, \
+                                            int dst_width, int dst_height, int dst_pitch, \
+                                            int src_width, int src_height, \
+                                            int bit_depth) \
+    { \
+        Subsample_Nearest<T>(src_tex, dst, \
+                             dst_width, dst_height, dst_pitch, \
+                             src_width, src_height, \
+                             bit_depth); \
+    }
+
+NEAREST_KERNEL(uchar)
+NEAREST_KERNEL(uchar2)
+NEAREST_KERNEL(uchar4)
+
+NEAREST_KERNEL(ushort)
+NEAREST_KERNEL(ushort2)
+NEAREST_KERNEL(ushort4)
+
 #define BILINEAR_KERNEL(T) \
     __global__ void Subsample_Bilinear_ ## T(cudaTextureObject_t src_tex, \
                                              T *dst, \