2016-01-23 18:15:53 +02:00
/*
* Copyright ( C ) 2010 - 2011 Kevin Stone
* Copyright ( C ) 2016 Paul B Mahol
*
* This file is part of FFmpeg .
*
* FFmpeg is free software ; you can redistribute it and / or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation ; either version 2 of the License , or
* ( at your option ) any later version .
*
* FFmpeg is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg ; if not , write to the Free Software Foundation , Inc . ,
* 51 Franklin Street , Fifth Floor , Boston , MA 02110 - 1301 USA .
*/
# include <float.h>
# include "libavutil/common.h"
# include "libavutil/float_dsp.h"
# include "libavutil/imgutils.h"
2021-01-17 18:39:28 +02:00
# include "libavutil/mem_internal.h"
2016-01-23 18:15:53 +02:00
# include "libavutil/opt.h"
# include "libavutil/pixdesc.h"
# include "avfilter.h"
# include "formats.h"
# include "internal.h"
# include "video.h"
2021-01-17 18:39:28 +02:00
/* NOTE(review): presumably the expected size of the weights file in bytes;
 * the code that consumes this constant is outside this view - confirm. */
static const size_t NNEDI_WEIGHTS_SIZE = 13574928;

/* Predictor window width/height for each "nsize" option value
 * (0..6: s8x6, s16x6, s32x6, s48x6, s8x4, s16x4, s32x4). */
static const uint8_t NNEDI_XDIM[7] = { 8, 16, 32, 48, 8, 16, 32 };
static const uint8_t NNEDI_YDIM[7] = { 6,  6,  6,  6, 4,  4,  4 };

/* Neuron count for each "nns" option value (0..4: n16 .. n256). */
static const uint16_t NNEDI_NNS[5] = { 16, 32, 64, 128, 256 };
2021-01-19 18:06:05 +02:00
/**
 * Weights of one prescreener network. All arrays are 32-byte aligned.
 *
 * Layer 0 holds four kernels of up to 64 taps: process_old() uses 48 of
 * them, process_new() uses all 64. Layer 1 is 4x4. Layer 2 (4x8) is only
 * read by process_old().
 */
typedef struct PrescreenerCoefficients {
    DECLARE_ALIGNED(32, float, kernel_l0)[4][16 * 4];
    DECLARE_ALIGNED(32, float, bias_l0)[4];

    DECLARE_ALIGNED(32, float, kernel_l1)[4][4];
    DECLARE_ALIGNED(32, float, bias_l1)[4];

    DECLARE_ALIGNED(32, float, kernel_l2)[4][8];
    DECLARE_ALIGNED(32, float, bias_l2)[4];
} PrescreenerCoefficients;
2021-01-17 18:39:28 +02:00
/**
 * Weights of one predictor network for a given window size and neuron count.
 *
 * The filter/bias pointers address sub-ranges of the single allocation
 * referenced by data; they are not owned separately.
 */
typedef struct PredictorCoefficients {
    int xdim, ydim;          /* window width and height, in samples */
    int nns;                 /* number of neurons */
    int nsize;               /* xdim * ydim: taps per filter */
    float *data;             /* base of the backing allocation */
    float *softmax_q1;       /* nns filters of nsize taps each */
    float *elliott_q1;       /* nns filters of nsize taps each */
    float *softmax_bias_q1;  /* nns biases */
    float *elliott_bias_q1;  /* nns biases */
    float *softmax_q2;       /* second weight set, read only when use_q2 is set */
    float *elliott_q2;
    float *softmax_bias_q2;
    float *elliott_bias_q2;
} PredictorCoefficients;
2016-01-23 18:15:53 +02:00
typedef struct NNEDIContext {
const AVClass * class ;
char * weights_file ;
2021-01-20 14:33:41 +02:00
AVFrame * prev ;
2016-01-23 18:15:53 +02:00
int eof ;
2021-01-20 14:33:41 +02:00
int64_t pts ;
2016-01-23 18:15:53 +02:00
AVFloatDSPContext * fdsp ;
2021-01-17 18:39:28 +02:00
int depth ;
2016-01-23 18:15:53 +02:00
int nb_planes ;
2021-01-17 18:39:28 +02:00
int nb_threads ;
2016-01-23 18:15:53 +02:00
int linesize [ 4 ] ;
2021-01-17 18:39:28 +02:00
int planewidth [ 4 ] ;
2016-01-23 18:15:53 +02:00
int planeheight [ 4 ] ;
2021-01-17 18:39:28 +02:00
int field_n ;
2021-01-19 18:06:05 +02:00
PrescreenerCoefficients prescreener [ 4 ] ;
2021-01-17 18:39:28 +02:00
PredictorCoefficients coeffs [ 2 ] [ 5 ] [ 7 ] ;
2016-01-23 18:15:53 +02:00
2021-01-17 18:39:28 +02:00
float half ;
float in_scale ;
float out_scale ;
2016-01-23 18:15:53 +02:00
// Parameters
int deint ;
int field ;
int process_plane ;
int nsize ;
int nnsparam ;
int qual ;
int etype ;
int pscrn ;
2021-01-17 18:39:28 +02:00
int input_size ;
2021-01-19 18:06:05 +02:00
uint8_t * * prescreen_buf ;
float * * input_buf ;
float * * output_buf ;
2021-01-17 18:39:28 +02:00
void ( * read ) ( const uint8_t * src , float * dst ,
int src_stride , int dst_stride ,
int width , int height , float scale ) ;
void ( * write ) ( const float * src , uint8_t * dst ,
int src_stride , int dst_stride ,
int width , int height , int depth , float scale ) ;
void ( * prescreen [ 2 ] ) ( AVFilterContext * ctx ,
const void * src , ptrdiff_t src_stride ,
2021-01-19 18:06:05 +02:00
uint8_t * prescreen , int N ,
const PrescreenerCoefficients * const coeffs ) ;
2016-01-23 18:15:53 +02:00
} NNEDIContext ;
/* Byte offset of an option field inside NNEDIContext. */
#define OFFSET(x) offsetof(NNEDIContext, x)
/* Option flags; parenthesized so the macros expand safely inside any
 * expression. RFLAGS additionally marks the option as runtime-changeable. */
#define RFLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_RUNTIME_PARAM)
#define FLAGS  (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM)
static const AVOption nnedi_options [ ] = {
{ " weights " , " set weights file " , OFFSET ( weights_file ) , AV_OPT_TYPE_STRING , { . str = " nnedi3_weights.bin " } , 0 , 0 , FLAGS } ,
2021-01-17 18:39:28 +02:00
{ " deint " , " set which frames to deinterlace " , OFFSET ( deint ) , AV_OPT_TYPE_INT , { . i64 = 0 } , 0 , 1 , RFLAGS , " deint " } ,
{ " all " , " deinterlace all frames " , 0 , AV_OPT_TYPE_CONST , { . i64 = 0 } , 0 , 0 , RFLAGS , " deint " } ,
{ " interlaced " , " only deinterlace frames marked as interlaced " , 0 , AV_OPT_TYPE_CONST , { . i64 = 1 } , 0 , 0 , RFLAGS , " deint " } ,
{ " field " , " set mode of operation " , OFFSET ( field ) , AV_OPT_TYPE_INT , { . i64 = - 1 } , - 2 , 3 , RFLAGS , " field " } ,
{ " af " , " use frame flags, both fields " , 0 , AV_OPT_TYPE_CONST , { . i64 = - 2 } , 0 , 0 , RFLAGS , " field " } ,
{ " a " , " use frame flags, single field " , 0 , AV_OPT_TYPE_CONST , { . i64 = - 1 } , 0 , 0 , RFLAGS , " field " } ,
{ " t " , " use top field only " , 0 , AV_OPT_TYPE_CONST , { . i64 = 0 } , 0 , 0 , RFLAGS , " field " } ,
{ " b " , " use bottom field only " , 0 , AV_OPT_TYPE_CONST , { . i64 = 1 } , 0 , 0 , RFLAGS , " field " } ,
{ " tf " , " use both fields, top first " , 0 , AV_OPT_TYPE_CONST , { . i64 = 2 } , 0 , 0 , RFLAGS , " field " } ,
{ " bf " , " use both fields, bottom first " , 0 , AV_OPT_TYPE_CONST , { . i64 = 3 } , 0 , 0 , RFLAGS , " field " } ,
{ " planes " , " set which planes to process " , OFFSET ( process_plane ) , AV_OPT_TYPE_INT , { . i64 = 7 } , 0 , 15 , RFLAGS } ,
{ " nsize " , " set size of local neighborhood around each pixel, used by the predictor neural network " , OFFSET ( nsize ) , AV_OPT_TYPE_INT , { . i64 = 6 } , 0 , 6 , RFLAGS , " nsize " } ,
{ " s8x6 " , NULL , 0 , AV_OPT_TYPE_CONST , { . i64 = 0 } , 0 , 0 , RFLAGS , " nsize " } ,
{ " s16x6 " , NULL , 0 , AV_OPT_TYPE_CONST , { . i64 = 1 } , 0 , 0 , RFLAGS , " nsize " } ,
{ " s32x6 " , NULL , 0 , AV_OPT_TYPE_CONST , { . i64 = 2 } , 0 , 0 , RFLAGS , " nsize " } ,
{ " s48x6 " , NULL , 0 , AV_OPT_TYPE_CONST , { . i64 = 3 } , 0 , 0 , RFLAGS , " nsize " } ,
{ " s8x4 " , NULL , 0 , AV_OPT_TYPE_CONST , { . i64 = 4 } , 0 , 0 , RFLAGS , " nsize " } ,
{ " s16x4 " , NULL , 0 , AV_OPT_TYPE_CONST , { . i64 = 5 } , 0 , 0 , RFLAGS , " nsize " } ,
{ " s32x4 " , NULL , 0 , AV_OPT_TYPE_CONST , { . i64 = 6 } , 0 , 0 , RFLAGS , " nsize " } ,
{ " nns " , " set number of neurons in predictor neural network " , OFFSET ( nnsparam ) , AV_OPT_TYPE_INT , { . i64 = 1 } , 0 , 4 , RFLAGS , " nns " } ,
{ " n16 " , NULL , 0 , AV_OPT_TYPE_CONST , { . i64 = 0 } , 0 , 0 , RFLAGS , " nns " } ,
{ " n32 " , NULL , 0 , AV_OPT_TYPE_CONST , { . i64 = 1 } , 0 , 0 , RFLAGS , " nns " } ,
{ " n64 " , NULL , 0 , AV_OPT_TYPE_CONST , { . i64 = 2 } , 0 , 0 , RFLAGS , " nns " } ,
{ " n128 " , NULL , 0 , AV_OPT_TYPE_CONST , { . i64 = 3 } , 0 , 0 , RFLAGS , " nns " } ,
{ " n256 " , NULL , 0 , AV_OPT_TYPE_CONST , { . i64 = 4 } , 0 , 0 , RFLAGS , " nns " } ,
{ " qual " , " set quality " , OFFSET ( qual ) , AV_OPT_TYPE_INT , { . i64 = 1 } , 1 , 2 , RFLAGS , " qual " } ,
{ " fast " , NULL , 0 , AV_OPT_TYPE_CONST , { . i64 = 1 } , 0 , 0 , RFLAGS , " qual " } ,
{ " slow " , NULL , 0 , AV_OPT_TYPE_CONST , { . i64 = 2 } , 0 , 0 , RFLAGS , " qual " } ,
{ " etype " , " set which set of weights to use in the predictor " , OFFSET ( etype ) , AV_OPT_TYPE_INT , { . i64 = 0 } , 0 , 1 , RFLAGS , " etype " } ,
{ " a " , " weights trained to minimize absolute error " , 0 , AV_OPT_TYPE_CONST , { . i64 = 0 } , 0 , 0 , RFLAGS , " etype " } ,
{ " abs " , " weights trained to minimize absolute error " , 0 , AV_OPT_TYPE_CONST , { . i64 = 0 } , 0 , 0 , RFLAGS , " etype " } ,
{ " s " , " weights trained to minimize squared error " , 0 , AV_OPT_TYPE_CONST , { . i64 = 1 } , 0 , 0 , RFLAGS , " etype " } ,
{ " mse " , " weights trained to minimize squared error " , 0 , AV_OPT_TYPE_CONST , { . i64 = 1 } , 0 , 0 , RFLAGS , " etype " } ,
{ " pscrn " , " set prescreening " , OFFSET ( pscrn ) , AV_OPT_TYPE_INT , { . i64 = 2 } , 0 , 4 , RFLAGS , " pscrn " } ,
{ " none " , NULL , 0 , AV_OPT_TYPE_CONST , { . i64 = 0 } , 0 , 0 , RFLAGS , " pscrn " } ,
{ " original " , NULL , 0 , AV_OPT_TYPE_CONST , { . i64 = 1 } , 0 , 0 , RFLAGS , " pscrn " } ,
{ " new " , NULL , 0 , AV_OPT_TYPE_CONST , { . i64 = 2 } , 0 , 0 , RFLAGS , " pscrn " } ,
{ " new2 " , NULL , 0 , AV_OPT_TYPE_CONST , { . i64 = 3 } , 0 , 0 , RFLAGS , " pscrn " } ,
{ " new3 " , NULL , 0 , AV_OPT_TYPE_CONST , { . i64 = 4 } , 0 , 0 , RFLAGS , " pscrn " } ,
2016-01-23 18:15:53 +02:00
{ NULL }
} ;
AVFILTER_DEFINE_CLASS ( nnedi ) ;
/**
 * Configure the output link from the first input link.
 *
 * The output time base is half the input time base (frame pts values are
 * doubled in filter_frame()). Width and height are copied unchanged. The
 * frame rate is doubled only in the modes that emit one frame per field
 * (field == -2 or field > 1).
 *
 * @return 0
 */
static int config_output(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    const NNEDIContext *const s = ctx->priv;

    outlink->time_base = av_mul_q(ctx->inputs[0]->time_base, (AVRational){1, 2});
    outlink->w         = ctx->inputs[0]->w;
    outlink->h         = ctx->inputs[0]->h;

    if (s->field == -2 || s->field > 1)
        outlink->frame_rate = av_mul_q(ctx->inputs[0]->frame_rate,
                                       (AVRational){2, 1});

    return 0;
}
2021-09-29 15:57:43 +02:00
static const enum AVPixelFormat pix_fmts [ ] = {
AV_PIX_FMT_GRAY8 ,
AV_PIX_FMT_GRAY9 , AV_PIX_FMT_GRAY10 , AV_PIX_FMT_GRAY12 , AV_PIX_FMT_GRAY14 , AV_PIX_FMT_GRAY16 ,
AV_PIX_FMT_YUV410P , AV_PIX_FMT_YUV411P ,
AV_PIX_FMT_YUV420P , AV_PIX_FMT_YUV422P ,
AV_PIX_FMT_YUV440P , AV_PIX_FMT_YUV444P ,
AV_PIX_FMT_YUVJ444P , AV_PIX_FMT_YUVJ440P ,
AV_PIX_FMT_YUVJ422P , AV_PIX_FMT_YUVJ420P ,
AV_PIX_FMT_YUVJ411P ,
AV_PIX_FMT_YUVA420P , AV_PIX_FMT_YUVA422P , AV_PIX_FMT_YUVA444P ,
AV_PIX_FMT_GBRP , AV_PIX_FMT_GBRAP ,
AV_PIX_FMT_YUV420P9 , AV_PIX_FMT_YUV422P9 , AV_PIX_FMT_YUV444P9 ,
AV_PIX_FMT_YUV420P10 , AV_PIX_FMT_YUV422P10 , AV_PIX_FMT_YUV444P10 ,
AV_PIX_FMT_YUV440P10 ,
AV_PIX_FMT_YUV420P12 , AV_PIX_FMT_YUV422P12 , AV_PIX_FMT_YUV444P12 ,
AV_PIX_FMT_YUV440P12 ,
AV_PIX_FMT_YUV420P14 , AV_PIX_FMT_YUV422P14 , AV_PIX_FMT_YUV444P14 ,
AV_PIX_FMT_YUV420P16 , AV_PIX_FMT_YUV422P16 , AV_PIX_FMT_YUV444P16 ,
AV_PIX_FMT_GBRP9 , AV_PIX_FMT_GBRP10 , AV_PIX_FMT_GBRP12 , AV_PIX_FMT_GBRP14 , AV_PIX_FMT_GBRP16 ,
AV_PIX_FMT_YUVA444P9 , AV_PIX_FMT_YUVA444P10 , AV_PIX_FMT_YUVA444P12 , AV_PIX_FMT_YUVA444P16 ,
AV_PIX_FMT_YUVA422P9 , AV_PIX_FMT_YUVA422P10 , AV_PIX_FMT_YUVA422P12 , AV_PIX_FMT_YUVA422P16 ,
AV_PIX_FMT_YUVA420P9 , AV_PIX_FMT_YUVA420P10 , AV_PIX_FMT_YUVA420P16 ,
AV_PIX_FMT_GBRAP10 , AV_PIX_FMT_GBRAP12 , AV_PIX_FMT_GBRAP16 ,
AV_PIX_FMT_NONE
} ;
2016-01-23 18:15:53 +02:00
2021-01-19 18:06:05 +02:00
static float dot_dsp ( const NNEDIContext * const s , const float * kernel , const float * input ,
2021-01-19 15:49:45 +02:00
int n , float scale , float bias )
2016-01-23 18:15:53 +02:00
{
2021-01-20 14:05:52 +02:00
float sum , y ;
2016-01-23 18:15:53 +02:00
2021-01-17 18:39:28 +02:00
sum = s - > fdsp - > scalarproduct_float ( kernel , input , n ) ;
2016-01-23 18:15:53 +02:00
2021-01-20 14:05:52 +02:00
y = sum * scale + bias + 1e-20 f ;
return y ;
2021-01-17 18:39:28 +02:00
}
2016-01-23 18:15:53 +02:00
2021-01-17 18:39:28 +02:00
/**
 * Squashing function x / (1 + |x|).
 *
 * @return a value with the sign of x and magnitude below 1
 */
static float elliott(float x)
{
    const float denom = fabsf(x) + 1.0f;

    return x / denom;
}
2016-01-23 18:15:53 +02:00
2021-01-17 18:39:28 +02:00
/**
 * Apply elliott() in place to the first size elements of input.
 */
static void transform_elliott(float *input, int size)
{
    for (int i = 0; i < size; i++)
        input[i] = elliott(input[i]);
}
2016-01-23 18:15:53 +02:00
2021-01-17 18:39:28 +02:00
static void process_old ( AVFilterContext * ctx ,
const void * src , ptrdiff_t src_stride ,
uint8_t * prescreen , int N ,
2021-01-19 18:06:05 +02:00
const PrescreenerCoefficients * const m_data )
2021-01-17 18:39:28 +02:00
{
NNEDIContext * s = ctx - > priv ;
const float * src_p = src ;
2016-01-23 18:15:53 +02:00
2021-01-17 18:39:28 +02:00
// Adjust source pointer to point to top-left of filter window.
const float * window = src_p - 2 * src_stride - 5 ;
2016-01-23 18:15:53 +02:00
2021-01-17 18:39:28 +02:00
for ( int j = 0 ; j < N ; j + + ) {
LOCAL_ALIGNED_32 ( float , input , [ 48 ] ) ;
float state [ 12 ] ;
2016-01-23 18:15:53 +02:00
2021-01-17 18:39:28 +02:00
for ( int i = 0 ; i < 4 ; i + + )
memcpy ( input + i * 12 , window + i * src_stride + j , 12 * sizeof ( float ) ) ;
2016-01-23 18:15:53 +02:00
2021-01-17 18:39:28 +02:00
// Layer 0.
for ( int n = 0 ; n < 4 ; n + + )
state [ n ] = dot_dsp ( s , m_data - > kernel_l0 [ n ] , input , 48 , 1.0f , m_data - > bias_l0 [ n ] ) ;
transform_elliott ( state + 1 , 3 ) ;
2016-01-23 18:15:53 +02:00
2021-01-17 18:39:28 +02:00
// Layer 1.
for ( int n = 0 ; n < 4 ; n + + )
2021-01-19 15:49:45 +02:00
state [ n + 4 ] = dot_dsp ( s , m_data - > kernel_l1 [ n ] , state , 4 , 1.0f , m_data - > bias_l1 [ n ] ) ;
2021-01-17 18:39:28 +02:00
transform_elliott ( state + 4 , 3 ) ;
2016-01-23 18:15:53 +02:00
2021-01-17 18:39:28 +02:00
// Layer 2.
for ( int n = 0 ; n < 4 ; n + + )
2021-01-19 15:49:45 +02:00
state [ n + 8 ] = dot_dsp ( s , m_data - > kernel_l2 [ n ] , state , 8 , 1.0f , m_data - > bias_l2 [ n ] ) ;
2016-01-23 18:15:53 +02:00
2021-01-17 18:39:28 +02:00
prescreen [ j ] = FFMAX ( state [ 10 ] , state [ 11 ] ) < = FFMAX ( state [ 8 ] , state [ 9 ] ) ? 255 : 0 ;
}
2016-01-23 18:15:53 +02:00
}
2021-01-17 18:39:28 +02:00
static void process_new ( AVFilterContext * ctx ,
const void * src , ptrdiff_t src_stride ,
uint8_t * prescreen , int N ,
2021-01-19 18:06:05 +02:00
const PrescreenerCoefficients * const m_data )
2016-01-23 18:15:53 +02:00
{
2021-01-17 18:39:28 +02:00
NNEDIContext * s = ctx - > priv ;
const float * src_p = src ;
2016-01-23 18:15:53 +02:00
2021-01-17 18:39:28 +02:00
// Adjust source pointer to point to top-left of filter window.
const float * window = src_p - 2 * src_stride - 6 ;
2016-01-23 18:15:53 +02:00
2021-01-17 18:39:28 +02:00
for ( int j = 0 ; j < N ; j + = 4 ) {
LOCAL_ALIGNED_32 ( float , input , [ 64 ] ) ;
float state [ 8 ] ;
2016-01-23 18:15:53 +02:00
2021-01-17 18:39:28 +02:00
for ( int i = 0 ; i < 4 ; i + + )
memcpy ( input + i * 16 , window + i * src_stride + j , 16 * sizeof ( float ) ) ;
2016-01-23 18:15:53 +02:00
2021-01-17 18:39:28 +02:00
for ( int n = 0 ; n < 4 ; n + + )
state [ n ] = dot_dsp ( s , m_data - > kernel_l0 [ n ] , input , 64 , 1.0f , m_data - > bias_l0 [ n ] ) ;
transform_elliott ( state , 4 ) ;
2016-01-23 18:15:53 +02:00
2021-01-17 18:39:28 +02:00
for ( int n = 0 ; n < 4 ; n + + )
2021-01-19 15:49:45 +02:00
state [ n + 4 ] = dot_dsp ( s , m_data - > kernel_l1 [ n ] , state , 4 , 1.0f , m_data - > bias_l1 [ n ] ) ;
2016-01-23 18:15:53 +02:00
2021-01-17 18:39:28 +02:00
for ( int n = 0 ; n < 4 ; n + + )
prescreen [ j + n ] = state [ n + 4 ] > 0.f ;
2016-01-23 18:15:53 +02:00
}
}
2021-01-19 15:49:45 +02:00
static int filter_offset ( int nn , const PredictorCoefficients * const model )
2016-01-23 18:15:53 +02:00
{
2021-01-19 15:49:45 +02:00
return nn * model - > nsize ;
2016-01-23 18:15:53 +02:00
}
2021-01-19 15:49:45 +02:00
static const float * softmax_q1_filter ( int nn ,
const PredictorCoefficients * const model )
2016-01-23 18:15:53 +02:00
{
2021-01-17 18:39:28 +02:00
return model - > softmax_q1 + filter_offset ( nn , model ) ;
2016-01-23 18:15:53 +02:00
}
2021-01-19 15:49:45 +02:00
static const float * elliott_q1_filter ( int nn ,
const PredictorCoefficients * const model )
2016-01-23 18:15:53 +02:00
{
2021-01-17 18:39:28 +02:00
return model - > elliott_q1 + filter_offset ( nn , model ) ;
2016-01-23 18:15:53 +02:00
}
2021-01-19 15:49:45 +02:00
static const float * softmax_q2_filter ( int nn ,
const PredictorCoefficients * const model )
2016-01-23 18:15:53 +02:00
{
2021-01-17 18:39:28 +02:00
return model - > softmax_q2 + filter_offset ( nn , model ) ;
2016-01-23 18:15:53 +02:00
}
2021-01-19 15:49:45 +02:00
static const float * elliott_q2_filter ( int nn ,
const PredictorCoefficients * const model )
2016-01-23 18:15:53 +02:00
{
2021-01-17 18:39:28 +02:00
return model - > elliott_q2 + filter_offset ( nn , model ) ;
2016-01-23 18:15:53 +02:00
}
2021-01-17 18:39:28 +02:00
static void gather_input ( const float * src , ptrdiff_t src_stride ,
float * buf , float mstd [ 4 ] ,
2021-01-19 15:49:45 +02:00
const PredictorCoefficients * const model )
2016-01-23 18:15:53 +02:00
{
2021-01-20 14:05:52 +02:00
const float scale = 1.f / model - > nsize ;
2021-01-19 18:06:05 +02:00
float sum = 0.f ;
float sum_sq = 0.f ;
2021-01-17 18:39:28 +02:00
float tmp ;
2016-01-23 18:15:53 +02:00
2021-01-17 18:39:28 +02:00
for ( int i = 0 ; i < model - > ydim ; i + + ) {
2021-01-19 15:49:45 +02:00
memcpy ( buf , src , model - > xdim * sizeof ( float ) ) ;
2021-01-17 18:39:28 +02:00
for ( int j = 0 ; j < model - > xdim ; j + + ) {
2021-01-19 15:49:45 +02:00
const float val = src [ j ] ;
2016-01-23 18:15:53 +02:00
2021-01-17 18:39:28 +02:00
sum + = val ;
sum_sq + = val * val ;
2016-01-23 18:15:53 +02:00
}
2021-01-19 15:49:45 +02:00
src + = src_stride ;
buf + = model - > xdim ;
2016-01-23 18:15:53 +02:00
}
2021-01-20 14:05:52 +02:00
mstd [ 0 ] = sum * scale ;
2021-01-17 18:39:28 +02:00
mstd [ 3 ] = 0.f ;
2021-01-20 14:05:52 +02:00
tmp = sum_sq * scale - mstd [ 0 ] * mstd [ 0 ] ;
2021-01-17 18:39:28 +02:00
if ( tmp < FLT_EPSILON ) {
mstd [ 1 ] = 0.0f ;
mstd [ 2 ] = 0.0f ;
} else {
mstd [ 1 ] = sqrtf ( tmp ) ;
2016-01-23 18:15:53 +02:00
mstd [ 2 ] = 1.0f / mstd [ 1 ] ;
}
}
2021-01-17 18:39:28 +02:00
/**
 * expf() of x after clipping x to [-80, 80].
 */
static float softmax_exp(float x)
{
    return expf(av_clipf(x, -80.f, 80.f));
}
2021-01-17 18:39:28 +02:00
/**
 * Apply softmax_exp() in place to the first size elements of input.
 */
static void transform_softmax_exp(float *input, int size)
{
    for (int i = 0; i < size; i++)
        input[i] = softmax_exp(input[i]);
}
2021-01-17 18:39:28 +02:00
/**
 * Accumulate one weighted prediction into mstd[3].
 *
 * Computes the softmax-weighted average of elliott(el[i]), scales it by
 * 5 * mstd[1], adds mstd[0], and adds the result to mstd[3]. When the
 * weight sum is not above 1e-10, only mstd[0] is added.
 *
 * @param softmax n weights, already exponentiated
 * @param el      n raw values, passed through elliott() here
 * @param n       number of neurons
 * @param mstd    statistics from gather_input(); [3] is updated
 */
static void wae5(const float *softmax, const float *el,
                 int n, float mstd[4])
{
    float vsum = 0.0f, wsum = 0.0f;

    for (int i = 0; i < n; i++) {
        vsum += softmax[i] * elliott(el[i]);
        wsum += softmax[i];
    }

    if (wsum > 1e-10f)
        mstd[3] += (5.0f * vsum) / wsum * mstd[1] + mstd[0];
    else
        mstd[3] += mstd[0];
}
2021-01-17 18:39:28 +02:00
static void predictor ( AVFilterContext * ctx ,
const void * src , ptrdiff_t src_stride , void * dst ,
const uint8_t * prescreen , int N ,
2021-01-19 18:06:05 +02:00
const PredictorCoefficients * const model , int use_q2 )
2016-01-23 18:15:53 +02:00
{
2021-01-19 18:06:05 +02:00
const NNEDIContext * const s = ctx - > priv ;
2021-01-17 18:39:28 +02:00
const float * src_p = src ;
float * dst_p = dst ;
// Adjust source pointer to point to top-left of filter window.
const float * window = src_p - ( model - > ydim / 2 ) * src_stride - ( model - > xdim / 2 - 1 ) ;
2021-01-19 18:06:05 +02:00
const int filter_size = model - > nsize ;
const int nns = model - > nns ;
2021-01-17 18:39:28 +02:00
for ( int i = 0 ; i < N ; i + + ) {
LOCAL_ALIGNED_32 ( float , input , [ 48 * 6 ] ) ;
float activation [ 256 * 2 ] ;
float mstd [ 4 ] ;
float scale ;
if ( prescreen [ i ] )
2016-02-03 10:38:11 +02:00
continue ;
2016-01-23 18:15:53 +02:00
2021-01-17 18:39:28 +02:00
gather_input ( window + i , src_stride , input , mstd , model ) ;
scale = mstd [ 2 ] ;
2016-01-23 18:15:53 +02:00
2021-01-17 18:39:28 +02:00
for ( int nn = 0 ; nn < nns ; nn + + )
activation [ nn ] = dot_dsp ( s , softmax_q1_filter ( nn , model ) , input , filter_size , scale , model - > softmax_bias_q1 [ nn ] ) ;
2016-02-03 10:38:11 +02:00
2021-01-17 18:39:28 +02:00
for ( int nn = 0 ; nn < nns ; nn + + )
2021-01-19 18:06:05 +02:00
activation [ nns + nn ] = dot_dsp ( s , elliott_q1_filter ( nn , model ) , input , filter_size , scale , model - > elliott_bias_q1 [ nn ] ) ;
2016-01-23 18:15:53 +02:00
2021-01-17 18:39:28 +02:00
transform_softmax_exp ( activation , nns ) ;
wae5 ( activation , activation + nns , nns , mstd ) ;
2016-01-23 18:15:53 +02:00
2021-01-17 18:39:28 +02:00
if ( use_q2 ) {
for ( int nn = 0 ; nn < nns ; nn + + )
activation [ nn ] = dot_dsp ( s , softmax_q2_filter ( nn , model ) , input , filter_size , scale , model - > softmax_bias_q2 [ nn ] ) ;
for ( int nn = 0 ; nn < nns ; nn + + )
activation [ nns + nn ] = dot_dsp ( s , elliott_q2_filter ( nn , model ) , input , filter_size , scale , model - > elliott_bias_q2 [ nn ] ) ;
transform_softmax_exp ( activation , nns ) ;
wae5 ( activation , activation + nns , nns , mstd ) ;
2016-01-23 18:15:53 +02:00
}
2021-01-17 18:39:28 +02:00
2021-01-19 18:06:05 +02:00
dst_p [ i ] = mstd [ 3 ] * ( use_q2 ? 0.5f : 1.f ) ;
2016-01-23 18:15:53 +02:00
}
}
2021-01-17 18:39:28 +02:00
/**
 * Convert rows of 8-bit samples to float, adding 32 mirrored samples on
 * each side of every row.
 *
 * dst must point 32 floats into its row so that dst[-32 .. width + 31] is
 * writable. Mirror indices are clamped to the row, so rows narrower than
 * 32 samples repeat their edge sample instead of reading outside the row.
 *
 * @param src        first source row
 * @param dst        first destination row (at the first real sample)
 * @param src_stride source row stride, in bytes
 * @param dst_stride destination row stride, in floats
 * @param width      samples per row
 * @param height     number of rows
 * @param scale      unused; present to match the read callback signature
 */
static void read_bytes(const uint8_t *src, float *dst,
                       int src_stride, int dst_stride,
                       int width, int height, float scale)
{
    const int last = width - 1;

    if (width <= 0)
        return;

    for (int y = 0; y < height; y++) {
        /* left border: mirror of the first samples */
        for (int x = 0; x < 32; x++)
            dst[-x - 1] = src[x < last ? x : last];

        for (int x = 0; x < width; x++)
            dst[x] = src[x];

        /* right border: mirror of the last samples */
        for (int x = 0; x < 32; x++)
            dst[width + x] = src[x < last ? last - x : 0];

        dst += dst_stride;
        src += src_stride;
    }
}
2021-01-17 18:39:28 +02:00
/**
 * Convert rows of 16-bit samples to scaled float, adding 32 mirrored
 * samples on each side of every row.
 *
 * dst must point 32 floats into its row so that dst[-32 .. width + 31] is
 * writable. Mirror indices are clamped to the row, so rows narrower than
 * 32 samples repeat their edge sample instead of reading outside the row.
 *
 * @param srcp       first source row, holding uint16_t samples
 * @param dst        first destination row (at the first real sample)
 * @param src_stride source row stride, in bytes
 * @param dst_stride destination row stride, in floats
 * @param width      samples per row
 * @param height     number of rows
 * @param scale      factor applied to every sample
 */
static void read_words(const uint8_t *srcp, float *dst,
                       int src_stride, int dst_stride,
                       int width, int height, float scale)
{
    const uint16_t *src = (const uint16_t *)srcp;
    const int last = width - 1;

    if (width <= 0)
        return;

    /* from a byte stride to a uint16_t element stride */
    src_stride /= 2;

    for (int y = 0; y < height; y++) {
        /* left border: mirror of the first samples */
        for (int x = 0; x < 32; x++)
            dst[-x - 1] = src[x < last ? x : last] * scale;

        for (int x = 0; x < width; x++)
            dst[x] = src[x] * scale;

        /* right border: mirror of the last samples */
        for (int x = 0; x < 32; x++)
            dst[width + x] = src[x < last ? last - x : 0] * scale;

        dst += dst_stride;
        src += src_stride;
    }
}
2021-01-17 18:39:28 +02:00
/**
 * Store float rows as 8-bit samples, clipping each value with
 * av_clip_uint8().
 *
 * @param src        first source row
 * @param dst        first destination row
 * @param src_stride source row stride, in floats
 * @param dst_stride destination row stride, in bytes
 * @param width      samples per row
 * @param height     number of rows
 * @param depth      unused; present to match the write callback signature
 * @param scale      unused; present to match the write callback signature
 */
static void write_bytes(const float *src, uint8_t *dst,
                        int src_stride, int dst_stride,
                        int width, int height, int depth,
                        float scale)
{
    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++)
            dst[x] = av_clip_uint8(src[x]);

        dst += dst_stride;
        src += src_stride;
    }
}
2021-01-17 18:39:28 +02:00
/**
 * Store float rows as 16-bit samples: each value is multiplied by scale
 * and clipped to depth bits with av_clip_uintp2_c().
 *
 * @param src        first source row
 * @param dstp       first destination row, holding uint16_t samples
 * @param src_stride source row stride, in floats
 * @param dst_stride destination row stride, in bytes
 * @param width      samples per row
 * @param height     number of rows
 * @param depth      bit depth used for clipping
 * @param scale      factor applied before clipping
 */
static void write_words(const float *src, uint8_t *dstp,
                        int src_stride, int dst_stride,
                        int width, int height, int depth,
                        float scale)
{
    uint16_t *dst = (uint16_t *)dstp;

    /* from a byte stride to a uint16_t element stride */
    dst_stride /= 2;

    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++)
            dst[x] = av_clip_uintp2_c(src[x] * scale, depth);

        dst += dst_stride;
        src += src_stride;
    }
}
2016-01-23 18:15:53 +02:00
2021-01-17 18:39:28 +02:00
/**
 * Four-tap vertical interpolation for the samples the prescreener claimed.
 *
 * For each i with a non-zero prescreen[i], dst[i] is set to the weighted
 * sum of the four rows starting two rows above src, with weights
 * -3/32, 19/32, 19/32, -3/32. Other dst entries are left untouched.
 *
 * @param src        float row; rows src-2 .. src+1 are read
 * @param src_stride row stride of src, in floats
 * @param dst        float output row, n entries
 * @param prescreen  n flags; non-zero selects this interpolation
 * @param n          number of samples
 */
static void interpolation(const void *src, ptrdiff_t src_stride,
                          void *dst, const uint8_t *prescreen, int n)
{
    const float *src_p = src;
    float *dst_p = dst;
    const float *window = src_p - 2 * src_stride;

    for (int i = 0; i < n; i++) {
        float accum = 0.0f;

        if (!prescreen[i])
            continue;

        accum += (-3.0f / 32.0f) * window[0 * src_stride + i];
        accum += (19.0f / 32.0f) * window[1 * src_stride + i];
        accum += (19.0f / 32.0f) * window[2 * src_stride + i];
        accum += (-3.0f / 32.0f) * window[3 * src_stride + i];

        dst_p[i] = accum;
    }
}
/**
 * Slice worker: builds one horizontal band of the output frame.
 *
 * For every plane the band [slice_start, slice_end) has even bounds. Planes
 * not selected by process_plane are copied. Otherwise the lines of the kept
 * field are copied as-is. The lines of the other field are rebuilt by the
 * prescreener, then predictor() and/or interpolation(), working on a
 * float copy of the kept field with three extra rows of context above and
 * below.
 *
 * @param ctx     filter context; s->prev is the input frame
 * @param arg     output AVFrame
 * @param jobnr   index of this job, also selects the per-job scratch buffers
 * @param nb_jobs total number of jobs
 * @return 0
 */
static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{
    const NNEDIContext *const s = ctx->priv;
    AVFrame *out = arg;
    AVFrame *in = s->prev;
    const float in_scale = s->in_scale;
    const float out_scale = s->out_scale;
    const int depth = s->depth;
    const int interlaced = in->interlaced_frame;
    /* With field < 0 the parity comes from the frame flags (top first when
     * the frame is not flagged interlaced); otherwise from bit 0 of field. */
    const int tff = s->field_n == (s->field < 0 ? interlaced ? in->top_field_first : 1 :
                                   (s->field & 1) ^ 1);

    for (int p = 0; p < s->nb_planes; p++) {
        const int height = s->planeheight[p];
        const int width = s->planewidth[p];
        const int slice_start = 2 * ((height / 2 * jobnr) / nb_jobs);
        const int slice_end = 2 * ((height / 2 * (jobnr + 1)) / nb_jobs);
        const uint8_t *src_data = in->data[p];
        uint8_t *dst_data = out->data[p];
        uint8_t *dst = out->data[p] + slice_start * out->linesize[p];
        const int src_linesize = in->linesize[p];
        const int dst_linesize = out->linesize[p];
        uint8_t *prescreen_buf = s->prescreen_buf[jobnr];
        float *srcbuf = s->input_buf[jobnr];
        const int srcbuf_stride = width + 64;   /* 32 border samples per side */
        float *dstbuf = s->output_buf[jobnr];
        const int dstbuf_stride = width;
        const int slice_height = (slice_end - slice_start) / 2;
        const int last_slice = slice_end == height;
        const uint8_t *in_line;
        uint8_t *out_line;
        int y_out;

        if (!(s->process_plane & (1 << p))) {
            av_image_copy_plane(dst, out->linesize[p],
                                in->data[p] + slice_start * in->linesize[p],
                                in->linesize[p],
                                s->linesize[p], slice_end - slice_start);
            continue;
        }

        /* Copy the lines of the kept field straight through. */
        y_out    = slice_start + (tff ^ (slice_start & 1));
        in_line  = src_data + (y_out * src_linesize);
        out_line = dst_data + (y_out * dst_linesize);

        while (y_out < slice_end) {
            memcpy(out_line, in_line, s->linesize[p]);
            y_out    += 2;
            in_line  += src_linesize * 2;
            out_line += dst_linesize * 2;
        }

        /* First line of the field that has to be rebuilt. */
        y_out = slice_start + ((!tff) ^ (slice_start & 1));

        /* Three context rows above the slice, clamped at the top. */
        s->read(src_data + FFMAX(y_out - 5, tff) * src_linesize,
                srcbuf + 32,
                src_linesize * 2, srcbuf_stride,
                width, 1, in_scale);
        srcbuf += srcbuf_stride;

        s->read(src_data + FFMAX(y_out - 3, tff) * src_linesize,
                srcbuf + 32,
                src_linesize * 2, srcbuf_stride,
                width, 1, in_scale);
        srcbuf += srcbuf_stride;

        s->read(src_data + FFMAX(y_out - 1, tff) * src_linesize,
                srcbuf + 32,
                src_linesize * 2, srcbuf_stride,
                width, 1, in_scale);
        srcbuf += srcbuf_stride;

        in_line  = src_data + FFMIN(y_out + 1, height - 1 - !tff) * src_linesize;
        out_line = dst_data + (y_out * dst_linesize);

        /* Rows of the slice itself; the source stride skips every other line. */
        s->read(in_line, srcbuf + 32, src_linesize * 2, srcbuf_stride,
                width, slice_height - last_slice, in_scale);

        y_out += (slice_height - last_slice) * 2;

        /* Three context rows below the slice, clamped at the bottom. */
        s->read(src_data + FFMIN(y_out + 1, height - 1 - !tff) * src_linesize,
                srcbuf + 32 + srcbuf_stride * (slice_height - last_slice),
                src_linesize * 2, srcbuf_stride,
                width, 1, in_scale);

        s->read(src_data + FFMIN(y_out + 3, height - 1 - !tff) * src_linesize,
                srcbuf + 32 + srcbuf_stride * (slice_height + 1 - last_slice),
                src_linesize * 2, srcbuf_stride,
                width, 1, in_scale);

        s->read(src_data + FFMIN(y_out + 5, height - 1 - !tff) * src_linesize,
                srcbuf + 32 + srcbuf_stride * (slice_height + 2 - last_slice),
                src_linesize * 2, srcbuf_stride,
                width, 1, in_scale);

        for (int y = 0; y < slice_end - slice_start; y += 2) {
            if (s->pscrn > 0)
                s->prescreen[s->pscrn > 1](ctx, srcbuf + (y / 2) * srcbuf_stride + 32,
                                           srcbuf_stride, prescreen_buf, width,
                                           &s->prescreener[s->pscrn - 1]);

            predictor(ctx,
                      srcbuf + (y / 2) * srcbuf_stride + 32,
                      srcbuf_stride,
                      dstbuf + (y / 2) * dstbuf_stride,
                      prescreen_buf, width,
                      &s->coeffs[s->etype][s->nnsparam][s->nsize], s->qual == 2);

            if (s->pscrn > 0)
                interpolation(srcbuf + (y / 2) * srcbuf_stride + 32,
                              srcbuf_stride,
                              dstbuf + (y / 2) * dstbuf_stride,
                              prescreen_buf, width);
        }

        /* Store the rebuilt rows on every other output line. */
        s->write(dstbuf, out_line, dstbuf_stride, dst_linesize * 2,
                 width, slice_height, depth, out_scale);
    }

    return 0;
}
/**
 * Produce one output frame from s->prev and send it downstream.
 *
 * @param ctx       filter context
 * @param is_second unused by this function
 * @return result of ff_filter_frame(), AVERROR(ENOMEM) when no output
 *         buffer is available, or the error from av_frame_copy_props()
 */
static int get_frame(AVFilterContext *ctx, int is_second)
{
    NNEDIContext *s = ctx->priv;
    AVFilterLink *outlink = ctx->outputs[0];
    AVFrame *dst;
    int ret;

    dst = ff_get_video_buffer(outlink, outlink->w, outlink->h);
    if (!dst)
        return AVERROR(ENOMEM);

    /* Do not send a frame downstream whose properties failed to copy. */
    ret = av_frame_copy_props(dst, s->prev);
    if (ret < 0) {
        av_frame_free(&dst);
        return ret;
    }
    dst->interlaced_frame = 0;
    dst->pts = s->pts;

    ff_filter_execute(ctx, filter_slice, dst, NULL,
                      FFMIN(s->planeheight[1] / 2, s->nb_threads));

    /* Double-rate modes alternate the field for the next output frame. */
    if (s->field == -2 || s->field > 1)
        s->field_n = !s->field_n;

    return ff_filter_frame(outlink, dst);
}
2021-01-20 14:33:41 +02:00
/**
 * Input callback. The filter runs one frame behind: each call processes
 * s->prev and then stores in as the new s->prev.
 *
 * - First call: in is only stored.
 * - deint is set and s->prev is not flagged interlaced, or the filter is
 *   disabled: s->prev is passed through with its pts doubled.
 * - Otherwise one output frame is produced, or two when field is -2 or
 *   above 1. The second frame's pts is the sum of the pts of s->prev and in.
 *
 * @return 0 or a negative error code
 */
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
    AVFilterContext *ctx = inlink->dst;
    NNEDIContext *s = ctx->priv;
    int ret;

    if (!s->prev) {
        s->prev = in;
        return 0;
    }

    if ((s->deint && !s->prev->interlaced_frame) || ctx->is_disabled) {
        s->prev->pts *= 2;
        ret = ff_filter_frame(ctx->outputs[0], s->prev);
        s->prev = in;
        return ret;
    }

    s->pts = s->prev->pts * 2;
    ret = get_frame(ctx, 0);
    /* Stop here on error or in the single-rate modes (field -1, 0, 1). */
    if (ret < 0 || (s->field > -2 && s->field < 2)) {
        av_frame_free(&s->prev);
        s->prev = in;
        return ret;
    }

    s->pts = s->prev->pts + in->pts;
    ret = get_frame(ctx, 1);
    av_frame_free(&s->prev);
    s->prev = in;

    return ret;
}
/**
 * Output request callback.
 *
 * Forwards the request upstream. When upstream reports EOF while a frame is
 * still held in s->prev, a clone of it with a pts advanced by one output
 * frame duration is fed to filter_frame() so the held frame gets flushed.
 * Later calls return AVERROR_EOF.
 *
 * @return 0 or a negative error code
 */
static int request_frame(AVFilterLink *link)
{
    AVFilterContext *ctx = link->src;
    NNEDIContext *s = ctx->priv;
    int ret;

    if (s->eof)
        return AVERROR_EOF;

    ret = ff_request_frame(ctx->inputs[0]);

    if (ret == AVERROR_EOF && s->prev) {
        AVFrame *next = av_frame_clone(s->prev);

        if (!next)
            return AVERROR(ENOMEM);

        next->pts = s->prev->pts + av_rescale_q(1, av_inv_q(ctx->outputs[0]->frame_rate),
                                                ctx->outputs[0]->time_base);
        s->eof = 1;

        ret = filter_frame(ctx->inputs[0], next);
    }

    /* Any other result, success or error, is returned unchanged. */
    return ret;
}
2021-01-19 13:15:07 +02:00
/**
 * Copy n floats from the read cursor into dst and advance the cursor.
 *
 * @param dst  destination for n floats
 * @param n    number of floats to copy
 * @param data in/out read cursor; on return it points n floats further
 */
static void copy_weights(float *dst, int n, const float **data)
{
    const float *src = *data;

    memcpy(dst, src, n * sizeof(float));
    *data = src + n;
}
2021-01-19 13:15:07 +02:00
/**
 * Hand out the next `size` floats of a pre-allocated pool.
 *
 * @param ptr  in/out pool cursor; advanced by `size` floats
 * @param size number of floats to reserve
 * @return the cursor position before it was advanced
 */
static float *allocate(float **ptr, int size)
{
    float *chunk = *ptr;

    *ptr = chunk + size;
    return chunk;
}
/**
 * Allocate storage for one predictor model and carve it into its eight
 * sub-arrays (softmax/elliott filters and biases, for quality 1 and 2).
 *
 * A single av_calloc() block holds 4 filter arrays of nns * xdim * ydim
 * floats plus 4 bias arrays of nns floats; coeffs->data keeps the base
 * pointer of that block.
 *
 * @return 0 on success, AVERROR(ENOMEM) if the allocation fails
 */
static int allocate_model(PredictorCoefficients *coeffs, int xdim, int ydim, int nns)
{
    const int filter_size = nns * xdim * ydim;
    const int bias_size   = nns;
    float *cursor = av_calloc(filter_size + bias_size, 4 * sizeof(float));

    if (!cursor)
        return AVERROR(ENOMEM);

    coeffs->data  = cursor;
    coeffs->xdim  = xdim;
    coeffs->ydim  = ydim;
    coeffs->nsize = xdim * ydim;
    coeffs->nns   = nns;

    // Quality 1 arrays first, then quality 2, each as filters then biases.
    coeffs->softmax_q1      = allocate(&cursor, filter_size);
    coeffs->elliott_q1      = allocate(&cursor, filter_size);
    coeffs->softmax_bias_q1 = allocate(&cursor, bias_size);
    coeffs->elliott_bias_q1 = allocate(&cursor, bias_size);

    coeffs->softmax_q2      = allocate(&cursor, filter_size);
    coeffs->elliott_q2      = allocate(&cursor, filter_size);
    coeffs->softmax_bias_q2 = allocate(&cursor, bias_size);
    coeffs->elliott_bias_q2 = allocate(&cursor, bias_size);

    return 0;
}
/**
 * Unpack the flat weights blob into the prescreener and predictor tables.
 *
 * The blob is consumed strictly sequentially through copy_weights():
 * first prescreener[0], then prescreener[1..3], then the predictor models
 * indexed as coeffs[m][i][j] (i over NNEDI_NNS, j over NNEDI_XDIM/YDIM).
 *
 * @param ctx   filter context whose private data receives the weights
 * @param bdata start of the weights blob
 * @return 0 on success, a negative AVERROR code if a model allocation fails
 */
static int read_weights(AVFilterContext *ctx, const float *bdata)
{
    NNEDIContext *s = ctx->priv;
    PrescreenerCoefficients *old = &s->prescreener[0];
    int ret;

    // prescreener[0] layer 0: 4 neurons x 48 weights, stored back to back in
    // the blob. kernel_l0 rows are 64 floats wide, so each neuron is loaded
    // into its own row; a single flat 4 * 48 copy would pack the rows at
    // stride 48 and disagree with subtract_mean_old(), which reads
    // kernel_l0[n][0..47].
    for (int n = 0; n < 4; n++)
        copy_weights(old->kernel_l0[n], 48, &bdata);
    copy_weights(old->bias_l0, 4, &bdata);

    copy_weights(&old->kernel_l1[0][0], 4 * 4, &bdata);
    copy_weights(old->bias_l1, 4, &bdata);

    copy_weights(&old->kernel_l2[0][0], 4 * 8, &bdata);
    copy_weights(old->bias_l2, 4, &bdata);

    // prescreener[1..3]: the kernels are stored interleaved in the blob and
    // are reordered into per-neuron rows here.
    for (int i = 0; i < 3; i++) {
        PrescreenerCoefficients *data = &s->prescreener[i + 1];
        float kernel_l0_shuffled[4 * 64];
        float kernel_l1_shuffled[4 * 4];

        copy_weights(kernel_l0_shuffled, 4 * 64, &bdata);
        copy_weights(data->bias_l0, 4, &bdata);

        copy_weights(kernel_l1_shuffled, 4 * 4, &bdata);
        copy_weights(data->bias_l1, 4, &bdata);

        for (int n = 0; n < 4; n++) {
            for (int k = 0; k < 64; k++)
                data->kernel_l0[n][k] = kernel_l0_shuffled[(k / 8) * 32 + n * 8 + k % 8];
            for (int k = 0; k < 4; k++)
                data->kernel_l1[n][k] = kernel_l1_shuffled[k * 4 + n];
        }
    }

    for (int m = 0; m < 2; m++) {
        // Grouping by neuron count.
        for (int i = 0; i < 5; i++) {
            const int nns = NNEDI_NNS[i];

            // Grouping by window size.
            for (int j = 0; j < 7; j++) {
                PredictorCoefficients *model = &s->coeffs[m][i][j];
                const int xdim = NNEDI_XDIM[j];
                const int ydim = NNEDI_YDIM[j];
                const int filter_size = xdim * ydim;

                ret = allocate_model(model, xdim, ydim, nns);
                if (ret < 0)
                    return ret;

                // Quality 1 model. NNS[i] * (XDIM[j] * YDIM[j]) * 2 coefficients.
                copy_weights(model->softmax_q1, nns * filter_size, &bdata);
                copy_weights(model->elliott_q1, nns * filter_size, &bdata);

                // Quality 1 model bias. NNS[i] * 2 coefficients.
                copy_weights(model->softmax_bias_q1, nns, &bdata);
                copy_weights(model->elliott_bias_q1, nns, &bdata);

                // Quality 2 model. NNS[i] * (XDIM[j] * YDIM[j]) * 2 coefficients.
                copy_weights(model->softmax_q2, nns * filter_size, &bdata);
                copy_weights(model->elliott_q2, nns * filter_size, &bdata);

                // Quality 2 model bias. NNS[i] * 2 coefficients.
                copy_weights(model->softmax_bias_q2, nns, &bdata);
                copy_weights(model->elliott_bias_q2, nns, &bdata);
            }
        }
    }

    return 0;
}
/**
 * Arithmetic mean of `size` floats, accumulated in single precision in
 * index order.
 *
 * @param input values to average
 * @param size  number of values
 * @return sum of the values divided by size
 */
static float mean(const float *input, int size)
{
    float total = 0.f;
    int idx = 0;

    while (idx < size) {
        total += input[idx];
        idx++;
    }

    return total / size;
}
/**
 * Rewrite each element in place as (element - mean) / half.
 *
 * @param input values to rewrite
 * @param size  number of values
 * @param mean  offset subtracted from every element
 * @param half  divisor applied after the subtraction
 */
static void transform(float *input, int size, float mean, float half)
{
    int idx = 0;

    while (idx < size) {
        const float centred = input[idx] - mean;

        input[idx] = centred / half;
        idx++;
    }
}
2021-01-19 18:06:05 +02:00
/**
 * Centre and scale the first 48 weights of each of the four kernel_l0 rows:
 * every weight becomes (weight - row mean) / half.
 */
static void subtract_mean_old(PrescreenerCoefficients *coeffs, float half)
{
    for (int n = 0; n < 4; n++) {
        float *row = coeffs->kernel_l0[n];
        const float row_mean = mean(row, 48);

        transform(row, 48, row_mean, half);
    }
}
2021-01-19 18:06:05 +02:00
/**
 * Centre and scale all 64 weights of each of the four kernel_l0 rows:
 * every weight becomes (weight - row mean) / half.
 */
static void subtract_mean_new(PrescreenerCoefficients *coeffs, float half)
{
    for (int n = 0; n < 4; n++) {
        float *row = coeffs->kernel_l0[n];
        const float row_mean = mean(row, 64);

        transform(row, 64, row_mean, half);
    }
}
/**
 * Remove the means from one quality level of a predictor model, in place.
 *
 * - every softmax filter loses its own mean and the pointwise average of
 *   all (already centred) softmax filters;
 * - every elliott filter loses its own mean;
 * - every softmax bias loses the mean of the softmax biases.
 *
 * @param softmax      nns softmax filters of filter_size floats each
 * @param elliott      nns elliott filters of filter_size floats each
 * @param softmax_bias nns softmax biases
 * @param filter_size  floats per filter (at most 48 * 6)
 * @param nns          number of filters (at most 256)
 */
static void normalize_predictor_set(float *softmax, float *elliott,
                                    float *softmax_bias,
                                    int filter_size, int nns)
{
    const float scale = 1.f / nns;

    double softmax_means[256];           // Average of individual softmax filters.
    double elliott_means[256];           // Average of individual elliott filters.
    double mean_filter[48 * 6] = { 0 };  // Pointwise average of all softmax filters.
    double mean_bias;

    for (int nn = 0; nn < nns; nn++) {
        const float *sm_row = softmax + nn * filter_size;
        const float *el_row = elliott + nn * filter_size;

        softmax_means[nn] = mean(sm_row, filter_size);
        elliott_means[nn] = mean(el_row, filter_size);

        for (int k = 0; k < filter_size; k++)
            mean_filter[k] += sm_row[k] - softmax_means[nn];
    }

    for (int k = 0; k < filter_size; k++)
        mean_filter[k] *= scale;

    mean_bias = mean(softmax_bias, nns);

    for (int nn = 0; nn < nns; nn++) {
        float *sm_row = softmax + nn * filter_size;
        float *el_row = elliott + nn * filter_size;

        for (int k = 0; k < filter_size; k++) {
            sm_row[k] -= softmax_means[nn] + mean_filter[k];
            el_row[k] -= elliott_means[nn];
        }
        softmax_bias[nn] -= mean_bias;
    }
}

/**
 * Remove the means from both quality levels of a predictor model, using
 * model->nsize as the filter length and model->nns as the filter count.
 */
static void subtract_mean_predictor(PredictorCoefficients *model)
{
    // Quality 1.
    normalize_predictor_set(model->softmax_q1, model->elliott_q1,
                            model->softmax_bias_q1, model->nsize, model->nns);

    // Quality 2.
    normalize_predictor_set(model->softmax_q2, model->elliott_q2,
                            model->softmax_bias_q2, model->nsize, model->nns);
}
2016-01-23 18:15:53 +02:00
/**
 * Filter init: load the weights file named by s->weights_file, check that
 * it is exactly NNEDI_WEIGHTS_SIZE bytes, allocate the float DSP context
 * and unpack the weights with read_weights().
 *
 * The file handle and the temporary blob are released on every path after
 * the file has been opened, via the single cleanup label at the end.
 *
 * @return 0 on success; AVERROR(EINVAL) if the file cannot be opened,
 *         seeked or has the wrong size; AVERROR_INVALIDDATA on a short
 *         read; AVERROR(ENOMEM) on allocation failure; or the error from
 *         read_weights()
 */
static av_cold int init(AVFilterContext *ctx)
{
    NNEDIContext *s = ctx->priv;
    FILE *weights_file = NULL;
    float *bdata = NULL;
    int64_t weights_size;
    size_t bytes_read;
    int ret = 0;

    // The mode string must be exactly "rb": the file is binary.
    weights_file = av_fopen_utf8(s->weights_file, "rb");
    if (!weights_file) {
        av_log(ctx, AV_LOG_ERROR, "No weights file provided, aborting!\n");
        return AVERROR(EINVAL);
    }

    // Determine the file size by seeking to the end.
    if (fseek(weights_file, 0, SEEK_END)) {
        av_log(ctx, AV_LOG_ERROR, "Couldn't seek to the end of weights file.\n");
        ret = AVERROR(EINVAL);
        goto fail;
    }

    weights_size = ftell(weights_file);
    if (weights_size == -1) {
        av_log(ctx, AV_LOG_ERROR, "Couldn't get size of weights file.\n");
        ret = AVERROR(EINVAL);
        goto fail;
    } else if (weights_size != NNEDI_WEIGHTS_SIZE) {
        av_log(ctx, AV_LOG_ERROR, "Unexpected weights file size.\n");
        ret = AVERROR(EINVAL);
        goto fail;
    }

    if (fseek(weights_file, 0, SEEK_SET)) {
        av_log(ctx, AV_LOG_ERROR, "Couldn't seek to the start of weights file.\n");
        ret = AVERROR(EINVAL);
        goto fail;
    }

    bdata = av_malloc(NNEDI_WEIGHTS_SIZE);
    if (!bdata) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }

    bytes_read = fread(bdata, 1, NNEDI_WEIGHTS_SIZE, weights_file);
    if (bytes_read != NNEDI_WEIGHTS_SIZE) {
        av_log(ctx, AV_LOG_ERROR, "Couldn't read weights file.\n");
        ret = AVERROR_INVALIDDATA;
        goto fail;
    }

    // The file is no longer needed once the blob is in memory.
    fclose(weights_file);
    weights_file = NULL;

    s->fdsp = avpriv_float_dsp_alloc(0);
    if (!s->fdsp) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }

    ret = read_weights(ctx, bdata);

fail:
    // Shared exit for success and failure: both resources are temporary.
    if (weights_file)
        fclose(weights_file);
    av_free(bdata);
    return ret;
}
2016-01-23 18:15:53 +02:00
2021-01-17 18:39:28 +02:00
/**
 * Input link configuration: derive per-plane geometry and sample scaling
 * from the negotiated pixel format, select the sample read/write routines,
 * normalise the loaded weights and allocate the per-thread work buffers.
 *
 * @return 0 on success, the av_image_fill_linesizes() error, or
 *         AVERROR(ENOMEM) if a buffer allocation fails
 */
static int config_input(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    NNEDIContext *s = ctx->priv;
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
    int ret;

    s->depth      = desc->comp[0].depth;
    s->nb_threads = ff_filter_get_nb_threads(ctx);
    s->nb_planes  = av_pix_fmt_count_planes(inlink->format);

    ret = av_image_fill_linesizes(s->linesize, inlink->format, inlink->w);
    if (ret < 0)
        return ret;

    // Planes 1 and 2 use the chroma-subsampled size, planes 0 and 3 the full size.
    s->planewidth[1]  = s->planewidth[2]  = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
    s->planewidth[0]  = s->planewidth[3]  = inlink->w;
    s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
    s->planeheight[0] = s->planeheight[3] = inlink->h;

    s->half      = ((1 << 8) - 1) / 2.f;
    s->out_scale = 1 << (s->depth - 8);
    s->in_scale  = 1.f / s->out_scale;

    // Byte accessors for 8-bit samples, word accessors for every other depth.
    if (s->depth == 8) {
        s->read  = read_bytes;
        s->write = write_bytes;
    } else {
        s->read  = read_words;
        s->write = write_words;
    }

    // prescreener[0] uses the 48-weight variant, prescreener[1..3] the 64-weight one.
    subtract_mean_old(&s->prescreener[0], s->half);
    for (int p = 1; p <= 3; p++)
        subtract_mean_new(&s->prescreener[p], s->half);

    s->prescreen[0] = process_old;
    s->prescreen[1] = process_new;

    for (int i = 0; i < 2; i++)
        for (int j = 0; j < 5; j++)
            for (int k = 0; k < 7; k++)
                subtract_mean_predictor(&s->coeffs[i][j][k]);

    s->input_size = (s->planewidth[0] + 64) * (s->planeheight[0] + 6);

    // One input, output and prescreen buffer per thread.
    s->input_buf = av_calloc(s->nb_threads, sizeof(*s->input_buf));
    if (!s->input_buf)
        return AVERROR(ENOMEM);

    for (int t = 0; t < s->nb_threads; t++) {
        s->input_buf[t] = av_calloc(s->input_size, sizeof(**s->input_buf));
        if (!s->input_buf[t])
            return AVERROR(ENOMEM);
    }

    s->output_buf = av_calloc(s->nb_threads, sizeof(*s->output_buf));
    if (!s->output_buf)
        return AVERROR(ENOMEM);

    for (int t = 0; t < s->nb_threads; t++) {
        s->output_buf[t] = av_calloc(s->input_size, sizeof(**s->output_buf));
        if (!s->output_buf[t])
            return AVERROR(ENOMEM);
    }

    s->prescreen_buf = av_calloc(s->nb_threads, sizeof(*s->prescreen_buf));
    if (!s->prescreen_buf)
        return AVERROR(ENOMEM);

    for (int t = 0; t < s->nb_threads; t++) {
        s->prescreen_buf[t] = av_calloc(s->planewidth[0], sizeof(**s->prescreen_buf));
        if (!s->prescreen_buf[t])
            return AVERROR(ENOMEM);
    }

    return 0;
}
/**
 * Release everything the filter owns: the per-thread prescreen, input and
 * output buffers, the float DSP context, the predictor model storage and
 * the buffered frame. Each per-thread array is only walked when its outer
 * pointer is non-NULL.
 */
static av_cold void uninit(AVFilterContext *ctx)
{
    NNEDIContext *s = ctx->priv;

    if (s->prescreen_buf) {
        for (int t = 0; t < s->nb_threads; t++)
            av_freep(&s->prescreen_buf[t]);
    }
    av_freep(&s->prescreen_buf);

    if (s->input_buf) {
        for (int t = 0; t < s->nb_threads; t++)
            av_freep(&s->input_buf[t]);
    }
    av_freep(&s->input_buf);

    if (s->output_buf) {
        for (int t = 0; t < s->nb_threads; t++)
            av_freep(&s->output_buf[t]);
    }
    av_freep(&s->output_buf);

    av_freep(&s->fdsp);

    // Each model keeps a single base pointer in .data.
    for (int i = 0; i < 2; i++)
        for (int j = 0; j < 5; j++)
            for (int k = 0; k < 7; k++)
                av_freep(&s->coeffs[i][j][k].data);

    av_frame_free(&s->prev);
}
/* Single video input pad: frames go to filter_frame(), link setup to config_input(). */
static const AVFilterPad inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .filter_frame = filter_frame,
        .config_props = config_input,
    },
};
/* Single video output pad: link setup in config_output(), frame requests in request_frame(). */
static const AVFilterPad outputs[] = {
    {
        .name          = "default",
        .type          = AVMEDIA_TYPE_VIDEO,
        .config_props  = config_output,
        .request_frame = request_frame,
    },
};
2021-04-19 18:33:56 +02:00
const AVFilter ff_vf_nnedi = {
2016-01-23 18:15:53 +02:00
. name = " nnedi " ,
. description = NULL_IF_CONFIG_SMALL ( " Apply neural network edge directed interpolation intra-only deinterlacer. " ) ,
. priv_size = sizeof ( NNEDIContext ) ,
. priv_class = & nnedi_class ,
. init = init ,
. uninit = uninit ,
2021-08-12 13:05:31 +02:00
FILTER_INPUTS ( inputs ) ,
FILTER_OUTPUTS ( outputs ) ,
2021-09-27 21:21:14 +02:00
FILTER_PIXFMTS_ARRAY ( pix_fmts ) ,
2021-01-17 18:39:28 +02:00
. flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | AVFILTER_FLAG_SLICE_THREADS ,
. process_command = ff_filter_process_command ,
2016-01-23 18:15:53 +02:00
} ;