
libavfilter: Removes stored DNN models. Adds support for native backend model file format in tf backend.

Signed-off-by: Pedro Arthur <bygrandao@gmail.com>
Sergey Lavrushkin 2018-09-06 14:33:06 +03:00 committed by Pedro Arthur
parent bc1097a2bf
commit bd10c1e9a8
10 changed files with 317 additions and 8000 deletions

doc/filters.texi

@@ -15593,7 +15593,17 @@ option may cause flicker since the B-Frames have often larger QP. Default is
@section sr
Scale the input by applying one of the super-resolution methods based on
convolutional neural networks.
convolutional neural networks. Supported models:
@itemize
@item
Super-Resolution Convolutional Neural Network model (SRCNN).
See @url{https://arxiv.org/abs/1501.00092}.
@item
Efficient Sub-Pixel Convolutional Neural Network model (ESPCN).
See @url{https://arxiv.org/abs/1609.05158}.
@end itemize
Training scripts as well as scripts for model generation are provided in
the repository at @url{https://github.com/HighVoltageRocknRoll/sr.git}.
@@ -15601,22 +15611,6 @@ the repository at @url{https://github.com/HighVoltageRocknRoll/sr.git}.
The filter accepts the following options:
@table @option
@item model
Specify which super-resolution model to use. This option accepts the following values:
@table @samp
@item srcnn
Super-Resolution Convolutional Neural Network model.
See @url{https://arxiv.org/abs/1501.00092}.
@item espcn
Efficient Sub-Pixel Convolutional Neural Network model.
See @url{https://arxiv.org/abs/1609.05158}.
@end table
Default value is @samp{srcnn}.
@item dnn_backend
Specify which DNN backend to use for model loading and execution. This option accepts
the following values:
@@ -15630,23 +15624,20 @@ TensorFlow backend. To enable this backend you
need to install the TensorFlow for C library (see
@url{https://www.tensorflow.org/install/install_c}) and configure FFmpeg with
@code{--enable-libtensorflow}
@end table
Default value is @samp{native}.
@item scale_factor
@item model
Set scale factor for SRCNN model, for which custom model file was provided.
Allowed values are @code{2}, @code{3} and @code{4}. Default value is @code{2}.
Scale factor is necessary for SRCNN model, because it accepts input upscaled
using bicubic upscaling with proper scale factor.
@item model_filename
Set path to model file specifying network architecture and its parameters.
Note that different backends use different file formats. TensorFlow backend
can load files for both formats, while native backend can load files for only
its format.
@item scale_factor
Set scale factor for SRCNN model. Allowed values are @code{2}, @code{3} and @code{4}.
Default value is @code{2}. Scale factor is necessary for SRCNN model, because it accepts
input upscaled using bicubic upscaling with proper scale factor.
@end table
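A minimal caller-side sketch of how these options map onto a programmatic filter setup. The option names (dnn_backend, model, scale_factor) and their meanings come from this patch; the graph plumbing, the espcn.pb file name and the error handling are illustrative assumptions, and the tensorflow value is only available when FFmpeg is configured with --enable-libtensorflow.

#include <libavfilter/avfilter.h>

/* Sketch only: create an sr filter instance with the options documented
 * above. "espcn.pb" is a hypothetical TensorFlow GraphDef file; a model in
 * the native format would be passed through the same "model" option. */
static int create_sr_filter(AVFilterGraph *graph, AVFilterContext **sr_ctx)
{
    const AVFilter *sr = avfilter_get_by_name("sr");
    if (!sr)
        return AVERROR_FILTER_NOT_FOUND;
    /* scale_factor only matters for SRCNN-style models, which expect
     * bicubically upscaled input; models that upscale internally ignore it. */
    return avfilter_graph_create_filter(sr_ctx, sr, "sr",
                                        "dnn_backend=tensorflow:model=espcn.pb:scale_factor=2",
                                        NULL, graph);
}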
@anchor{subtitles}

libavfilter/dnn_backend_native.c

@@ -24,40 +24,6 @@
*/
#include "dnn_backend_native.h"
#include "dnn_srcnn.h"
#include "dnn_espcn.h"
#include "libavformat/avio.h"
typedef enum {INPUT, CONV, DEPTH_TO_SPACE} LayerType;
typedef enum {RELU, TANH, SIGMOID} ActivationFunc;
typedef struct Layer{
LayerType type;
float *output;
void *params;
} Layer;
typedef struct ConvolutionalParams{
int32_t input_num, output_num, kernel_size;
ActivationFunc activation;
float *kernel;
float *biases;
} ConvolutionalParams;
typedef struct InputParams{
int height, width, channels;
} InputParams;
typedef struct DepthToSpaceParams{
int block_size;
} DepthToSpaceParams;
// Represents simple feed-forward convolutional network.
typedef struct ConvolutionalNetwork{
Layer *layers;
int32_t layers_num;
} ConvolutionalNetwork;
static DNNReturnType set_input_output_native(void *model, DNNData *input, DNNData *output)
{
@@ -134,7 +100,7 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
AVIOContext *model_file_context;
int file_size, dnn_size, kernel_size, i;
int32_t layer;
LayerType layer_type;
DNNLayerType layer_type;
ConvolutionalParams *conv_params;
DepthToSpaceParams *depth_to_space_params;
@@ -251,118 +217,6 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
return model;
}
static int set_up_conv_layer(Layer *layer, const float *kernel, const float *biases, ActivationFunc activation,
int32_t input_num, int32_t output_num, int32_t size)
{
ConvolutionalParams *conv_params;
int kernel_size;
conv_params = av_malloc(sizeof(ConvolutionalParams));
if (!conv_params){
return DNN_ERROR;
}
conv_params->activation = activation;
conv_params->input_num = input_num;
conv_params->output_num = output_num;
conv_params->kernel_size = size;
kernel_size = input_num * output_num * size * size;
conv_params->kernel = av_malloc(kernel_size * sizeof(float));
conv_params->biases = av_malloc(conv_params->output_num * sizeof(float));
if (!conv_params->kernel || !conv_params->biases){
av_freep(&conv_params->kernel);
av_freep(&conv_params->biases);
av_freep(&conv_params);
return DNN_ERROR;
}
memcpy(conv_params->kernel, kernel, kernel_size * sizeof(float));
memcpy(conv_params->biases, biases, output_num * sizeof(float));
layer->type = CONV;
layer->params = conv_params;
return DNN_SUCCESS;
}
DNNModel *ff_dnn_load_default_model_native(DNNDefaultModel model_type)
{
DNNModel *model = NULL;
ConvolutionalNetwork *network = NULL;
DepthToSpaceParams *depth_to_space_params;
int32_t layer;
model = av_malloc(sizeof(DNNModel));
if (!model){
return NULL;
}
network = av_malloc(sizeof(ConvolutionalNetwork));
if (!network){
av_freep(&model);
return NULL;
}
model->model = (void *)network;
switch (model_type){
case DNN_SRCNN:
network->layers_num = 4;
break;
case DNN_ESPCN:
network->layers_num = 5;
break;
default:
av_freep(&network);
av_freep(&model);
return NULL;
}
network->layers = av_malloc(network->layers_num * sizeof(Layer));
if (!network->layers){
av_freep(&network);
av_freep(&model);
return NULL;
}
for (layer = 0; layer < network->layers_num; ++layer){
network->layers[layer].output = NULL;
network->layers[layer].params = NULL;
}
network->layers[0].type = INPUT;
network->layers[0].params = av_malloc(sizeof(InputParams));
if (!network->layers[0].params){
ff_dnn_free_model_native(&model);
return NULL;
}
switch (model_type){
case DNN_SRCNN:
if (set_up_conv_layer(network->layers + 1, srcnn_conv1_kernel, srcnn_conv1_bias, RELU, 1, 64, 9) != DNN_SUCCESS ||
set_up_conv_layer(network->layers + 2, srcnn_conv2_kernel, srcnn_conv2_bias, RELU, 64, 32, 1) != DNN_SUCCESS ||
set_up_conv_layer(network->layers + 3, srcnn_conv3_kernel, srcnn_conv3_bias, RELU, 32, 1, 5) != DNN_SUCCESS){
ff_dnn_free_model_native(&model);
return NULL;
}
break;
case DNN_ESPCN:
if (set_up_conv_layer(network->layers + 1, espcn_conv1_kernel, espcn_conv1_bias, TANH, 1, 64, 5) != DNN_SUCCESS ||
set_up_conv_layer(network->layers + 2, espcn_conv2_kernel, espcn_conv2_bias, TANH, 64, 32, 3) != DNN_SUCCESS ||
set_up_conv_layer(network->layers + 3, espcn_conv3_kernel, espcn_conv3_bias, SIGMOID, 32, 4, 3) != DNN_SUCCESS){
ff_dnn_free_model_native(&model);
return NULL;
}
network->layers[4].type = DEPTH_TO_SPACE;
depth_to_space_params = av_malloc(sizeof(DepthToSpaceParams));
if (!depth_to_space_params){
ff_dnn_free_model_native(&model);
return NULL;
}
depth_to_space_params->block_size = 2;
network->layers[4].params = depth_to_space_params;
}
model->set_input_output = &set_input_output_native;
return model;
}
#define CLAMP_TO_EDGE(x, w) ((x) < 0 ? 0 : ((x) >= (w) ? (w - 1) : (x)))
static void convolve(const float *input, float *output, const ConvolutionalParams *conv_params, int width, int height)

libavfilter/dnn_backend_native.h

@@ -28,11 +28,41 @@
#define AVFILTER_DNN_BACKEND_NATIVE_H
#include "dnn_interface.h"
#include "libavformat/avio.h"
typedef enum {INPUT, CONV, DEPTH_TO_SPACE} DNNLayerType;
typedef enum {RELU, TANH, SIGMOID} DNNActivationFunc;
typedef struct Layer{
DNNLayerType type;
float *output;
void *params;
} Layer;
typedef struct ConvolutionalParams{
int32_t input_num, output_num, kernel_size;
DNNActivationFunc activation;
float *kernel;
float *biases;
} ConvolutionalParams;
typedef struct InputParams{
int height, width, channels;
} InputParams;
typedef struct DepthToSpaceParams{
int block_size;
} DepthToSpaceParams;
// Represents simple feed-forward convolutional network.
typedef struct ConvolutionalNetwork{
Layer *layers;
int32_t layers_num;
} ConvolutionalNetwork;
DNNModel *ff_dnn_load_model_native(const char *model_filename);
DNNModel *ff_dnn_load_default_model_native(DNNDefaultModel model_type);
DNNReturnType ff_dnn_execute_model_native(const DNNModel *model);
void ff_dnn_free_model_native(DNNModel **model);
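The structures above are moved out of dnn_backend_native.c into this header so the TensorFlow backend can walk a parsed native model and rebuild it as a TF graph. As a purely illustrative sketch (not part of the patch, error handling abbreviated), this is what a minimal INPUT followed by CONV network looks like when described with these types:

#include <stdlib.h>
#include "dnn_backend_native.h"

/* Hand-build a toy network: one input plane followed by a single 3x3
 * convolution. Real models are produced by ff_dnn_load_model_native(),
 * which reads this layout from the native model file. */
static ConvolutionalNetwork *build_toy_network(void)
{
    ConvolutionalNetwork *net = calloc(1, sizeof(*net));
    ConvolutionalParams *conv = calloc(1, sizeof(*conv));
    if (!net || !conv)
        return NULL; /* sketch: leaks on partial failure */

    net->layers_num = 2;
    net->layers = calloc(net->layers_num, sizeof(*net->layers));
    if (!net->layers)
        return NULL;

    net->layers[0].type = INPUT;       /* params (InputParams) omitted here */

    conv->input_num   = 1;             /* one grayscale plane in */
    conv->output_num  = 1;             /* one plane out */
    conv->kernel_size = 3;             /* 3x3 kernel */
    conv->activation  = RELU;
    conv->kernel = calloc(1 * 1 * 3 * 3, sizeof(float)); /* input_num*output_num*k*k weights */
    conv->biases = calloc(1, sizeof(float));             /* output_num biases */
    net->layers[1].type   = CONV;
    net->layers[1].params = conv;
    return net;
}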

libavfilter/dnn_backend_tf.c

@@ -24,8 +24,7 @@
*/
#include "dnn_backend_tf.h"
#include "dnn_srcnn.h"
#include "dnn_backend_native.h"
#include "dnn_espcn.h"
#include "libavformat/avio.h" #include "libavformat/avio.h"
#include <tensorflow/c/c_api.h> #include <tensorflow/c/c_api.h>
@ -156,32 +155,14 @@ static DNNReturnType set_input_output_tf(void *model, DNNData *input, DNNData *o
return DNN_SUCCESS; return DNN_SUCCESS;
} }
DNNModel *ff_dnn_load_model_tf(const char *model_filename) static DNNReturnType load_tf_model(TFModel *tf_model, const char *model_filename)
{ {
DNNModel *model = NULL;
TFModel *tf_model = NULL;
TF_Buffer *graph_def;
TF_ImportGraphDefOptions *graph_opts;
model = av_malloc(sizeof(DNNModel));
if (!model){
return NULL;
}
tf_model = av_malloc(sizeof(TFModel));
if (!tf_model){
av_freep(&model);
return NULL;
}
tf_model->session = NULL;
tf_model->input_tensor = NULL;
tf_model->output_data = NULL;
graph_def = read_graph(model_filename);
if (!graph_def){
av_freep(&tf_model);
return DNN_ERROR;
av_freep(&model);
return NULL;
}
tf_model->graph = TF_NewGraph();
tf_model->status = TF_NewStatus();
@@ -192,26 +173,178 @@ DNNModel *ff_dnn_load_model_tf(const char *model_filename)
if (TF_GetCode(tf_model->status) != TF_OK){
TF_DeleteGraph(tf_model->graph);
TF_DeleteStatus(tf_model->status);
av_freep(&tf_model);
return DNN_ERROR;
av_freep(&model);
return NULL;
}
model->model = (void *)tf_model;
return DNN_SUCCESS;
model->set_input_output = &set_input_output_tf;
return model;
}
static TF_Operation *add_pad_op(TFModel *tf_model, TF_Operation *input_op, int32_t pad)
#define NAME_BUFFER_SIZE 256
static DNNReturnType add_conv_layer(TFModel *tf_model, TF_Operation *transpose_op, TF_Operation **cur_op,
ConvolutionalParams* params, const int layer)
{
TF_Operation *op;
TF_OperationDescription *op_desc;
TF_Output input;
int64_t strides[] = {1, 1, 1, 1};
TF_Tensor *tensor;
int64_t dims[4];
int dims_len;
char name_buffer[NAME_BUFFER_SIZE];
int32_t size;
size = params->input_num * params->output_num * params->kernel_size * params->kernel_size;
input.index = 0;
snprintf(name_buffer, NAME_BUFFER_SIZE, "conv_kernel%d", layer);
op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
dims[0] = params->output_num;
dims[1] = params->kernel_size;
dims[2] = params->kernel_size;
dims[3] = params->input_num;
dims_len = 4;
tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, size * sizeof(float));
memcpy(TF_TensorData(tensor), params->kernel, size * sizeof(float));
TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
if (TF_GetCode(tf_model->status) != TF_OK){
return DNN_ERROR;
}
op = TF_FinishOperation(op_desc, tf_model->status);
if (TF_GetCode(tf_model->status) != TF_OK){
return DNN_ERROR;
}
snprintf(name_buffer, NAME_BUFFER_SIZE, "transpose%d", layer);
op_desc = TF_NewOperation(tf_model->graph, "Transpose", name_buffer);
input.oper = op;
TF_AddInput(op_desc, input);
input.oper = transpose_op;
TF_AddInput(op_desc, input);
TF_SetAttrType(op_desc, "T", TF_FLOAT);
TF_SetAttrType(op_desc, "Tperm", TF_INT32);
op = TF_FinishOperation(op_desc, tf_model->status);
if (TF_GetCode(tf_model->status) != TF_OK){
return DNN_ERROR;
}
snprintf(name_buffer, NAME_BUFFER_SIZE, "conv2d%d", layer);
op_desc = TF_NewOperation(tf_model->graph, "Conv2D", name_buffer);
input.oper = *cur_op;
TF_AddInput(op_desc, input);
input.oper = op;
TF_AddInput(op_desc, input);
TF_SetAttrType(op_desc, "T", TF_FLOAT);
TF_SetAttrIntList(op_desc, "strides", strides, 4);
TF_SetAttrString(op_desc, "padding", "VALID", 5);
*cur_op = TF_FinishOperation(op_desc, tf_model->status);
if (TF_GetCode(tf_model->status) != TF_OK){
return DNN_ERROR;
}
snprintf(name_buffer, NAME_BUFFER_SIZE, "conv_biases%d", layer);
op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
dims[0] = params->output_num;
dims_len = 1;
tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, params->output_num * sizeof(float));
memcpy(TF_TensorData(tensor), params->biases, params->output_num * sizeof(float));
TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
if (TF_GetCode(tf_model->status) != TF_OK){
return DNN_ERROR;
}
op = TF_FinishOperation(op_desc, tf_model->status);
if (TF_GetCode(tf_model->status) != TF_OK){
return DNN_ERROR;
}
snprintf(name_buffer, NAME_BUFFER_SIZE, "bias_add%d", layer);
op_desc = TF_NewOperation(tf_model->graph, "BiasAdd", name_buffer);
input.oper = *cur_op;
TF_AddInput(op_desc, input);
input.oper = op;
TF_AddInput(op_desc, input);
TF_SetAttrType(op_desc, "T", TF_FLOAT);
*cur_op = TF_FinishOperation(op_desc, tf_model->status);
if (TF_GetCode(tf_model->status) != TF_OK){
return DNN_ERROR;
}
snprintf(name_buffer, NAME_BUFFER_SIZE, "activation%d", layer);
switch (params->activation){
case RELU:
op_desc = TF_NewOperation(tf_model->graph, "Relu", name_buffer);
break;
case TANH:
op_desc = TF_NewOperation(tf_model->graph, "Tanh", name_buffer);
break;
case SIGMOID:
op_desc = TF_NewOperation(tf_model->graph, "Sigmoid", name_buffer);
break;
default:
return DNN_ERROR;
}
input.oper = *cur_op;
TF_AddInput(op_desc, input);
TF_SetAttrType(op_desc, "T", TF_FLOAT);
*cur_op = TF_FinishOperation(op_desc, tf_model->status);
if (TF_GetCode(tf_model->status) != TF_OK){
return DNN_ERROR;
}
return DNN_SUCCESS;
}
static DNNReturnType add_depth_to_space_layer(TFModel *tf_model, TF_Operation **cur_op,
DepthToSpaceParams *params, const int layer)
{
TF_OperationDescription *op_desc;
TF_Output input;
char name_buffer[NAME_BUFFER_SIZE];
snprintf(name_buffer, NAME_BUFFER_SIZE, "depth_to_space%d", layer);
op_desc = TF_NewOperation(tf_model->graph, "DepthToSpace", name_buffer);
input.oper = *cur_op;
input.index = 0;
TF_AddInput(op_desc, input);
TF_SetAttrType(op_desc, "T", TF_FLOAT);
TF_SetAttrInt(op_desc, "block_size", params->block_size);
*cur_op = TF_FinishOperation(op_desc, tf_model->status);
if (TF_GetCode(tf_model->status) != TF_OK){
return DNN_ERROR;
}
return DNN_SUCCESS;
}
static int calculate_pad(const ConvolutionalNetwork *conv_network)
{
ConvolutionalParams *params;
int32_t layer;
int pad = 0;
for (layer = 0; layer < conv_network->layers_num; ++layer){
if (conv_network->layers[layer].type == CONV){
params = (ConvolutionalParams *)conv_network->layers[layer].params;
pad += params->kernel_size >> 1;
}
}
return pad;
}
static DNNReturnType add_pad_op(TFModel *tf_model, TF_Operation **cur_op, const int32_t pad)
{
TF_Operation *op;
TF_Tensor *tensor;
TF_OperationDescription *op_desc;
TF_Output input;
int32_t *pads;
int64_t pads_shape[] = {4, 2};
input.index = 0;
op_desc = TF_NewOperation(tf_model->graph, "Const", "pads"); op_desc = TF_NewOperation(tf_model->graph, "Const", "pads");
TF_SetAttrType(op_desc, "dtype", TF_INT32); TF_SetAttrType(op_desc, "dtype", TF_INT32);
tensor = TF_AllocateTensor(TF_INT32, pads_shape, 2, 4 * 2 * sizeof(int32_t)); tensor = TF_AllocateTensor(TF_INT32, pads_shape, 2, 4 * 2 * sizeof(int32_t));
@ -222,68 +355,73 @@ static TF_Operation *add_pad_op(TFModel *tf_model, TF_Operation *input_op, int32
pads[6] = 0; pads[7] = 0; pads[6] = 0; pads[7] = 0;
TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status); TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
if (TF_GetCode(tf_model->status) != TF_OK){ if (TF_GetCode(tf_model->status) != TF_OK){
return NULL; return DNN_ERROR;
} }
op = TF_FinishOperation(op_desc, tf_model->status); op = TF_FinishOperation(op_desc, tf_model->status);
if (TF_GetCode(tf_model->status) != TF_OK){ if (TF_GetCode(tf_model->status) != TF_OK){
return NULL; return DNN_ERROR;
} }
op_desc = TF_NewOperation(tf_model->graph, "MirrorPad", "mirror_pad"); op_desc = TF_NewOperation(tf_model->graph, "MirrorPad", "mirror_pad");
input.oper = input_op; input.oper = *cur_op;
input.index = 0;
TF_AddInput(op_desc, input);
input.oper = op;
TF_AddInput(op_desc, input);
TF_SetAttrType(op_desc, "T", TF_FLOAT);
TF_SetAttrType(op_desc, "Tpaddings", TF_INT32);
TF_SetAttrString(op_desc, "mode", "SYMMETRIC", 9);
op = TF_FinishOperation(op_desc, tf_model->status);
*cur_op = TF_FinishOperation(op_desc, tf_model->status);
if (TF_GetCode(tf_model->status) != TF_OK){
return NULL;
return DNN_ERROR;
}
return op;
return DNN_SUCCESS;
}
static TF_Operation *add_const_op(TFModel *tf_model, const float *values, const int64_t *dims, int dims_len, const char *name)
static DNNReturnType load_native_model(TFModel *tf_model, const char *model_filename)
{
int dim;
int32_t layer;
TF_OperationDescription *op_desc;
TF_Tensor *tensor;
size_t len;
op_desc = TF_NewOperation(tf_model->graph, "Const", name);
TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
len = sizeof(float);
for (dim = 0; dim < dims_len; ++dim){
len *= dims[dim];
}
tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, len);
memcpy(TF_TensorData(tensor), values, len);
TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
if (TF_GetCode(tf_model->status) != TF_OK){
return NULL;
}
return TF_FinishOperation(op_desc, tf_model->status);
}
static TF_Operation* add_conv_layers(TFModel *tf_model, const float **consts, const int64_t **consts_dims,
const int *consts_dims_len, const char **activations,
TF_Operation *input_op, int layers_num)
{
int i;
TF_OperationDescription *op_desc;
TF_Operation *op;
TF_Operation *transpose_op;
TF_Output input;
int64_t strides[] = {1, 1, 1, 1};
int32_t *transpose_perm;
TF_Tensor *tensor;
TF_Output input;
int32_t *transpose_perm;
int64_t transpose_perm_shape[] = {4};
#define NAME_BUFF_SIZE 256
int64_t input_shape[] = {1, -1, -1, -1};
char name_buffer[NAME_BUFF_SIZE];
int32_t pad;
DNNReturnType layer_add_res;
DNNModel *native_model = NULL;
ConvolutionalNetwork *conv_network;
native_model = ff_dnn_load_model_native(model_filename);
if (!native_model){
return DNN_ERROR;
}
conv_network = (ConvolutionalNetwork *)native_model->model;
pad = calculate_pad(conv_network);
tf_model->graph = TF_NewGraph();
tf_model->status = TF_NewStatus();
#define CLEANUP_ON_ERROR(tf_model) \
{ \
TF_DeleteGraph(tf_model->graph); \
TF_DeleteStatus(tf_model->status); \
return DNN_ERROR; \
}
op_desc = TF_NewOperation(tf_model->graph, "Placeholder", "x");
TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
TF_SetAttrShape(op_desc, "shape", input_shape, 4);
op = TF_FinishOperation(op_desc, tf_model->status);
if (TF_GetCode(tf_model->status) != TF_OK){
CLEANUP_ON_ERROR(tf_model);
}
if (add_pad_op(tf_model, &op, pad) != DNN_SUCCESS){
CLEANUP_ON_ERROR(tf_model);
}
op_desc = TF_NewOperation(tf_model->graph, "Const", "transpose_perm"); op_desc = TF_NewOperation(tf_model->graph, "Const", "transpose_perm");
TF_SetAttrType(op_desc, "dtype", TF_INT32); TF_SetAttrType(op_desc, "dtype", TF_INT32);
@ -295,153 +433,48 @@ static TF_Operation* add_conv_layers(TFModel *tf_model, const float **consts, co
transpose_perm[3] = 0; transpose_perm[3] = 0;
TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status); TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
if (TF_GetCode(tf_model->status) != TF_OK){ if (TF_GetCode(tf_model->status) != TF_OK){
return NULL; CLEANUP_ON_ERROR(tf_model);
} }
transpose_op = TF_FinishOperation(op_desc, tf_model->status); transpose_op = TF_FinishOperation(op_desc, tf_model->status);
for (layer = 0; layer < conv_network->layers_num; ++layer){
switch (conv_network->layers[layer].type){
case INPUT:
break;
case CONV:
layer_add_res = add_conv_layer(tf_model, transpose_op, &op,
(ConvolutionalParams *)conv_network->layers[layer].params, layer);
break;
case DEPTH_TO_SPACE:
layer_add_res = add_depth_to_space_layer(tf_model, &op,
(DepthToSpaceParams *)conv_network->layers[layer].params, layer);
break;
default:
CLEANUP_ON_ERROR(tf_model);
}
if (layer_add_res != DNN_SUCCESS){
CLEANUP_ON_ERROR(tf_model);
}
}
op_desc = TF_NewOperation(tf_model->graph, "Identity", "y");
input.oper = op;
TF_AddInput(op_desc, input);
TF_FinishOperation(op_desc, tf_model->status);
if (TF_GetCode(tf_model->status) != TF_OK){
return NULL;
CLEANUP_ON_ERROR(tf_model);
}
input.index = 0;
ff_dnn_free_model_native(&native_model);
for (i = 0; i < layers_num; ++i){
snprintf(name_buffer, NAME_BUFF_SIZE, "conv_kernel%d", i);
op = add_const_op(tf_model, consts[i << 1], consts_dims[i << 1], consts_dims_len[i << 1], name_buffer);
if (TF_GetCode(tf_model->status) != TF_OK || op == NULL){
return NULL;
}
snprintf(name_buffer, NAME_BUFF_SIZE, "transpose%d", i);
return DNN_SUCCESS;
op_desc = TF_NewOperation(tf_model->graph, "Transpose", name_buffer);
input.oper = op;
TF_AddInput(op_desc, input);
input.oper = transpose_op;
TF_AddInput(op_desc, input);
TF_SetAttrType(op_desc, "T", TF_FLOAT);
TF_SetAttrType(op_desc, "Tperm", TF_INT32);
op = TF_FinishOperation(op_desc, tf_model->status);
if (TF_GetCode(tf_model->status) != TF_OK){
return NULL;
}
snprintf(name_buffer, NAME_BUFF_SIZE, "conv2d%d", i);
op_desc = TF_NewOperation(tf_model->graph, "Conv2D", name_buffer);
input.oper = input_op;
TF_AddInput(op_desc, input);
input.oper = op;
TF_AddInput(op_desc, input);
TF_SetAttrType(op_desc, "T", TF_FLOAT);
TF_SetAttrIntList(op_desc, "strides", strides, 4);
TF_SetAttrString(op_desc, "padding", "VALID", 5);
input_op = TF_FinishOperation(op_desc, tf_model->status);
if (TF_GetCode(tf_model->status) != TF_OK){
return NULL;
}
snprintf(name_buffer, NAME_BUFF_SIZE, "conv_biases%d", i);
op = add_const_op(tf_model, consts[(i << 1) + 1], consts_dims[(i << 1) + 1], consts_dims_len[(i << 1) + 1], name_buffer);
if (TF_GetCode(tf_model->status) != TF_OK || op == NULL){
return NULL;
}
snprintf(name_buffer, NAME_BUFF_SIZE, "bias_add%d", i);
op_desc = TF_NewOperation(tf_model->graph, "BiasAdd", name_buffer);
input.oper = input_op;
TF_AddInput(op_desc, input);
input.oper = op;
TF_AddInput(op_desc, input);
TF_SetAttrType(op_desc, "T", TF_FLOAT);
input_op = TF_FinishOperation(op_desc, tf_model->status);
if (TF_GetCode(tf_model->status) != TF_OK){
return NULL;
}
snprintf(name_buffer, NAME_BUFF_SIZE, "activation%d", i);
op_desc = TF_NewOperation(tf_model->graph, activations[i], name_buffer);
input.oper = input_op;
TF_AddInput(op_desc, input);
TF_SetAttrType(op_desc, "T", TF_FLOAT);
input_op = TF_FinishOperation(op_desc, tf_model->status);
if (TF_GetCode(tf_model->status) != TF_OK){
return NULL;
}
}
return input_op;
} }
DNNModel *ff_dnn_load_default_model_tf(DNNDefaultModel model_type)
DNNModel *ff_dnn_load_model_tf(const char *model_filename)
{
DNNModel *model = NULL;
TFModel *tf_model = NULL;
TF_OperationDescription *op_desc;
TF_Operation *op;
TF_Output input;
static const int64_t input_shape[] = {1, -1, -1, 1};
static const char tanh[] = "Tanh";
static const char sigmoid[] = "Sigmoid";
static const char relu[] = "Relu";
static const float *srcnn_consts[] = {
srcnn_conv1_kernel,
srcnn_conv1_bias,
srcnn_conv2_kernel,
srcnn_conv2_bias,
srcnn_conv3_kernel,
srcnn_conv3_bias
};
static const long int *srcnn_consts_dims[] = {
srcnn_conv1_kernel_dims,
srcnn_conv1_bias_dims,
srcnn_conv2_kernel_dims,
srcnn_conv2_bias_dims,
srcnn_conv3_kernel_dims,
srcnn_conv3_bias_dims
};
static const int srcnn_consts_dims_len[] = {
4,
1,
4,
1,
4,
1
};
static const char *srcnn_activations[] = {
relu,
relu,
relu
};
static const float *espcn_consts[] = {
espcn_conv1_kernel,
espcn_conv1_bias,
espcn_conv2_kernel,
espcn_conv2_bias,
espcn_conv3_kernel,
espcn_conv3_bias
};
static const long int *espcn_consts_dims[] = {
espcn_conv1_kernel_dims,
espcn_conv1_bias_dims,
espcn_conv2_kernel_dims,
espcn_conv2_bias_dims,
espcn_conv3_kernel_dims,
espcn_conv3_bias_dims
};
static const int espcn_consts_dims_len[] = {
4,
1,
4,
1,
4,
1
};
static const char *espcn_activations[] = {
tanh,
tanh,
sigmoid
};
input.index = 0;
model = av_malloc(sizeof(DNNModel));
if (!model){
@@ -457,70 +490,13 @@ DNNModel *ff_dnn_load_default_model_tf(DNNDefaultModel model_type)
tf_model->input_tensor = NULL;
tf_model->output_data = NULL;
tf_model->graph = TF_NewGraph();
if (load_tf_model(tf_model, model_filename) != DNN_SUCCESS){
tf_model->status = TF_NewStatus();
if (load_native_model(tf_model, model_filename) != DNN_SUCCESS){
av_freep(&tf_model);
av_freep(&model);
#define CLEANUP_ON_ERROR(tf_model, model) { \
return NULL;
TF_DeleteGraph(tf_model->graph); \
TF_DeleteStatus(tf_model->status); \
av_freep(&tf_model); \
av_freep(&model); \
return NULL; \
}
op_desc = TF_NewOperation(tf_model->graph, "Placeholder", "x");
TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
TF_SetAttrShape(op_desc, "shape", input_shape, 4);
op = TF_FinishOperation(op_desc, tf_model->status);
if (TF_GetCode(tf_model->status) != TF_OK){
CLEANUP_ON_ERROR(tf_model, model);
}
switch (model_type){
case DNN_SRCNN:
op = add_pad_op(tf_model, op, 6);
if (!op){
CLEANUP_ON_ERROR(tf_model, model);
}
op = add_conv_layers(tf_model, srcnn_consts,
srcnn_consts_dims, srcnn_consts_dims_len,
srcnn_activations, op, 3);
if (!op){
CLEANUP_ON_ERROR(tf_model, model);
}
break;
case DNN_ESPCN:
op = add_pad_op(tf_model, op, 4);
if (!op){
CLEANUP_ON_ERROR(tf_model, model);
}
op = add_conv_layers(tf_model, espcn_consts,
espcn_consts_dims, espcn_consts_dims_len,
espcn_activations, op, 3);
if (!op){
CLEANUP_ON_ERROR(tf_model, model);
}
op_desc = TF_NewOperation(tf_model->graph, "DepthToSpace", "depth_to_space");
input.oper = op;
TF_AddInput(op_desc, input);
TF_SetAttrType(op_desc, "T", TF_FLOAT);
TF_SetAttrInt(op_desc, "block_size", 2);
op = TF_FinishOperation(op_desc, tf_model->status);
if (TF_GetCode(tf_model->status) != TF_OK){
CLEANUP_ON_ERROR(tf_model, model);
}
break;
default:
CLEANUP_ON_ERROR(tf_model, model);
}
op_desc = TF_NewOperation(tf_model->graph, "Identity", "y");
input.oper = op;
TF_AddInput(op_desc, input);
TF_FinishOperation(op_desc, tf_model->status);
if (TF_GetCode(tf_model->status) != TF_OK){
CLEANUP_ON_ERROR(tf_model, model);
}
model->model = (void *)tf_model;
@@ -529,6 +505,8 @@ DNNModel *ff_dnn_load_default_model_tf(DNNDefaultModel model_type)
return model;
}
DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model)
{
TFModel *tf_model = (TFModel *)model->model;
@@ -572,7 +550,7 @@ void ff_dnn_free_model_tf(DNNModel **model)
TF_DeleteTensor(tf_model->input_tensor);
}
if (tf_model->output_data){
av_freep(&(tf_model->output_data->data));
av_freep(&tf_model->output_data->data);
}
av_freep(&tf_model);
av_freep(model);
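The graph built by load_native_model() pads once up front and then uses VALID convolutions: calculate_pad() sums kernel_size >> 1 over the convolutional layers and add_pad_op() emits a single SYMMETRIC MirrorPad of that size, so the stacked Conv2D ops end up back at the input resolution. A small standalone check of that arithmetic (the 5/3/3 kernel sizes are merely an ESPCN-like illustration):

#include <stdio.h>

/* Same arithmetic as calculate_pad(): a k x k VALID convolution trims k/2
 * pixels from each border (k odd), so pre-padding by the summed half-kernels
 * restores the original width and height after the whole stack. */
int main(void)
{
    const int kernel_sizes[] = {5, 3, 3}; /* hypothetical ESPCN-like layers */
    int w = 1920, h = 1080, pad = 0;

    for (int i = 0; i < 3; i++)
        pad += kernel_sizes[i] >> 1;          /* 2 + 1 + 1 = 4 */

    int pw = w + 2 * pad, ph = h + 2 * pad;   /* padded to 1928 x 1088 */
    for (int i = 0; i < 3; i++) {             /* each VALID conv loses k - 1 */
        pw -= kernel_sizes[i] - 1;
        ph -= kernel_sizes[i] - 1;
    }
    printf("%dx%d -> %dx%d\n", w, h, pw, ph); /* 1920x1080 -> 1920x1080 */
    return 0;
}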

libavfilter/dnn_backend_tf.h

@@ -31,8 +31,6 @@
DNNModel *ff_dnn_load_model_tf(const char *model_filename);
DNNModel *ff_dnn_load_default_model_tf(DNNDefaultModel model_type);
DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model);
void ff_dnn_free_model_tf(DNNModel **model);

File diff suppressed because it is too large

libavfilter/dnn_interface.c

@@ -40,14 +40,12 @@ DNNModule *ff_get_dnn_module(DNNBackendType backend_type)
switch(backend_type){
case DNN_NATIVE:
dnn_module->load_model = &ff_dnn_load_model_native;
dnn_module->load_default_model = &ff_dnn_load_default_model_native;
dnn_module->execute_model = &ff_dnn_execute_model_native;
dnn_module->free_model = &ff_dnn_free_model_native;
break;
case DNN_TF:
#if (CONFIG_LIBTENSORFLOW == 1)
dnn_module->load_model = &ff_dnn_load_model_tf;
dnn_module->load_default_model = &ff_dnn_load_default_model_tf;
dnn_module->execute_model = &ff_dnn_execute_model_tf;
dnn_module->free_model = &ff_dnn_free_model_tf;
#else

libavfilter/dnn_interface.h

@@ -30,8 +30,6 @@ typedef enum {DNN_SUCCESS, DNN_ERROR} DNNReturnType;
typedef enum {DNN_NATIVE, DNN_TF} DNNBackendType;
typedef enum {DNN_SRCNN, DNN_ESPCN} DNNDefaultModel;
typedef struct DNNData{
float *data;
int width, height, channels;
@@ -49,8 +47,6 @@ typedef struct DNNModel{
typedef struct DNNModule{
// Loads model and parameters from given file. Returns NULL if it is not possible.
DNNModel *(*load_model)(const char *model_filename);
// Loads one of the default models
DNNModel *(*load_default_model)(DNNDefaultModel model_type);
// Executes model with specified input and output. Returns DNN_ERROR otherwise.
DNNReturnType (*execute_model)(const DNNModel *model);
// Frees memory allocated for model.
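With load_default_model gone, every user of the module goes through load_model with an explicit file. Below is a caller-side sketch of the remaining interface, illustrative only: the model path is hypothetical, the dimensions are arbitrary, and the buffer handling follows what the sr filter's usage of set_input_output and execute_model suggests.

#include "dnn_interface.h"
#include "libavutil/mem.h"

/* Illustrative caller of the DNN module interface after this change. */
static int run_model_once(const char *model_path, DNNBackendType backend)
{
    DNNModule *dnn = ff_get_dnn_module(backend);
    DNNModel *model;
    DNNData input, output;
    int ret = 0;

    if (!dnn)
        return -1;
    model = dnn->load_model(model_path);   /* e.g. "espcn.model" (hypothetical path) */
    if (!model) {
        av_freep(&dnn);
        return -1;
    }

    input.width    = 640;                  /* arbitrary example dimensions */
    input.height   = 480;
    input.channels = 1;
    if (model->set_input_output(model->model, &input, &output) != DNN_SUCCESS) {
        ret = -1;
    } else {
        /* fill input.data with width * height floats here, then run */
        if (dnn->execute_model(model) != DNN_SUCCESS)
            ret = -1;
        /* on success, output.data holds output.width x output.height floats */
    }

    dnn->free_model(&model);
    av_freep(&dnn);
    return ret;
}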

File diff suppressed because it is too large

libavfilter/vf_sr.c

@@ -33,12 +33,9 @@
#include "libswscale/swscale.h"
#include "dnn_interface.h"
typedef enum {SRCNN, ESPCN} SRModel;
typedef struct SRContext {
const AVClass *class;
SRModel model_type;
char *model_filename;
DNNBackendType backend_type;
DNNModule *dnn_module;
@@ -52,16 +49,13 @@ typedef struct SRContext {
#define OFFSET(x) offsetof(SRContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
static const AVOption sr_options[] = {
{ "model", "specifies what DNN model to use", OFFSET(model_type), AV_OPT_TYPE_FLAGS, { .i64 = 0 }, 0, 1, FLAGS, "model_type" },
{ "srcnn", "Super-Resolution Convolutional Neural Network model (scale factor should be specified for custom SRCNN model)", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, "model_type" },
{ "espcn", "Efficient Sub-Pixel Convolutional Neural Network model", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "model_type" },
{ "dnn_backend", "DNN backend used for model execution", OFFSET(backend_type), AV_OPT_TYPE_FLAGS, { .i64 = 0 }, 0, 1, FLAGS, "backend" }, { "dnn_backend", "DNN backend used for model execution", OFFSET(backend_type), AV_OPT_TYPE_FLAGS, { .i64 = 0 }, 0, 1, FLAGS, "backend" },
{ "native", "native backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, "backend" }, { "native", "native backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, "backend" },
#if (CONFIG_LIBTENSORFLOW == 1) #if (CONFIG_LIBTENSORFLOW == 1)
{ "tensorflow", "tensorflow backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "backend" }, { "tensorflow", "tensorflow backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "backend" },
#endif #endif
{"scale_factor", "scale factor for SRCNN model", OFFSET(scale_factor), AV_OPT_TYPE_INT, { .i64 = 2 }, 2, 4, FLAGS}, { "scale_factor", "scale factor for SRCNN model", OFFSET(scale_factor), AV_OPT_TYPE_INT, { .i64 = 2 }, 2, 4, FLAGS },
{ "model_filename", "path to model file specifying network architecture and its parameters", OFFSET(model_filename), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, { "model", "path to model file specifying network architecture and its parameters", OFFSET(model_filename), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
{ NULL } { NULL }
}; };
@ -77,15 +71,8 @@ static av_cold int init(AVFilterContext *context)
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
} }
if (!sr_context->model_filename){ if (!sr_context->model_filename){
av_log(context, AV_LOG_VERBOSE, "model file for network was not specified, using default network for x2 upsampling\n"); av_log(context, AV_LOG_ERROR, "model file for network was not specified\n");
sr_context->scale_factor = 2; return AVERROR(EIO);
switch (sr_context->model_type){
case SRCNN:
sr_context->model = (sr_context->dnn_module->load_default_model)(DNN_SRCNN);
break;
case ESPCN:
sr_context->model = (sr_context->dnn_module->load_default_model)(DNN_ESPCN);
}
}
else{
sr_context->model = (sr_context->dnn_module->load_model)(sr_context->model_filename);
@@ -126,15 +113,8 @@ static int config_props(AVFilterLink *inlink)
DNNReturnType result;
int sws_src_h, sws_src_w, sws_dst_h, sws_dst_w;
switch (sr_context->model_type){
sr_context->input.width = inlink->w * sr_context->scale_factor;
case SRCNN:
sr_context->input.height = inlink->h * sr_context->scale_factor;
sr_context->input.width = inlink->w * sr_context->scale_factor;
sr_context->input.height = inlink->h * sr_context->scale_factor;
break;
case ESPCN:
sr_context->input.width = inlink->w;
sr_context->input.height = inlink->h;
}
sr_context->input.channels = 1;
result = (sr_context->model->set_input_output)(sr_context->model->model, &sr_context->input, &sr_context->output);
@@ -143,6 +123,16 @@ static int config_props(AVFilterLink *inlink)
return AVERROR(EIO);
}
else{
if (sr_context->input.height != sr_context->output.height || sr_context->input.width != sr_context->output.width){
sr_context->input.width = inlink->w;
sr_context->input.height = inlink->h;
result = (sr_context->model->set_input_output)(sr_context->model->model, &sr_context->input, &sr_context->output);
if (result != DNN_SUCCESS){
av_log(context, AV_LOG_ERROR, "could not set input and output for the model\n");
return AVERROR(EIO);
}
sr_context->scale_factor = 0;
}
outlink->h = sr_context->output.height;
outlink->w = sr_context->output.width;
sr_context->sws_contexts[1] = sws_getContext(sr_context->input.width, sr_context->input.height, AV_PIX_FMT_GRAY8,
@@ -157,8 +147,7 @@ static int config_props(AVFilterLink *inlink)
av_log(context, AV_LOG_ERROR, "could not create SwsContext for conversions\n");
return AVERROR(ENOMEM);
}
switch (sr_context->model_type){
if (sr_context->scale_factor){
case SRCNN:
sr_context->sws_contexts[0] = sws_getContext(inlink->w, inlink->h, inlink->format,
outlink->w, outlink->h, outlink->format,
SWS_BICUBIC, NULL, NULL, NULL);
@@ -167,8 +156,8 @@ static int config_props(AVFilterLink *inlink)
return AVERROR(ENOMEM);
}
sr_context->sws_slice_h = inlink->h;
break;
}
case ESPCN:
else{
if (inlink->format != AV_PIX_FMT_GRAY8){
sws_src_h = sr_context->input.height;
sws_src_w = sr_context->input.width;
@@ -233,15 +222,14 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
av_frame_copy_props(out, in);
out->height = sr_context->output.height;
out->width = sr_context->output.width;
switch (sr_context->model_type){
if (sr_context->scale_factor){
case SRCNN:
sws_scale(sr_context->sws_contexts[0], (const uint8_t **)in->data, in->linesize,
0, sr_context->sws_slice_h, out->data, out->linesize);
sws_scale(sr_context->sws_contexts[1], (const uint8_t **)out->data, out->linesize,
0, out->height, (uint8_t * const*)(&sr_context->input.data), &sr_context->sws_input_linesize);
break;
}
case ESPCN:
else{
if (sr_context->sws_contexts[0]){
sws_scale(sr_context->sws_contexts[0], (const uint8_t **)(in->data + 1), in->linesize + 1,
0, sr_context->sws_slice_h, out->data + 1, out->linesize + 1);