mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
libavfilter: Removes stored DNN models. Adds support for native backend model file format in tf backend.
Signed-off-by: Pedro Arthur <bygrandao@gmail.com>
This commit is contained in:
parent
bc1097a2bf
commit
bd10c1e9a8
@ -15593,7 +15593,17 @@ option may cause flicker since the B-Frames have often larger QP. Default is
|
|||||||
@section sr
|
@section sr
|
||||||
|
|
||||||
Scale the input by applying one of the super-resolution methods based on
|
Scale the input by applying one of the super-resolution methods based on
|
||||||
convolutional neural networks.
|
convolutional neural networks. Supported models:
|
||||||
|
|
||||||
|
@itemize
|
||||||
|
@item
|
||||||
|
Super-Resolution Convolutional Neural Network model (SRCNN).
|
||||||
|
See @url{https://arxiv.org/abs/1501.00092}.
|
||||||
|
|
||||||
|
@item
|
||||||
|
Efficient Sub-Pixel Convolutional Neural Network model (ESPCN).
|
||||||
|
See @url{https://arxiv.org/abs/1609.05158}.
|
||||||
|
@end itemize
|
||||||
|
|
||||||
Training scripts as well as scripts for model generation are provided in
|
Training scripts as well as scripts for model generation are provided in
|
||||||
the repository at @url{https://github.com/HighVoltageRocknRoll/sr.git}.
|
the repository at @url{https://github.com/HighVoltageRocknRoll/sr.git}.
|
||||||
@ -15601,22 +15611,6 @@ the repository at @url{https://github.com/HighVoltageRocknRoll/sr.git}.
|
|||||||
The filter accepts the following options:
|
The filter accepts the following options:
|
||||||
|
|
||||||
@table @option
|
@table @option
|
||||||
@item model
|
|
||||||
Specify which super-resolution model to use. This option accepts the following values:
|
|
||||||
|
|
||||||
@table @samp
|
|
||||||
@item srcnn
|
|
||||||
Super-Resolution Convolutional Neural Network model.
|
|
||||||
See @url{https://arxiv.org/abs/1501.00092}.
|
|
||||||
|
|
||||||
@item espcn
|
|
||||||
Efficient Sub-Pixel Convolutional Neural Network model.
|
|
||||||
See @url{https://arxiv.org/abs/1609.05158}.
|
|
||||||
|
|
||||||
@end table
|
|
||||||
|
|
||||||
Default value is @samp{srcnn}.
|
|
||||||
|
|
||||||
@item dnn_backend
|
@item dnn_backend
|
||||||
Specify which DNN backend to use for model loading and execution. This option accepts
|
Specify which DNN backend to use for model loading and execution. This option accepts
|
||||||
the following values:
|
the following values:
|
||||||
@ -15630,23 +15624,20 @@ TensorFlow backend. To enable this backend you
|
|||||||
need to install the TensorFlow for C library (see
|
need to install the TensorFlow for C library (see
|
||||||
@url{https://www.tensorflow.org/install/install_c}) and configure FFmpeg with
|
@url{https://www.tensorflow.org/install/install_c}) and configure FFmpeg with
|
||||||
@code{--enable-libtensorflow}
|
@code{--enable-libtensorflow}
|
||||||
|
|
||||||
@end table
|
@end table
|
||||||
|
|
||||||
Default value is @samp{native}.
|
Default value is @samp{native}.
|
||||||
|
|
||||||
@item scale_factor
|
@item model
|
||||||
Set scale factor for SRCNN model, for which custom model file was provided.
|
|
||||||
Allowed values are @code{2}, @code{3} and @code{4}. Default value is @code{2}.
|
|
||||||
Scale factor is necessary for SRCNN model, because it accepts input upscaled
|
|
||||||
using bicubic upscaling with proper scale factor.
|
|
||||||
|
|
||||||
@item model_filename
|
|
||||||
Set path to model file specifying network architecture and its parameters.
|
Set path to model file specifying network architecture and its parameters.
|
||||||
Note that different backends use different file formats. TensorFlow backend
|
Note that different backends use different file formats. TensorFlow backend
|
||||||
can load files for both formats, while native backend can load files for only
|
can load files for both formats, while native backend can load files for only
|
||||||
its format.
|
its format.
|
||||||
|
|
||||||
|
@item scale_factor
|
||||||
|
Set the scale factor for the SRCNN model. Allowed values are @code{2}, @code{3} and @code{4}.
|
||||||
|
Default value is @code{2}. The scale factor is necessary for the SRCNN model because it accepts
|
||||||
|
input upscaled using bicubic upscaling with the proper scale factor.
|
||||||
@end table
|
@end table
|
||||||
|
|
||||||
@anchor{subtitles}
|
@anchor{subtitles}
|
||||||
|
@ -24,40 +24,6 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "dnn_backend_native.h"
|
#include "dnn_backend_native.h"
|
||||||
#include "dnn_srcnn.h"
|
|
||||||
#include "dnn_espcn.h"
|
|
||||||
#include "libavformat/avio.h"
|
|
||||||
|
|
||||||
typedef enum {INPUT, CONV, DEPTH_TO_SPACE} LayerType;
|
|
||||||
|
|
||||||
typedef enum {RELU, TANH, SIGMOID} ActivationFunc;
|
|
||||||
|
|
||||||
typedef struct Layer{
|
|
||||||
LayerType type;
|
|
||||||
float *output;
|
|
||||||
void *params;
|
|
||||||
} Layer;
|
|
||||||
|
|
||||||
typedef struct ConvolutionalParams{
|
|
||||||
int32_t input_num, output_num, kernel_size;
|
|
||||||
ActivationFunc activation;
|
|
||||||
float *kernel;
|
|
||||||
float *biases;
|
|
||||||
} ConvolutionalParams;
|
|
||||||
|
|
||||||
typedef struct InputParams{
|
|
||||||
int height, width, channels;
|
|
||||||
} InputParams;
|
|
||||||
|
|
||||||
typedef struct DepthToSpaceParams{
|
|
||||||
int block_size;
|
|
||||||
} DepthToSpaceParams;
|
|
||||||
|
|
||||||
// Represents simple feed-forward convolutional network.
|
|
||||||
typedef struct ConvolutionalNetwork{
|
|
||||||
Layer *layers;
|
|
||||||
int32_t layers_num;
|
|
||||||
} ConvolutionalNetwork;
|
|
||||||
|
|
||||||
static DNNReturnType set_input_output_native(void *model, DNNData *input, DNNData *output)
|
static DNNReturnType set_input_output_native(void *model, DNNData *input, DNNData *output)
|
||||||
{
|
{
|
||||||
@ -134,7 +100,7 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
|
|||||||
AVIOContext *model_file_context;
|
AVIOContext *model_file_context;
|
||||||
int file_size, dnn_size, kernel_size, i;
|
int file_size, dnn_size, kernel_size, i;
|
||||||
int32_t layer;
|
int32_t layer;
|
||||||
LayerType layer_type;
|
DNNLayerType layer_type;
|
||||||
ConvolutionalParams *conv_params;
|
ConvolutionalParams *conv_params;
|
||||||
DepthToSpaceParams *depth_to_space_params;
|
DepthToSpaceParams *depth_to_space_params;
|
||||||
|
|
||||||
@ -251,118 +217,6 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
|
|||||||
return model;
|
return model;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int set_up_conv_layer(Layer *layer, const float *kernel, const float *biases, ActivationFunc activation,
|
|
||||||
int32_t input_num, int32_t output_num, int32_t size)
|
|
||||||
{
|
|
||||||
ConvolutionalParams *conv_params;
|
|
||||||
int kernel_size;
|
|
||||||
|
|
||||||
conv_params = av_malloc(sizeof(ConvolutionalParams));
|
|
||||||
if (!conv_params){
|
|
||||||
return DNN_ERROR;
|
|
||||||
}
|
|
||||||
conv_params->activation = activation;
|
|
||||||
conv_params->input_num = input_num;
|
|
||||||
conv_params->output_num = output_num;
|
|
||||||
conv_params->kernel_size = size;
|
|
||||||
kernel_size = input_num * output_num * size * size;
|
|
||||||
conv_params->kernel = av_malloc(kernel_size * sizeof(float));
|
|
||||||
conv_params->biases = av_malloc(conv_params->output_num * sizeof(float));
|
|
||||||
if (!conv_params->kernel || !conv_params->biases){
|
|
||||||
av_freep(&conv_params->kernel);
|
|
||||||
av_freep(&conv_params->biases);
|
|
||||||
av_freep(&conv_params);
|
|
||||||
return DNN_ERROR;
|
|
||||||
}
|
|
||||||
memcpy(conv_params->kernel, kernel, kernel_size * sizeof(float));
|
|
||||||
memcpy(conv_params->biases, biases, output_num * sizeof(float));
|
|
||||||
layer->type = CONV;
|
|
||||||
layer->params = conv_params;
|
|
||||||
|
|
||||||
return DNN_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
DNNModel *ff_dnn_load_default_model_native(DNNDefaultModel model_type)
|
|
||||||
{
|
|
||||||
DNNModel *model = NULL;
|
|
||||||
ConvolutionalNetwork *network = NULL;
|
|
||||||
DepthToSpaceParams *depth_to_space_params;
|
|
||||||
int32_t layer;
|
|
||||||
|
|
||||||
model = av_malloc(sizeof(DNNModel));
|
|
||||||
if (!model){
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
network = av_malloc(sizeof(ConvolutionalNetwork));
|
|
||||||
if (!network){
|
|
||||||
av_freep(&model);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
model->model = (void *)network;
|
|
||||||
|
|
||||||
switch (model_type){
|
|
||||||
case DNN_SRCNN:
|
|
||||||
network->layers_num = 4;
|
|
||||||
break;
|
|
||||||
case DNN_ESPCN:
|
|
||||||
network->layers_num = 5;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
av_freep(&network);
|
|
||||||
av_freep(&model);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
network->layers = av_malloc(network->layers_num * sizeof(Layer));
|
|
||||||
if (!network->layers){
|
|
||||||
av_freep(&network);
|
|
||||||
av_freep(&model);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (layer = 0; layer < network->layers_num; ++layer){
|
|
||||||
network->layers[layer].output = NULL;
|
|
||||||
network->layers[layer].params = NULL;
|
|
||||||
}
|
|
||||||
network->layers[0].type = INPUT;
|
|
||||||
network->layers[0].params = av_malloc(sizeof(InputParams));
|
|
||||||
if (!network->layers[0].params){
|
|
||||||
ff_dnn_free_model_native(&model);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (model_type){
|
|
||||||
case DNN_SRCNN:
|
|
||||||
if (set_up_conv_layer(network->layers + 1, srcnn_conv1_kernel, srcnn_conv1_bias, RELU, 1, 64, 9) != DNN_SUCCESS ||
|
|
||||||
set_up_conv_layer(network->layers + 2, srcnn_conv2_kernel, srcnn_conv2_bias, RELU, 64, 32, 1) != DNN_SUCCESS ||
|
|
||||||
set_up_conv_layer(network->layers + 3, srcnn_conv3_kernel, srcnn_conv3_bias, RELU, 32, 1, 5) != DNN_SUCCESS){
|
|
||||||
ff_dnn_free_model_native(&model);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case DNN_ESPCN:
|
|
||||||
if (set_up_conv_layer(network->layers + 1, espcn_conv1_kernel, espcn_conv1_bias, TANH, 1, 64, 5) != DNN_SUCCESS ||
|
|
||||||
set_up_conv_layer(network->layers + 2, espcn_conv2_kernel, espcn_conv2_bias, TANH, 64, 32, 3) != DNN_SUCCESS ||
|
|
||||||
set_up_conv_layer(network->layers + 3, espcn_conv3_kernel, espcn_conv3_bias, SIGMOID, 32, 4, 3) != DNN_SUCCESS){
|
|
||||||
ff_dnn_free_model_native(&model);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
network->layers[4].type = DEPTH_TO_SPACE;
|
|
||||||
depth_to_space_params = av_malloc(sizeof(DepthToSpaceParams));
|
|
||||||
if (!depth_to_space_params){
|
|
||||||
ff_dnn_free_model_native(&model);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
depth_to_space_params->block_size = 2;
|
|
||||||
network->layers[4].params = depth_to_space_params;
|
|
||||||
}
|
|
||||||
|
|
||||||
model->set_input_output = &set_input_output_native;
|
|
||||||
|
|
||||||
return model;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define CLAMP_TO_EDGE(x, w) ((x) < 0 ? 0 : ((x) >= (w) ? (w - 1) : (x)))
|
#define CLAMP_TO_EDGE(x, w) ((x) < 0 ? 0 : ((x) >= (w) ? (w - 1) : (x)))
|
||||||
|
|
||||||
static void convolve(const float *input, float *output, const ConvolutionalParams *conv_params, int width, int height)
|
static void convolve(const float *input, float *output, const ConvolutionalParams *conv_params, int width, int height)
|
||||||
|
@ -28,11 +28,41 @@
|
|||||||
#define AVFILTER_DNN_BACKEND_NATIVE_H
|
#define AVFILTER_DNN_BACKEND_NATIVE_H
|
||||||
|
|
||||||
#include "dnn_interface.h"
|
#include "dnn_interface.h"
|
||||||
|
#include "libavformat/avio.h"
|
||||||
|
|
||||||
|
/* Kind of a network layer. Each kind has its own parameter struct stored in
 * Layer.params: InputParams, ConvolutionalParams or DepthToSpaceParams. */
typedef enum {INPUT, CONV, DEPTH_TO_SPACE} DNNLayerType;
|
||||||
|
|
||||||
|
/* Activation function of a convolutional layer (ConvolutionalParams.activation). */
typedef enum {RELU, TANH, SIGMOID} DNNActivationFunc;
|
||||||
|
|
||||||
|
/* One network layer: a type tag plus a pointer to the type-specific parameters. */
typedef struct Layer{
|
||||||
|
DNNLayerType type; /* selects which parameter struct params points to */
|
||||||
|
float *output; /* NOTE(review): presumably the layer's output buffer - confirm where it is allocated */
|
||||||
|
void *params; /* InputParams, ConvolutionalParams or DepthToSpaceParams, matching type */
|
||||||
|
} Layer;
|
||||||
|
|
||||||
|
/* Parameters of a CONV layer. */
typedef struct ConvolutionalParams{
|
||||||
|
int32_t input_num, output_num, kernel_size; /* kernel is kernel_size x kernel_size */
|
||||||
|
DNNActivationFunc activation;
|
||||||
|
float *kernel; /* input_num * output_num * kernel_size * kernel_size floats */
|
||||||
|
float *biases; /* output_num floats */
|
||||||
|
} ConvolutionalParams;
|
||||||
|
|
||||||
|
/* Parameters of the INPUT layer. */
typedef struct InputParams{
|
||||||
|
int height, width, channels; /* NOTE(review): presumably the dimensions of the network input - confirm against the code that fills them */
|
||||||
|
} InputParams;
|
||||||
|
|
||||||
|
/* Parameters of a DEPTH_TO_SPACE layer. */
typedef struct DepthToSpaceParams{
|
||||||
|
int block_size; /* the TF backend passes this as the "block_size" attribute of DepthToSpace */
|
||||||
|
} DepthToSpaceParams;
|
||||||
|
|
||||||
|
// Represents a simple feed-forward convolutional network: an array of layers
// that is walked from index 0 to layers_num - 1.
|
||||||
|
typedef struct ConvolutionalNetwork{
|
||||||
|
Layer *layers; // array of layers_num entries
|
||||||
|
int32_t layers_num; // number of entries in layers
|
||||||
|
} ConvolutionalNetwork;
|
||||||
|
|
||||||
DNNModel *ff_dnn_load_model_native(const char *model_filename);
|
DNNModel *ff_dnn_load_model_native(const char *model_filename);
|
||||||
|
|
||||||
DNNModel *ff_dnn_load_default_model_native(DNNDefaultModel model_type);
|
|
||||||
|
|
||||||
DNNReturnType ff_dnn_execute_model_native(const DNNModel *model);
|
DNNReturnType ff_dnn_execute_model_native(const DNNModel *model);
|
||||||
|
|
||||||
void ff_dnn_free_model_native(DNNModel **model);
|
void ff_dnn_free_model_native(DNNModel **model);
|
||||||
|
@ -24,8 +24,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "dnn_backend_tf.h"
|
#include "dnn_backend_tf.h"
|
||||||
#include "dnn_srcnn.h"
|
#include "dnn_backend_native.h"
|
||||||
#include "dnn_espcn.h"
|
|
||||||
#include "libavformat/avio.h"
|
#include "libavformat/avio.h"
|
||||||
|
|
||||||
#include <tensorflow/c/c_api.h>
|
#include <tensorflow/c/c_api.h>
|
||||||
@ -156,32 +155,14 @@ static DNNReturnType set_input_output_tf(void *model, DNNData *input, DNNData *o
|
|||||||
return DNN_SUCCESS;
|
return DNN_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
DNNModel *ff_dnn_load_model_tf(const char *model_filename)
|
static DNNReturnType load_tf_model(TFModel *tf_model, const char *model_filename)
|
||||||
{
|
{
|
||||||
DNNModel *model = NULL;
|
|
||||||
TFModel *tf_model = NULL;
|
|
||||||
TF_Buffer *graph_def;
|
TF_Buffer *graph_def;
|
||||||
TF_ImportGraphDefOptions *graph_opts;
|
TF_ImportGraphDefOptions *graph_opts;
|
||||||
|
|
||||||
model = av_malloc(sizeof(DNNModel));
|
|
||||||
if (!model){
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
tf_model = av_malloc(sizeof(TFModel));
|
|
||||||
if (!tf_model){
|
|
||||||
av_freep(&model);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
tf_model->session = NULL;
|
|
||||||
tf_model->input_tensor = NULL;
|
|
||||||
tf_model->output_data = NULL;
|
|
||||||
|
|
||||||
graph_def = read_graph(model_filename);
|
graph_def = read_graph(model_filename);
|
||||||
if (!graph_def){
|
if (!graph_def){
|
||||||
av_freep(&tf_model);
|
return DNN_ERROR;
|
||||||
av_freep(&model);
|
|
||||||
return NULL;
|
|
||||||
}
|
}
|
||||||
tf_model->graph = TF_NewGraph();
|
tf_model->graph = TF_NewGraph();
|
||||||
tf_model->status = TF_NewStatus();
|
tf_model->status = TF_NewStatus();
|
||||||
@ -192,26 +173,178 @@ DNNModel *ff_dnn_load_model_tf(const char *model_filename)
|
|||||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||||
TF_DeleteGraph(tf_model->graph);
|
TF_DeleteGraph(tf_model->graph);
|
||||||
TF_DeleteStatus(tf_model->status);
|
TF_DeleteStatus(tf_model->status);
|
||||||
av_freep(&tf_model);
|
return DNN_ERROR;
|
||||||
av_freep(&model);
|
|
||||||
return NULL;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
model->model = (void *)tf_model;
|
return DNN_SUCCESS;
|
||||||
model->set_input_output = &set_input_output_tf;
|
|
||||||
|
|
||||||
return model;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static TF_Operation *add_pad_op(TFModel *tf_model, TF_Operation *input_op, int32_t pad)
|
#define NAME_BUFFER_SIZE 256
|
||||||
|
|
||||||
|
static DNNReturnType add_conv_layer(TFModel *tf_model, TF_Operation *transpose_op, TF_Operation **cur_op,
|
||||||
|
ConvolutionalParams* params, const int layer)
|
||||||
|
{
|
||||||
|
TF_Operation *op;
|
||||||
|
TF_OperationDescription *op_desc;
|
||||||
|
TF_Output input;
|
||||||
|
int64_t strides[] = {1, 1, 1, 1};
|
||||||
|
TF_Tensor *tensor;
|
||||||
|
int64_t dims[4];
|
||||||
|
int dims_len;
|
||||||
|
char name_buffer[NAME_BUFFER_SIZE];
|
||||||
|
int32_t size;
|
||||||
|
|
||||||
|
size = params->input_num * params->output_num * params->kernel_size * params->kernel_size;
|
||||||
|
input.index = 0;
|
||||||
|
|
||||||
|
snprintf(name_buffer, NAME_BUFFER_SIZE, "conv_kernel%d", layer);
|
||||||
|
op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
|
||||||
|
TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
|
||||||
|
dims[0] = params->output_num;
|
||||||
|
dims[1] = params->kernel_size;
|
||||||
|
dims[2] = params->kernel_size;
|
||||||
|
dims[3] = params->input_num;
|
||||||
|
dims_len = 4;
|
||||||
|
tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, size * sizeof(float));
|
||||||
|
memcpy(TF_TensorData(tensor), params->kernel, size * sizeof(float));
|
||||||
|
TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
|
||||||
|
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||||
|
return DNN_ERROR;
|
||||||
|
}
|
||||||
|
op = TF_FinishOperation(op_desc, tf_model->status);
|
||||||
|
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||||
|
return DNN_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
snprintf(name_buffer, NAME_BUFFER_SIZE, "transpose%d", layer);
|
||||||
|
op_desc = TF_NewOperation(tf_model->graph, "Transpose", name_buffer);
|
||||||
|
input.oper = op;
|
||||||
|
TF_AddInput(op_desc, input);
|
||||||
|
input.oper = transpose_op;
|
||||||
|
TF_AddInput(op_desc, input);
|
||||||
|
TF_SetAttrType(op_desc, "T", TF_FLOAT);
|
||||||
|
TF_SetAttrType(op_desc, "Tperm", TF_INT32);
|
||||||
|
op = TF_FinishOperation(op_desc, tf_model->status);
|
||||||
|
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||||
|
return DNN_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
snprintf(name_buffer, NAME_BUFFER_SIZE, "conv2d%d", layer);
|
||||||
|
op_desc = TF_NewOperation(tf_model->graph, "Conv2D", name_buffer);
|
||||||
|
input.oper = *cur_op;
|
||||||
|
TF_AddInput(op_desc, input);
|
||||||
|
input.oper = op;
|
||||||
|
TF_AddInput(op_desc, input);
|
||||||
|
TF_SetAttrType(op_desc, "T", TF_FLOAT);
|
||||||
|
TF_SetAttrIntList(op_desc, "strides", strides, 4);
|
||||||
|
TF_SetAttrString(op_desc, "padding", "VALID", 5);
|
||||||
|
*cur_op = TF_FinishOperation(op_desc, tf_model->status);
|
||||||
|
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||||
|
return DNN_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
snprintf(name_buffer, NAME_BUFFER_SIZE, "conv_biases%d", layer);
|
||||||
|
op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
|
||||||
|
TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
|
||||||
|
dims[0] = params->output_num;
|
||||||
|
dims_len = 1;
|
||||||
|
tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, params->output_num * sizeof(float));
|
||||||
|
memcpy(TF_TensorData(tensor), params->biases, params->output_num * sizeof(float));
|
||||||
|
TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
|
||||||
|
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||||
|
return DNN_ERROR;
|
||||||
|
}
|
||||||
|
op = TF_FinishOperation(op_desc, tf_model->status);
|
||||||
|
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||||
|
return DNN_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
snprintf(name_buffer, NAME_BUFFER_SIZE, "bias_add%d", layer);
|
||||||
|
op_desc = TF_NewOperation(tf_model->graph, "BiasAdd", name_buffer);
|
||||||
|
input.oper = *cur_op;
|
||||||
|
TF_AddInput(op_desc, input);
|
||||||
|
input.oper = op;
|
||||||
|
TF_AddInput(op_desc, input);
|
||||||
|
TF_SetAttrType(op_desc, "T", TF_FLOAT);
|
||||||
|
*cur_op = TF_FinishOperation(op_desc, tf_model->status);
|
||||||
|
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||||
|
return DNN_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
snprintf(name_buffer, NAME_BUFFER_SIZE, "activation%d", layer);
|
||||||
|
switch (params->activation){
|
||||||
|
case RELU:
|
||||||
|
op_desc = TF_NewOperation(tf_model->graph, "Relu", name_buffer);
|
||||||
|
break;
|
||||||
|
case TANH:
|
||||||
|
op_desc = TF_NewOperation(tf_model->graph, "Tanh", name_buffer);
|
||||||
|
break;
|
||||||
|
case SIGMOID:
|
||||||
|
op_desc = TF_NewOperation(tf_model->graph, "Sigmoid", name_buffer);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return DNN_ERROR;
|
||||||
|
}
|
||||||
|
input.oper = *cur_op;
|
||||||
|
TF_AddInput(op_desc, input);
|
||||||
|
TF_SetAttrType(op_desc, "T", TF_FLOAT);
|
||||||
|
*cur_op = TF_FinishOperation(op_desc, tf_model->status);
|
||||||
|
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||||
|
return DNN_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
return DNN_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Append a TensorFlow "DepthToSpace" operation fed by output 0 of *cur_op.
 * The operation is named "depth_to_space<layer>" and takes its "block_size"
 * attribute from params->block_size. *cur_op is overwritten with the result
 * of TF_FinishOperation. Returns DNN_SUCCESS when the TensorFlow status is
 * TF_OK afterwards and DNN_ERROR otherwise.
 */
static DNNReturnType add_depth_to_space_layer(TFModel *tf_model, TF_Operation **cur_op,
|
||||||
|
DepthToSpaceParams *params, const int layer)
|
||||||
{
|
{
|
||||||
TF_OperationDescription *op_desc;
|
TF_OperationDescription *op_desc;
|
||||||
|
TF_Output input;
|
||||||
|
char name_buffer[NAME_BUFFER_SIZE];
|
||||||
|
|
||||||
|
snprintf(name_buffer, NAME_BUFFER_SIZE, "depth_to_space%d", layer); /* layer index is part of the operation name */
|
||||||
|
op_desc = TF_NewOperation(tf_model->graph, "DepthToSpace", name_buffer);
|
||||||
|
input.oper = *cur_op;
|
||||||
|
input.index = 0;
|
||||||
|
TF_AddInput(op_desc, input);
|
||||||
|
TF_SetAttrType(op_desc, "T", TF_FLOAT);
|
||||||
|
TF_SetAttrInt(op_desc, "block_size", params->block_size);
|
||||||
|
*cur_op = TF_FinishOperation(op_desc, tf_model->status);
|
||||||
|
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||||
|
return DNN_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
return DNN_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Return the sum of kernel_size / 2 over all CONV layers of the network.
 * load_native_model passes the result to add_pad_op.
 * NOTE(review): presumably this compensates for the border lost by the
 * "VALID" convolutions - confirm against add_pad_op.
 */
static int calculate_pad(const ConvolutionalNetwork *conv_network)
|
||||||
|
{
|
||||||
|
ConvolutionalParams *params;
|
||||||
|
int32_t layer;
|
||||||
|
int pad = 0;
|
||||||
|
|
||||||
|
for (layer = 0; layer < conv_network->layers_num; ++layer){
|
||||||
|
if (conv_network->layers[layer].type == CONV){ /* other layer types add no padding */
|
||||||
|
params = (ConvolutionalParams *)conv_network->layers[layer].params;
|
||||||
|
pad += params->kernel_size >> 1; /* half the kernel size, rounded down */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return pad;
|
||||||
|
}
|
||||||
|
|
||||||
|
static DNNReturnType add_pad_op(TFModel *tf_model, TF_Operation **cur_op, const int32_t pad)
|
||||||
|
{
|
||||||
TF_Operation *op;
|
TF_Operation *op;
|
||||||
TF_Tensor *tensor;
|
TF_Tensor *tensor;
|
||||||
|
TF_OperationDescription *op_desc;
|
||||||
TF_Output input;
|
TF_Output input;
|
||||||
int32_t *pads;
|
int32_t *pads;
|
||||||
int64_t pads_shape[] = {4, 2};
|
int64_t pads_shape[] = {4, 2};
|
||||||
|
|
||||||
|
input.index = 0;
|
||||||
|
|
||||||
op_desc = TF_NewOperation(tf_model->graph, "Const", "pads");
|
op_desc = TF_NewOperation(tf_model->graph, "Const", "pads");
|
||||||
TF_SetAttrType(op_desc, "dtype", TF_INT32);
|
TF_SetAttrType(op_desc, "dtype", TF_INT32);
|
||||||
tensor = TF_AllocateTensor(TF_INT32, pads_shape, 2, 4 * 2 * sizeof(int32_t));
|
tensor = TF_AllocateTensor(TF_INT32, pads_shape, 2, 4 * 2 * sizeof(int32_t));
|
||||||
@ -222,68 +355,73 @@ static TF_Operation *add_pad_op(TFModel *tf_model, TF_Operation *input_op, int32
|
|||||||
pads[6] = 0; pads[7] = 0;
|
pads[6] = 0; pads[7] = 0;
|
||||||
TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
|
TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
|
||||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||||
return NULL;
|
return DNN_ERROR;
|
||||||
}
|
}
|
||||||
op = TF_FinishOperation(op_desc, tf_model->status);
|
op = TF_FinishOperation(op_desc, tf_model->status);
|
||||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||||
return NULL;
|
return DNN_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
op_desc = TF_NewOperation(tf_model->graph, "MirrorPad", "mirror_pad");
|
op_desc = TF_NewOperation(tf_model->graph, "MirrorPad", "mirror_pad");
|
||||||
input.oper = input_op;
|
input.oper = *cur_op;
|
||||||
input.index = 0;
|
|
||||||
TF_AddInput(op_desc, input);
|
TF_AddInput(op_desc, input);
|
||||||
input.oper = op;
|
input.oper = op;
|
||||||
TF_AddInput(op_desc, input);
|
TF_AddInput(op_desc, input);
|
||||||
TF_SetAttrType(op_desc, "T", TF_FLOAT);
|
TF_SetAttrType(op_desc, "T", TF_FLOAT);
|
||||||
TF_SetAttrType(op_desc, "Tpaddings", TF_INT32);
|
TF_SetAttrType(op_desc, "Tpaddings", TF_INT32);
|
||||||
TF_SetAttrString(op_desc, "mode", "SYMMETRIC", 9);
|
TF_SetAttrString(op_desc, "mode", "SYMMETRIC", 9);
|
||||||
op = TF_FinishOperation(op_desc, tf_model->status);
|
*cur_op = TF_FinishOperation(op_desc, tf_model->status);
|
||||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||||
return NULL;
|
return DNN_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
return op;
|
return DNN_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
static TF_Operation *add_const_op(TFModel *tf_model, const float *values, const int64_t *dims, int dims_len, const char *name)
|
static DNNReturnType load_native_model(TFModel *tf_model, const char *model_filename)
|
||||||
{
|
{
|
||||||
int dim;
|
int32_t layer;
|
||||||
TF_OperationDescription *op_desc;
|
|
||||||
TF_Tensor *tensor;
|
|
||||||
size_t len;
|
|
||||||
|
|
||||||
op_desc = TF_NewOperation(tf_model->graph, "Const", name);
|
|
||||||
TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
|
|
||||||
len = sizeof(float);
|
|
||||||
for (dim = 0; dim < dims_len; ++dim){
|
|
||||||
len *= dims[dim];
|
|
||||||
}
|
|
||||||
tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, len);
|
|
||||||
memcpy(TF_TensorData(tensor), values, len);
|
|
||||||
TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
|
|
||||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
return TF_FinishOperation(op_desc, tf_model->status);
|
|
||||||
}
|
|
||||||
|
|
||||||
static TF_Operation* add_conv_layers(TFModel *tf_model, const float **consts, const int64_t **consts_dims,
|
|
||||||
const int *consts_dims_len, const char **activations,
|
|
||||||
TF_Operation *input_op, int layers_num)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
TF_OperationDescription *op_desc;
|
TF_OperationDescription *op_desc;
|
||||||
TF_Operation *op;
|
TF_Operation *op;
|
||||||
TF_Operation *transpose_op;
|
TF_Operation *transpose_op;
|
||||||
TF_Output input;
|
|
||||||
int64_t strides[] = {1, 1, 1, 1};
|
|
||||||
int32_t *transpose_perm;
|
|
||||||
TF_Tensor *tensor;
|
TF_Tensor *tensor;
|
||||||
|
TF_Output input;
|
||||||
|
int32_t *transpose_perm;
|
||||||
int64_t transpose_perm_shape[] = {4};
|
int64_t transpose_perm_shape[] = {4};
|
||||||
#define NAME_BUFF_SIZE 256
|
int64_t input_shape[] = {1, -1, -1, -1};
|
||||||
char name_buffer[NAME_BUFF_SIZE];
|
int32_t pad;
|
||||||
|
DNNReturnType layer_add_res;
|
||||||
|
DNNModel *native_model = NULL;
|
||||||
|
ConvolutionalNetwork *conv_network;
|
||||||
|
|
||||||
|
native_model = ff_dnn_load_model_native(model_filename);
|
||||||
|
if (!native_model){
|
||||||
|
return DNN_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
conv_network = (ConvolutionalNetwork *)native_model->model;
|
||||||
|
pad = calculate_pad(conv_network);
|
||||||
|
tf_model->graph = TF_NewGraph();
|
||||||
|
tf_model->status = TF_NewStatus();
|
||||||
|
|
||||||
|
#define CLEANUP_ON_ERROR(tf_model) \
|
||||||
|
{ \
|
||||||
|
TF_DeleteGraph(tf_model->graph); \
|
||||||
|
TF_DeleteStatus(tf_model->status); \
|
||||||
|
return DNN_ERROR; \
|
||||||
|
}
|
||||||
|
|
||||||
|
op_desc = TF_NewOperation(tf_model->graph, "Placeholder", "x");
|
||||||
|
TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
|
||||||
|
TF_SetAttrShape(op_desc, "shape", input_shape, 4);
|
||||||
|
op = TF_FinishOperation(op_desc, tf_model->status);
|
||||||
|
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||||
|
CLEANUP_ON_ERROR(tf_model);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (add_pad_op(tf_model, &op, pad) != DNN_SUCCESS){
|
||||||
|
CLEANUP_ON_ERROR(tf_model);
|
||||||
|
}
|
||||||
|
|
||||||
op_desc = TF_NewOperation(tf_model->graph, "Const", "transpose_perm");
|
op_desc = TF_NewOperation(tf_model->graph, "Const", "transpose_perm");
|
||||||
TF_SetAttrType(op_desc, "dtype", TF_INT32);
|
TF_SetAttrType(op_desc, "dtype", TF_INT32);
|
||||||
@ -295,153 +433,48 @@ static TF_Operation* add_conv_layers(TFModel *tf_model, const float **consts, co
|
|||||||
transpose_perm[3] = 0;
|
transpose_perm[3] = 0;
|
||||||
TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
|
TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
|
||||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||||
return NULL;
|
CLEANUP_ON_ERROR(tf_model);
|
||||||
}
|
}
|
||||||
transpose_op = TF_FinishOperation(op_desc, tf_model->status);
|
transpose_op = TF_FinishOperation(op_desc, tf_model->status);
|
||||||
|
|
||||||
|
for (layer = 0; layer < conv_network->layers_num; ++layer){
|
||||||
|
switch (conv_network->layers[layer].type){
|
||||||
|
case INPUT:
|
||||||
|
break;
|
||||||
|
case CONV:
|
||||||
|
layer_add_res = add_conv_layer(tf_model, transpose_op, &op,
|
||||||
|
(ConvolutionalParams *)conv_network->layers[layer].params, layer);
|
||||||
|
break;
|
||||||
|
case DEPTH_TO_SPACE:
|
||||||
|
layer_add_res = add_depth_to_space_layer(tf_model, &op,
|
||||||
|
(DepthToSpaceParams *)conv_network->layers[layer].params, layer);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
CLEANUP_ON_ERROR(tf_model);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (layer_add_res != DNN_SUCCESS){
|
||||||
|
CLEANUP_ON_ERROR(tf_model);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
op_desc = TF_NewOperation(tf_model->graph, "Identity", "y");
|
||||||
|
input.oper = op;
|
||||||
|
TF_AddInput(op_desc, input);
|
||||||
|
TF_FinishOperation(op_desc, tf_model->status);
|
||||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||||
return NULL;
|
CLEANUP_ON_ERROR(tf_model);
|
||||||
}
|
}
|
||||||
|
|
||||||
input.index = 0;
|
ff_dnn_free_model_native(&native_model);
|
||||||
for (i = 0; i < layers_num; ++i){
|
|
||||||
snprintf(name_buffer, NAME_BUFF_SIZE, "conv_kernel%d", i);
|
|
||||||
op = add_const_op(tf_model, consts[i << 1], consts_dims[i << 1], consts_dims_len[i << 1], name_buffer);
|
|
||||||
if (TF_GetCode(tf_model->status) != TF_OK || op == NULL){
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
snprintf(name_buffer, NAME_BUFF_SIZE, "transpose%d", i);
|
return DNN_SUCCESS;
|
||||||
op_desc = TF_NewOperation(tf_model->graph, "Transpose", name_buffer);
|
|
||||||
input.oper = op;
|
|
||||||
TF_AddInput(op_desc, input);
|
|
||||||
input.oper = transpose_op;
|
|
||||||
TF_AddInput(op_desc, input);
|
|
||||||
TF_SetAttrType(op_desc, "T", TF_FLOAT);
|
|
||||||
TF_SetAttrType(op_desc, "Tperm", TF_INT32);
|
|
||||||
op = TF_FinishOperation(op_desc, tf_model->status);
|
|
||||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
snprintf(name_buffer, NAME_BUFF_SIZE, "conv2d%d", i);
|
|
||||||
op_desc = TF_NewOperation(tf_model->graph, "Conv2D", name_buffer);
|
|
||||||
input.oper = input_op;
|
|
||||||
TF_AddInput(op_desc, input);
|
|
||||||
input.oper = op;
|
|
||||||
TF_AddInput(op_desc, input);
|
|
||||||
TF_SetAttrType(op_desc, "T", TF_FLOAT);
|
|
||||||
TF_SetAttrIntList(op_desc, "strides", strides, 4);
|
|
||||||
TF_SetAttrString(op_desc, "padding", "VALID", 5);
|
|
||||||
input_op = TF_FinishOperation(op_desc, tf_model->status);
|
|
||||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
snprintf(name_buffer, NAME_BUFF_SIZE, "conv_biases%d", i);
|
|
||||||
op = add_const_op(tf_model, consts[(i << 1) + 1], consts_dims[(i << 1) + 1], consts_dims_len[(i << 1) + 1], name_buffer);
|
|
||||||
if (TF_GetCode(tf_model->status) != TF_OK || op == NULL){
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
snprintf(name_buffer, NAME_BUFF_SIZE, "bias_add%d", i);
|
|
||||||
op_desc = TF_NewOperation(tf_model->graph, "BiasAdd", name_buffer);
|
|
||||||
input.oper = input_op;
|
|
||||||
TF_AddInput(op_desc, input);
|
|
||||||
input.oper = op;
|
|
||||||
TF_AddInput(op_desc, input);
|
|
||||||
TF_SetAttrType(op_desc, "T", TF_FLOAT);
|
|
||||||
input_op = TF_FinishOperation(op_desc, tf_model->status);
|
|
||||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
snprintf(name_buffer, NAME_BUFF_SIZE, "activation%d", i);
|
|
||||||
op_desc = TF_NewOperation(tf_model->graph, activations[i], name_buffer);
|
|
||||||
input.oper = input_op;
|
|
||||||
TF_AddInput(op_desc, input);
|
|
||||||
TF_SetAttrType(op_desc, "T", TF_FLOAT);
|
|
||||||
input_op = TF_FinishOperation(op_desc, tf_model->status);
|
|
||||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return input_op;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DNNModel *ff_dnn_load_default_model_tf(DNNDefaultModel model_type)
|
DNNModel *ff_dnn_load_model_tf(const char *model_filename)
|
||||||
{
|
{
|
||||||
DNNModel *model = NULL;
|
DNNModel *model = NULL;
|
||||||
TFModel *tf_model = NULL;
|
TFModel *tf_model = NULL;
|
||||||
TF_OperationDescription *op_desc;
|
|
||||||
TF_Operation *op;
|
|
||||||
TF_Output input;
|
|
||||||
static const int64_t input_shape[] = {1, -1, -1, 1};
|
|
||||||
static const char tanh[] = "Tanh";
|
|
||||||
static const char sigmoid[] = "Sigmoid";
|
|
||||||
static const char relu[] = "Relu";
|
|
||||||
|
|
||||||
static const float *srcnn_consts[] = {
|
|
||||||
srcnn_conv1_kernel,
|
|
||||||
srcnn_conv1_bias,
|
|
||||||
srcnn_conv2_kernel,
|
|
||||||
srcnn_conv2_bias,
|
|
||||||
srcnn_conv3_kernel,
|
|
||||||
srcnn_conv3_bias
|
|
||||||
};
|
|
||||||
static const long int *srcnn_consts_dims[] = {
|
|
||||||
srcnn_conv1_kernel_dims,
|
|
||||||
srcnn_conv1_bias_dims,
|
|
||||||
srcnn_conv2_kernel_dims,
|
|
||||||
srcnn_conv2_bias_dims,
|
|
||||||
srcnn_conv3_kernel_dims,
|
|
||||||
srcnn_conv3_bias_dims
|
|
||||||
};
|
|
||||||
static const int srcnn_consts_dims_len[] = {
|
|
||||||
4,
|
|
||||||
1,
|
|
||||||
4,
|
|
||||||
1,
|
|
||||||
4,
|
|
||||||
1
|
|
||||||
};
|
|
||||||
static const char *srcnn_activations[] = {
|
|
||||||
relu,
|
|
||||||
relu,
|
|
||||||
relu
|
|
||||||
};
|
|
||||||
|
|
||||||
static const float *espcn_consts[] = {
|
|
||||||
espcn_conv1_kernel,
|
|
||||||
espcn_conv1_bias,
|
|
||||||
espcn_conv2_kernel,
|
|
||||||
espcn_conv2_bias,
|
|
||||||
espcn_conv3_kernel,
|
|
||||||
espcn_conv3_bias
|
|
||||||
};
|
|
||||||
static const long int *espcn_consts_dims[] = {
|
|
||||||
espcn_conv1_kernel_dims,
|
|
||||||
espcn_conv1_bias_dims,
|
|
||||||
espcn_conv2_kernel_dims,
|
|
||||||
espcn_conv2_bias_dims,
|
|
||||||
espcn_conv3_kernel_dims,
|
|
||||||
espcn_conv3_bias_dims
|
|
||||||
};
|
|
||||||
static const int espcn_consts_dims_len[] = {
|
|
||||||
4,
|
|
||||||
1,
|
|
||||||
4,
|
|
||||||
1,
|
|
||||||
4,
|
|
||||||
1
|
|
||||||
};
|
|
||||||
static const char *espcn_activations[] = {
|
|
||||||
tanh,
|
|
||||||
tanh,
|
|
||||||
sigmoid
|
|
||||||
};
|
|
||||||
|
|
||||||
input.index = 0;
|
|
||||||
|
|
||||||
model = av_malloc(sizeof(DNNModel));
|
model = av_malloc(sizeof(DNNModel));
|
||||||
if (!model){
|
if (!model){
|
||||||
@ -457,70 +490,13 @@ DNNModel *ff_dnn_load_default_model_tf(DNNDefaultModel model_type)
|
|||||||
tf_model->input_tensor = NULL;
|
tf_model->input_tensor = NULL;
|
||||||
tf_model->output_data = NULL;
|
tf_model->output_data = NULL;
|
||||||
|
|
||||||
tf_model->graph = TF_NewGraph();
|
if (load_tf_model(tf_model, model_filename) != DNN_SUCCESS){
|
||||||
tf_model->status = TF_NewStatus();
|
if (load_native_model(tf_model, model_filename) != DNN_SUCCESS){
|
||||||
|
av_freep(&tf_model);
|
||||||
|
av_freep(&model);
|
||||||
|
|
||||||
#define CLEANUP_ON_ERROR(tf_model, model) { \
|
return NULL;
|
||||||
TF_DeleteGraph(tf_model->graph); \
|
|
||||||
TF_DeleteStatus(tf_model->status); \
|
|
||||||
av_freep(&tf_model); \
|
|
||||||
av_freep(&model); \
|
|
||||||
return NULL; \
|
|
||||||
}
|
|
||||||
|
|
||||||
op_desc = TF_NewOperation(tf_model->graph, "Placeholder", "x");
|
|
||||||
TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
|
|
||||||
TF_SetAttrShape(op_desc, "shape", input_shape, 4);
|
|
||||||
op = TF_FinishOperation(op_desc, tf_model->status);
|
|
||||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
|
||||||
CLEANUP_ON_ERROR(tf_model, model);
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (model_type){
|
|
||||||
case DNN_SRCNN:
|
|
||||||
op = add_pad_op(tf_model, op, 6);
|
|
||||||
if (!op){
|
|
||||||
CLEANUP_ON_ERROR(tf_model, model);
|
|
||||||
}
|
}
|
||||||
op = add_conv_layers(tf_model, srcnn_consts,
|
|
||||||
srcnn_consts_dims, srcnn_consts_dims_len,
|
|
||||||
srcnn_activations, op, 3);
|
|
||||||
if (!op){
|
|
||||||
CLEANUP_ON_ERROR(tf_model, model);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case DNN_ESPCN:
|
|
||||||
op = add_pad_op(tf_model, op, 4);
|
|
||||||
if (!op){
|
|
||||||
CLEANUP_ON_ERROR(tf_model, model);
|
|
||||||
}
|
|
||||||
op = add_conv_layers(tf_model, espcn_consts,
|
|
||||||
espcn_consts_dims, espcn_consts_dims_len,
|
|
||||||
espcn_activations, op, 3);
|
|
||||||
if (!op){
|
|
||||||
CLEANUP_ON_ERROR(tf_model, model);
|
|
||||||
}
|
|
||||||
|
|
||||||
op_desc = TF_NewOperation(tf_model->graph, "DepthToSpace", "depth_to_space");
|
|
||||||
input.oper = op;
|
|
||||||
TF_AddInput(op_desc, input);
|
|
||||||
TF_SetAttrType(op_desc, "T", TF_FLOAT);
|
|
||||||
TF_SetAttrInt(op_desc, "block_size", 2);
|
|
||||||
op = TF_FinishOperation(op_desc, tf_model->status);
|
|
||||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
|
||||||
CLEANUP_ON_ERROR(tf_model, model);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
CLEANUP_ON_ERROR(tf_model, model);
|
|
||||||
}
|
|
||||||
|
|
||||||
op_desc = TF_NewOperation(tf_model->graph, "Identity", "y");
|
|
||||||
input.oper = op;
|
|
||||||
TF_AddInput(op_desc, input);
|
|
||||||
TF_FinishOperation(op_desc, tf_model->status);
|
|
||||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
|
||||||
CLEANUP_ON_ERROR(tf_model, model);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
model->model = (void *)tf_model;
|
model->model = (void *)tf_model;
|
||||||
@ -529,6 +505,8 @@ DNNModel *ff_dnn_load_default_model_tf(DNNDefaultModel model_type)
|
|||||||
return model;
|
return model;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model)
|
DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model)
|
||||||
{
|
{
|
||||||
TFModel *tf_model = (TFModel *)model->model;
|
TFModel *tf_model = (TFModel *)model->model;
|
||||||
@ -572,7 +550,7 @@ void ff_dnn_free_model_tf(DNNModel **model)
|
|||||||
TF_DeleteTensor(tf_model->input_tensor);
|
TF_DeleteTensor(tf_model->input_tensor);
|
||||||
}
|
}
|
||||||
if (tf_model->output_data){
|
if (tf_model->output_data){
|
||||||
av_freep(&(tf_model->output_data->data));
|
av_freep(&tf_model->output_data->data);
|
||||||
}
|
}
|
||||||
av_freep(&tf_model);
|
av_freep(&tf_model);
|
||||||
av_freep(model);
|
av_freep(model);
|
||||||
|
@ -31,8 +31,6 @@
|
|||||||
|
|
||||||
DNNModel *ff_dnn_load_model_tf(const char *model_filename);
|
DNNModel *ff_dnn_load_model_tf(const char *model_filename);
|
||||||
|
|
||||||
DNNModel *ff_dnn_load_default_model_tf(DNNDefaultModel model_type);
|
|
||||||
|
|
||||||
DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model);
|
DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model);
|
||||||
|
|
||||||
void ff_dnn_free_model_tf(DNNModel **model);
|
void ff_dnn_free_model_tf(DNNModel **model);
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -40,14 +40,12 @@ DNNModule *ff_get_dnn_module(DNNBackendType backend_type)
|
|||||||
switch(backend_type){
|
switch(backend_type){
|
||||||
case DNN_NATIVE:
|
case DNN_NATIVE:
|
||||||
dnn_module->load_model = &ff_dnn_load_model_native;
|
dnn_module->load_model = &ff_dnn_load_model_native;
|
||||||
dnn_module->load_default_model = &ff_dnn_load_default_model_native;
|
|
||||||
dnn_module->execute_model = &ff_dnn_execute_model_native;
|
dnn_module->execute_model = &ff_dnn_execute_model_native;
|
||||||
dnn_module->free_model = &ff_dnn_free_model_native;
|
dnn_module->free_model = &ff_dnn_free_model_native;
|
||||||
break;
|
break;
|
||||||
case DNN_TF:
|
case DNN_TF:
|
||||||
#if (CONFIG_LIBTENSORFLOW == 1)
|
#if (CONFIG_LIBTENSORFLOW == 1)
|
||||||
dnn_module->load_model = &ff_dnn_load_model_tf;
|
dnn_module->load_model = &ff_dnn_load_model_tf;
|
||||||
dnn_module->load_default_model = &ff_dnn_load_default_model_tf;
|
|
||||||
dnn_module->execute_model = &ff_dnn_execute_model_tf;
|
dnn_module->execute_model = &ff_dnn_execute_model_tf;
|
||||||
dnn_module->free_model = &ff_dnn_free_model_tf;
|
dnn_module->free_model = &ff_dnn_free_model_tf;
|
||||||
#else
|
#else
|
||||||
|
@ -30,8 +30,6 @@ typedef enum {DNN_SUCCESS, DNN_ERROR} DNNReturnType;
|
|||||||
|
|
||||||
typedef enum {DNN_NATIVE, DNN_TF} DNNBackendType;
|
typedef enum {DNN_NATIVE, DNN_TF} DNNBackendType;
|
||||||
|
|
||||||
typedef enum {DNN_SRCNN, DNN_ESPCN} DNNDefaultModel;
|
|
||||||
|
|
||||||
typedef struct DNNData{
|
typedef struct DNNData{
|
||||||
float *data;
|
float *data;
|
||||||
int width, height, channels;
|
int width, height, channels;
|
||||||
@ -49,8 +47,6 @@ typedef struct DNNModel{
|
|||||||
typedef struct DNNModule{
|
typedef struct DNNModule{
|
||||||
// Loads model and parameters from given file. Returns NULL if it is not possible.
|
// Loads model and parameters from given file. Returns NULL if it is not possible.
|
||||||
DNNModel *(*load_model)(const char *model_filename);
|
DNNModel *(*load_model)(const char *model_filename);
|
||||||
// Loads one of the default models
|
|
||||||
DNNModel *(*load_default_model)(DNNDefaultModel model_type);
|
|
||||||
// Executes model with specified input and output. Returns DNN_ERROR otherwise.
|
// Executes model with specified input and output. Returns DNN_ERROR otherwise.
|
||||||
DNNReturnType (*execute_model)(const DNNModel *model);
|
DNNReturnType (*execute_model)(const DNNModel *model);
|
||||||
// Frees memory allocated for model.
|
// Frees memory allocated for model.
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -33,12 +33,9 @@
|
|||||||
#include "libswscale/swscale.h"
|
#include "libswscale/swscale.h"
|
||||||
#include "dnn_interface.h"
|
#include "dnn_interface.h"
|
||||||
|
|
||||||
typedef enum {SRCNN, ESPCN} SRModel;
|
|
||||||
|
|
||||||
typedef struct SRContext {
|
typedef struct SRContext {
|
||||||
const AVClass *class;
|
const AVClass *class;
|
||||||
|
|
||||||
SRModel model_type;
|
|
||||||
char *model_filename;
|
char *model_filename;
|
||||||
DNNBackendType backend_type;
|
DNNBackendType backend_type;
|
||||||
DNNModule *dnn_module;
|
DNNModule *dnn_module;
|
||||||
@ -52,16 +49,13 @@ typedef struct SRContext {
|
|||||||
#define OFFSET(x) offsetof(SRContext, x)
|
#define OFFSET(x) offsetof(SRContext, x)
|
||||||
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
|
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
|
||||||
static const AVOption sr_options[] = {
|
static const AVOption sr_options[] = {
|
||||||
{ "model", "specifies what DNN model to use", OFFSET(model_type), AV_OPT_TYPE_FLAGS, { .i64 = 0 }, 0, 1, FLAGS, "model_type" },
|
|
||||||
{ "srcnn", "Super-Resolution Convolutional Neural Network model (scale factor should be specified for custom SRCNN model)", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, "model_type" },
|
|
||||||
{ "espcn", "Efficient Sub-Pixel Convolutional Neural Network model", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "model_type" },
|
|
||||||
{ "dnn_backend", "DNN backend used for model execution", OFFSET(backend_type), AV_OPT_TYPE_FLAGS, { .i64 = 0 }, 0, 1, FLAGS, "backend" },
|
{ "dnn_backend", "DNN backend used for model execution", OFFSET(backend_type), AV_OPT_TYPE_FLAGS, { .i64 = 0 }, 0, 1, FLAGS, "backend" },
|
||||||
{ "native", "native backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, "backend" },
|
{ "native", "native backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, "backend" },
|
||||||
#if (CONFIG_LIBTENSORFLOW == 1)
|
#if (CONFIG_LIBTENSORFLOW == 1)
|
||||||
{ "tensorflow", "tensorflow backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "backend" },
|
{ "tensorflow", "tensorflow backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "backend" },
|
||||||
#endif
|
#endif
|
||||||
{"scale_factor", "scale factor for SRCNN model", OFFSET(scale_factor), AV_OPT_TYPE_INT, { .i64 = 2 }, 2, 4, FLAGS},
|
{ "scale_factor", "scale factor for SRCNN model", OFFSET(scale_factor), AV_OPT_TYPE_INT, { .i64 = 2 }, 2, 4, FLAGS },
|
||||||
{ "model_filename", "path to model file specifying network architecture and its parameters", OFFSET(model_filename), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
|
{ "model", "path to model file specifying network architecture and its parameters", OFFSET(model_filename), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
|
||||||
{ NULL }
|
{ NULL }
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -77,15 +71,8 @@ static av_cold int init(AVFilterContext *context)
|
|||||||
return AVERROR(ENOMEM);
|
return AVERROR(ENOMEM);
|
||||||
}
|
}
|
||||||
if (!sr_context->model_filename){
|
if (!sr_context->model_filename){
|
||||||
av_log(context, AV_LOG_VERBOSE, "model file for network was not specified, using default network for x2 upsampling\n");
|
av_log(context, AV_LOG_ERROR, "model file for network was not specified\n");
|
||||||
sr_context->scale_factor = 2;
|
return AVERROR(EIO);
|
||||||
switch (sr_context->model_type){
|
|
||||||
case SRCNN:
|
|
||||||
sr_context->model = (sr_context->dnn_module->load_default_model)(DNN_SRCNN);
|
|
||||||
break;
|
|
||||||
case ESPCN:
|
|
||||||
sr_context->model = (sr_context->dnn_module->load_default_model)(DNN_ESPCN);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else{
|
else{
|
||||||
sr_context->model = (sr_context->dnn_module->load_model)(sr_context->model_filename);
|
sr_context->model = (sr_context->dnn_module->load_model)(sr_context->model_filename);
|
||||||
@ -126,15 +113,8 @@ static int config_props(AVFilterLink *inlink)
|
|||||||
DNNReturnType result;
|
DNNReturnType result;
|
||||||
int sws_src_h, sws_src_w, sws_dst_h, sws_dst_w;
|
int sws_src_h, sws_src_w, sws_dst_h, sws_dst_w;
|
||||||
|
|
||||||
switch (sr_context->model_type){
|
sr_context->input.width = inlink->w * sr_context->scale_factor;
|
||||||
case SRCNN:
|
sr_context->input.height = inlink->h * sr_context->scale_factor;
|
||||||
sr_context->input.width = inlink->w * sr_context->scale_factor;
|
|
||||||
sr_context->input.height = inlink->h * sr_context->scale_factor;
|
|
||||||
break;
|
|
||||||
case ESPCN:
|
|
||||||
sr_context->input.width = inlink->w;
|
|
||||||
sr_context->input.height = inlink->h;
|
|
||||||
}
|
|
||||||
sr_context->input.channels = 1;
|
sr_context->input.channels = 1;
|
||||||
|
|
||||||
result = (sr_context->model->set_input_output)(sr_context->model->model, &sr_context->input, &sr_context->output);
|
result = (sr_context->model->set_input_output)(sr_context->model->model, &sr_context->input, &sr_context->output);
|
||||||
@ -143,6 +123,16 @@ static int config_props(AVFilterLink *inlink)
|
|||||||
return AVERROR(EIO);
|
return AVERROR(EIO);
|
||||||
}
|
}
|
||||||
else{
|
else{
|
||||||
|
if (sr_context->input.height != sr_context->output.height || sr_context->input.width != sr_context->output.width){
|
||||||
|
sr_context->input.width = inlink->w;
|
||||||
|
sr_context->input.height = inlink->h;
|
||||||
|
result = (sr_context->model->set_input_output)(sr_context->model->model, &sr_context->input, &sr_context->output);
|
||||||
|
if (result != DNN_SUCCESS){
|
||||||
|
av_log(context, AV_LOG_ERROR, "could not set input and output for the model\n");
|
||||||
|
return AVERROR(EIO);
|
||||||
|
}
|
||||||
|
sr_context->scale_factor = 0;
|
||||||
|
}
|
||||||
outlink->h = sr_context->output.height;
|
outlink->h = sr_context->output.height;
|
||||||
outlink->w = sr_context->output.width;
|
outlink->w = sr_context->output.width;
|
||||||
sr_context->sws_contexts[1] = sws_getContext(sr_context->input.width, sr_context->input.height, AV_PIX_FMT_GRAY8,
|
sr_context->sws_contexts[1] = sws_getContext(sr_context->input.width, sr_context->input.height, AV_PIX_FMT_GRAY8,
|
||||||
@ -157,8 +147,7 @@ static int config_props(AVFilterLink *inlink)
|
|||||||
av_log(context, AV_LOG_ERROR, "could not create SwsContext for conversions\n");
|
av_log(context, AV_LOG_ERROR, "could not create SwsContext for conversions\n");
|
||||||
return AVERROR(ENOMEM);
|
return AVERROR(ENOMEM);
|
||||||
}
|
}
|
||||||
switch (sr_context->model_type){
|
if (sr_context->scale_factor){
|
||||||
case SRCNN:
|
|
||||||
sr_context->sws_contexts[0] = sws_getContext(inlink->w, inlink->h, inlink->format,
|
sr_context->sws_contexts[0] = sws_getContext(inlink->w, inlink->h, inlink->format,
|
||||||
outlink->w, outlink->h, outlink->format,
|
outlink->w, outlink->h, outlink->format,
|
||||||
SWS_BICUBIC, NULL, NULL, NULL);
|
SWS_BICUBIC, NULL, NULL, NULL);
|
||||||
@ -167,8 +156,8 @@ static int config_props(AVFilterLink *inlink)
|
|||||||
return AVERROR(ENOMEM);
|
return AVERROR(ENOMEM);
|
||||||
}
|
}
|
||||||
sr_context->sws_slice_h = inlink->h;
|
sr_context->sws_slice_h = inlink->h;
|
||||||
break;
|
}
|
||||||
case ESPCN:
|
else{
|
||||||
if (inlink->format != AV_PIX_FMT_GRAY8){
|
if (inlink->format != AV_PIX_FMT_GRAY8){
|
||||||
sws_src_h = sr_context->input.height;
|
sws_src_h = sr_context->input.height;
|
||||||
sws_src_w = sr_context->input.width;
|
sws_src_w = sr_context->input.width;
|
||||||
@ -233,15 +222,14 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
|
|||||||
av_frame_copy_props(out, in);
|
av_frame_copy_props(out, in);
|
||||||
out->height = sr_context->output.height;
|
out->height = sr_context->output.height;
|
||||||
out->width = sr_context->output.width;
|
out->width = sr_context->output.width;
|
||||||
switch (sr_context->model_type){
|
if (sr_context->scale_factor){
|
||||||
case SRCNN:
|
|
||||||
sws_scale(sr_context->sws_contexts[0], (const uint8_t **)in->data, in->linesize,
|
sws_scale(sr_context->sws_contexts[0], (const uint8_t **)in->data, in->linesize,
|
||||||
0, sr_context->sws_slice_h, out->data, out->linesize);
|
0, sr_context->sws_slice_h, out->data, out->linesize);
|
||||||
|
|
||||||
sws_scale(sr_context->sws_contexts[1], (const uint8_t **)out->data, out->linesize,
|
sws_scale(sr_context->sws_contexts[1], (const uint8_t **)out->data, out->linesize,
|
||||||
0, out->height, (uint8_t * const*)(&sr_context->input.data), &sr_context->sws_input_linesize);
|
0, out->height, (uint8_t * const*)(&sr_context->input.data), &sr_context->sws_input_linesize);
|
||||||
break;
|
}
|
||||||
case ESPCN:
|
else{
|
||||||
if (sr_context->sws_contexts[0]){
|
if (sr_context->sws_contexts[0]){
|
||||||
sws_scale(sr_context->sws_contexts[0], (const uint8_t **)(in->data + 1), in->linesize + 1,
|
sws_scale(sr_context->sws_contexts[0], (const uint8_t **)(in->data + 1), in->linesize + 1,
|
||||||
0, sr_context->sws_slice_h, out->data + 1, out->linesize + 1);
|
0, sr_context->sws_slice_h, out->data + 1, out->linesize + 1);
|
||||||
|
Loading…
Reference in New Issue
Block a user