You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-09-16 08:36:51 +02:00
libavfilter/dnn: support multiple outputs for tensorflow model
Some models, such as SSD and YOLO, have more than one output. The cleanup code in this patch is a little complex because set_input_output_tf can be called many times, interleaved with calls to ff_dnn_execute_model_tf, so resources must be released correctly whichever order the two interfaces are called in. Signed-off-by: Guo, Yejun <yejun.guo@intel.com> Signed-off-by: Pedro Arthur <bygrandao@gmail.com>
This commit is contained in:
@@ -25,7 +25,7 @@
|
|||||||
|
|
||||||
#include "dnn_backend_native.h"
|
#include "dnn_backend_native.h"
|
||||||
|
|
||||||
static DNNReturnType set_input_output_native(void *model, DNNData *input, const char *input_name, const char *output_name)
|
static DNNReturnType set_input_output_native(void *model, DNNData *input, const char *input_name, const char **output_names, uint32_t nb_output)
|
||||||
{
|
{
|
||||||
ConvolutionalNetwork *network = (ConvolutionalNetwork *)model;
|
ConvolutionalNetwork *network = (ConvolutionalNetwork *)model;
|
||||||
InputParams *input_params;
|
InputParams *input_params;
|
||||||
@@ -275,7 +275,7 @@ static void depth_to_space(const float *input, float *output, int block_size, in
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *output)
|
DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *outputs, uint32_t nb_output)
|
||||||
{
|
{
|
||||||
ConvolutionalNetwork *network = (ConvolutionalNetwork *)model->model;
|
ConvolutionalNetwork *network = (ConvolutionalNetwork *)model->model;
|
||||||
int cur_width, cur_height, cur_channels;
|
int cur_width, cur_height, cur_channels;
|
||||||
@@ -317,10 +317,13 @@ DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *output
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
output->data = network->layers[network->layers_num - 1].output;
|
// native mode does not support multiple outputs yet
|
||||||
output->height = cur_height;
|
if (nb_output > 1)
|
||||||
output->width = cur_width;
|
return DNN_ERROR;
|
||||||
output->channels = cur_channels;
|
outputs[0].data = network->layers[network->layers_num - 1].output;
|
||||||
|
outputs[0].height = cur_height;
|
||||||
|
outputs[0].width = cur_width;
|
||||||
|
outputs[0].channels = cur_channels;
|
||||||
|
|
||||||
return DNN_SUCCESS;
|
return DNN_SUCCESS;
|
||||||
}
|
}
|
||||||
|
@@ -63,7 +63,7 @@ typedef struct ConvolutionalNetwork{
|
|||||||
|
|
||||||
DNNModel *ff_dnn_load_model_native(const char *model_filename);
|
DNNModel *ff_dnn_load_model_native(const char *model_filename);
|
||||||
|
|
||||||
DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *output);
|
DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *outputs, uint32_t nb_output);
|
||||||
|
|
||||||
void ff_dnn_free_model_native(DNNModel **model);
|
void ff_dnn_free_model_native(DNNModel **model);
|
||||||
|
|
||||||
|
@@ -26,6 +26,7 @@
|
|||||||
#include "dnn_backend_tf.h"
|
#include "dnn_backend_tf.h"
|
||||||
#include "dnn_backend_native.h"
|
#include "dnn_backend_native.h"
|
||||||
#include "libavformat/avio.h"
|
#include "libavformat/avio.h"
|
||||||
|
#include "libavutil/avassert.h"
|
||||||
|
|
||||||
#include <tensorflow/c/c_api.h>
|
#include <tensorflow/c/c_api.h>
|
||||||
|
|
||||||
@@ -33,9 +34,11 @@ typedef struct TFModel{
|
|||||||
TF_Graph *graph;
|
TF_Graph *graph;
|
||||||
TF_Session *session;
|
TF_Session *session;
|
||||||
TF_Status *status;
|
TF_Status *status;
|
||||||
TF_Output input, output;
|
TF_Output input;
|
||||||
TF_Tensor *input_tensor;
|
TF_Tensor *input_tensor;
|
||||||
TF_Tensor *output_tensor;
|
TF_Output *outputs;
|
||||||
|
TF_Tensor **output_tensors;
|
||||||
|
uint32_t nb_output;
|
||||||
} TFModel;
|
} TFModel;
|
||||||
|
|
||||||
static void free_buffer(void *data, size_t length)
|
static void free_buffer(void *data, size_t length)
|
||||||
@@ -76,7 +79,7 @@ static TF_Buffer *read_graph(const char *model_filename)
|
|||||||
return graph_buf;
|
return graph_buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
static DNNReturnType set_input_output_tf(void *model, DNNData *input, const char *input_name, const char *output_name)
|
static DNNReturnType set_input_output_tf(void *model, DNNData *input, const char *input_name, const char **output_names, uint32_t nb_output)
|
||||||
{
|
{
|
||||||
TFModel *tf_model = (TFModel *)model;
|
TFModel *tf_model = (TFModel *)model;
|
||||||
int64_t input_dims[] = {1, input->height, input->width, input->channels};
|
int64_t input_dims[] = {1, input->height, input->width, input->channels};
|
||||||
@@ -100,11 +103,38 @@ static DNNReturnType set_input_output_tf(void *model, DNNData *input, const char
|
|||||||
input->data = (float *)TF_TensorData(tf_model->input_tensor);
|
input->data = (float *)TF_TensorData(tf_model->input_tensor);
|
||||||
|
|
||||||
// Output operation
|
// Output operation
|
||||||
tf_model->output.oper = TF_GraphOperationByName(tf_model->graph, output_name);
|
if (nb_output == 0)
|
||||||
if (!tf_model->output.oper){
|
return DNN_ERROR;
|
||||||
|
|
||||||
|
av_freep(&tf_model->outputs);
|
||||||
|
tf_model->outputs = av_malloc_array(nb_output, sizeof(*tf_model->outputs));
|
||||||
|
if (!tf_model->outputs)
|
||||||
|
return DNN_ERROR;
|
||||||
|
for (int i = 0; i < nb_output; ++i) {
|
||||||
|
tf_model->outputs[i].oper = TF_GraphOperationByName(tf_model->graph, output_names[i]);
|
||||||
|
if (!tf_model->outputs[i].oper){
|
||||||
|
av_freep(&tf_model->outputs);
|
||||||
|
return DNN_ERROR;
|
||||||
|
}
|
||||||
|
tf_model->outputs[i].index = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tf_model->output_tensors) {
|
||||||
|
for (uint32_t i = 0; i < tf_model->nb_output; ++i) {
|
||||||
|
if (tf_model->output_tensors[i]) {
|
||||||
|
TF_DeleteTensor(tf_model->output_tensors[i]);
|
||||||
|
tf_model->output_tensors[i] = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
av_freep(&tf_model->output_tensors);
|
||||||
|
tf_model->output_tensors = av_mallocz_array(nb_output, sizeof(*tf_model->output_tensors));
|
||||||
|
if (!tf_model->output_tensors) {
|
||||||
|
av_freep(&tf_model->outputs);
|
||||||
return DNN_ERROR;
|
return DNN_ERROR;
|
||||||
}
|
}
|
||||||
tf_model->output.index = 0;
|
|
||||||
|
tf_model->nb_output = nb_output;
|
||||||
|
|
||||||
if (tf_model->session){
|
if (tf_model->session){
|
||||||
TF_CloseSession(tf_model->session, tf_model->status);
|
TF_CloseSession(tf_model->session, tf_model->status);
|
||||||
@@ -484,25 +514,36 @@ DNNModel *ff_dnn_load_model_tf(const char *model_filename)
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, DNNData *output)
|
DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, DNNData *outputs, uint32_t nb_output)
|
||||||
{
|
{
|
||||||
TFModel *tf_model = (TFModel *)model->model;
|
TFModel *tf_model = (TFModel *)model->model;
|
||||||
if (tf_model->output_tensor)
|
uint32_t nb = FFMIN(nb_output, tf_model->nb_output);
|
||||||
TF_DeleteTensor(tf_model->output_tensor);
|
if (nb == 0)
|
||||||
|
return DNN_ERROR;
|
||||||
|
|
||||||
|
av_assert0(tf_model->output_tensors);
|
||||||
|
for (uint32_t i = 0; i < tf_model->nb_output; ++i) {
|
||||||
|
if (tf_model->output_tensors[i]) {
|
||||||
|
TF_DeleteTensor(tf_model->output_tensors[i]);
|
||||||
|
tf_model->output_tensors[i] = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
TF_SessionRun(tf_model->session, NULL,
|
TF_SessionRun(tf_model->session, NULL,
|
||||||
&tf_model->input, &tf_model->input_tensor, 1,
|
&tf_model->input, &tf_model->input_tensor, 1,
|
||||||
&tf_model->output, &tf_model->output_tensor, 1,
|
tf_model->outputs, tf_model->output_tensors, nb,
|
||||||
NULL, 0, NULL, tf_model->status);
|
NULL, 0, NULL, tf_model->status);
|
||||||
|
|
||||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||||
return DNN_ERROR;
|
return DNN_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
output->height = TF_Dim(tf_model->output_tensor, 1);
|
for (uint32_t i = 0; i < nb; ++i) {
|
||||||
output->width = TF_Dim(tf_model->output_tensor, 2);
|
outputs[i].height = TF_Dim(tf_model->output_tensors[i], 1);
|
||||||
output->channels = TF_Dim(tf_model->output_tensor, 3);
|
outputs[i].width = TF_Dim(tf_model->output_tensors[i], 2);
|
||||||
output->data = TF_TensorData(tf_model->output_tensor);
|
outputs[i].channels = TF_Dim(tf_model->output_tensors[i], 3);
|
||||||
|
outputs[i].data = TF_TensorData(tf_model->output_tensors[i]);
|
||||||
|
}
|
||||||
|
|
||||||
return DNN_SUCCESS;
|
return DNN_SUCCESS;
|
||||||
}
|
}
|
||||||
@@ -526,9 +567,16 @@ void ff_dnn_free_model_tf(DNNModel **model)
|
|||||||
if (tf_model->input_tensor){
|
if (tf_model->input_tensor){
|
||||||
TF_DeleteTensor(tf_model->input_tensor);
|
TF_DeleteTensor(tf_model->input_tensor);
|
||||||
}
|
}
|
||||||
if (tf_model->output_tensor){
|
if (tf_model->output_tensors) {
|
||||||
TF_DeleteTensor(tf_model->output_tensor);
|
for (uint32_t i = 0; i < tf_model->nb_output; ++i) {
|
||||||
|
if (tf_model->output_tensors[i]) {
|
||||||
|
TF_DeleteTensor(tf_model->output_tensors[i]);
|
||||||
|
tf_model->output_tensors[i] = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
av_freep(&tf_model->outputs);
|
||||||
|
av_freep(&tf_model->output_tensors);
|
||||||
av_freep(&tf_model);
|
av_freep(&tf_model);
|
||||||
av_freep(model);
|
av_freep(model);
|
||||||
}
|
}
|
||||||
|
@@ -31,7 +31,7 @@
|
|||||||
|
|
||||||
DNNModel *ff_dnn_load_model_tf(const char *model_filename);
|
DNNModel *ff_dnn_load_model_tf(const char *model_filename);
|
||||||
|
|
||||||
DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, DNNData *output);
|
DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, DNNData *outputs, uint32_t nb_output);
|
||||||
|
|
||||||
void ff_dnn_free_model_tf(DNNModel **model);
|
void ff_dnn_free_model_tf(DNNModel **model);
|
||||||
|
|
||||||
|
@@ -26,6 +26,8 @@
|
|||||||
#ifndef AVFILTER_DNN_INTERFACE_H
|
#ifndef AVFILTER_DNN_INTERFACE_H
|
||||||
#define AVFILTER_DNN_INTERFACE_H
|
#define AVFILTER_DNN_INTERFACE_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
typedef enum {DNN_SUCCESS, DNN_ERROR} DNNReturnType;
|
typedef enum {DNN_SUCCESS, DNN_ERROR} DNNReturnType;
|
||||||
|
|
||||||
typedef enum {DNN_NATIVE, DNN_TF} DNNBackendType;
|
typedef enum {DNN_NATIVE, DNN_TF} DNNBackendType;
|
||||||
@@ -40,7 +42,7 @@ typedef struct DNNModel{
|
|||||||
void *model;
|
void *model;
|
||||||
// Sets model input and output.
|
// Sets model input and output.
|
||||||
// Should be called at least once before model execution.
|
// Should be called at least once before model execution.
|
||||||
DNNReturnType (*set_input_output)(void *model, DNNData *input, const char *input_name, const char *output_name);
|
DNNReturnType (*set_input_output)(void *model, DNNData *input, const char *input_name, const char **output_names, uint32_t nb_output);
|
||||||
} DNNModel;
|
} DNNModel;
|
||||||
|
|
||||||
// Stores pointers to functions for loading, executing, freeing DNN models for one of the backends.
|
// Stores pointers to functions for loading, executing, freeing DNN models for one of the backends.
|
||||||
@@ -48,7 +50,7 @@ typedef struct DNNModule{
|
|||||||
// Loads model and parameters from given file. Returns NULL if it is not possible.
|
// Loads model and parameters from given file. Returns NULL if it is not possible.
|
||||||
DNNModel *(*load_model)(const char *model_filename);
|
DNNModel *(*load_model)(const char *model_filename);
|
||||||
// Executes model with specified input and output. Returns DNN_ERROR otherwise.
|
// Executes model with specified input and output. Returns DNN_ERROR otherwise.
|
||||||
DNNReturnType (*execute_model)(const DNNModel *model, DNNData *output);
|
DNNReturnType (*execute_model)(const DNNModel *model, DNNData *outputs, uint32_t nb_output);
|
||||||
// Frees memory allocated for model.
|
// Frees memory allocated for model.
|
||||||
void (*free_model)(DNNModel **model);
|
void (*free_model)(DNNModel **model);
|
||||||
} DNNModule;
|
} DNNModule;
|
||||||
|
@@ -116,18 +116,19 @@ static int config_props(AVFilterLink *inlink)
|
|||||||
AVFilterLink *outlink = context->outputs[0];
|
AVFilterLink *outlink = context->outputs[0];
|
||||||
DNNReturnType result;
|
DNNReturnType result;
|
||||||
int sws_src_h, sws_src_w, sws_dst_h, sws_dst_w;
|
int sws_src_h, sws_src_w, sws_dst_h, sws_dst_w;
|
||||||
|
const char *model_output_name = "y";
|
||||||
|
|
||||||
sr_context->input.width = inlink->w * sr_context->scale_factor;
|
sr_context->input.width = inlink->w * sr_context->scale_factor;
|
||||||
sr_context->input.height = inlink->h * sr_context->scale_factor;
|
sr_context->input.height = inlink->h * sr_context->scale_factor;
|
||||||
sr_context->input.channels = 1;
|
sr_context->input.channels = 1;
|
||||||
|
|
||||||
result = (sr_context->model->set_input_output)(sr_context->model->model, &sr_context->input, "x", "y");
|
result = (sr_context->model->set_input_output)(sr_context->model->model, &sr_context->input, "x", &model_output_name, 1);
|
||||||
if (result != DNN_SUCCESS){
|
if (result != DNN_SUCCESS){
|
||||||
av_log(context, AV_LOG_ERROR, "could not set input and output for the model\n");
|
av_log(context, AV_LOG_ERROR, "could not set input and output for the model\n");
|
||||||
return AVERROR(EIO);
|
return AVERROR(EIO);
|
||||||
}
|
}
|
||||||
|
|
||||||
result = (sr_context->dnn_module->execute_model)(sr_context->model, &sr_context->output);
|
result = (sr_context->dnn_module->execute_model)(sr_context->model, &sr_context->output, 1);
|
||||||
if (result != DNN_SUCCESS){
|
if (result != DNN_SUCCESS){
|
||||||
av_log(context, AV_LOG_ERROR, "failed to execute loaded model\n");
|
av_log(context, AV_LOG_ERROR, "failed to execute loaded model\n");
|
||||||
return AVERROR(EIO);
|
return AVERROR(EIO);
|
||||||
@@ -136,12 +137,12 @@ static int config_props(AVFilterLink *inlink)
|
|||||||
if (sr_context->input.height != sr_context->output.height || sr_context->input.width != sr_context->output.width){
|
if (sr_context->input.height != sr_context->output.height || sr_context->input.width != sr_context->output.width){
|
||||||
sr_context->input.width = inlink->w;
|
sr_context->input.width = inlink->w;
|
||||||
sr_context->input.height = inlink->h;
|
sr_context->input.height = inlink->h;
|
||||||
result = (sr_context->model->set_input_output)(sr_context->model->model, &sr_context->input, "x", "y");
|
result = (sr_context->model->set_input_output)(sr_context->model->model, &sr_context->input, "x", &model_output_name, 1);
|
||||||
if (result != DNN_SUCCESS){
|
if (result != DNN_SUCCESS){
|
||||||
av_log(context, AV_LOG_ERROR, "could not set input and output for the model\n");
|
av_log(context, AV_LOG_ERROR, "could not set input and output for the model\n");
|
||||||
return AVERROR(EIO);
|
return AVERROR(EIO);
|
||||||
}
|
}
|
||||||
result = (sr_context->dnn_module->execute_model)(sr_context->model, &sr_context->output);
|
result = (sr_context->dnn_module->execute_model)(sr_context->model, &sr_context->output, 1);
|
||||||
if (result != DNN_SUCCESS){
|
if (result != DNN_SUCCESS){
|
||||||
av_log(context, AV_LOG_ERROR, "failed to execute loaded model\n");
|
av_log(context, AV_LOG_ERROR, "failed to execute loaded model\n");
|
||||||
return AVERROR(EIO);
|
return AVERROR(EIO);
|
||||||
@@ -256,7 +257,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
|
|||||||
}
|
}
|
||||||
av_frame_free(&in);
|
av_frame_free(&in);
|
||||||
|
|
||||||
dnn_result = (sr_context->dnn_module->execute_model)(sr_context->model, &sr_context->output);
|
dnn_result = (sr_context->dnn_module->execute_model)(sr_context->model, &sr_context->output, 1);
|
||||||
if (dnn_result != DNN_SUCCESS){
|
if (dnn_result != DNN_SUCCESS){
|
||||||
av_log(context, AV_LOG_ERROR, "failed to execute loaded model\n");
|
av_log(context, AV_LOG_ERROR, "failed to execute loaded model\n");
|
||||||
return AVERROR(EIO);
|
return AVERROR(EIO);
|
||||||
|
Reference in New Issue
Block a user