You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-08-10 06:10:52 +02:00
lavfi/dnn: Remove DNN native backend
According to discussion in https://etherpad.mit.edu/p/FF_dev_meeting_20221202 and the proposal in http://ffmpeg.org/pipermail/ffmpeg-devel/2022-December/304534.html, the DNN native backend should be removed at first step. All the DNN native backend related codes are deleted. Signed-off-by: Ting Fu <ting.fu@intel.com>
This commit is contained in:
@@ -3,16 +3,6 @@ OBJS-$(CONFIG_DNN) += dnn/dnn_io_proc.o
|
||||
OBJS-$(CONFIG_DNN) += dnn/queue.o
|
||||
OBJS-$(CONFIG_DNN) += dnn/safe_queue.o
|
||||
OBJS-$(CONFIG_DNN) += dnn/dnn_backend_common.o
|
||||
OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native.o
|
||||
OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native_layers.o
|
||||
OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native_layer_avgpool.o
|
||||
OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native_layer_dense.o
|
||||
OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native_layer_pad.o
|
||||
OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native_layer_conv2d.o
|
||||
OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native_layer_depth2space.o
|
||||
OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native_layer_maximum.o
|
||||
OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native_layer_mathbinary.o
|
||||
OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native_layer_mathunary.o
|
||||
|
||||
DNN-OBJS-$(CONFIG_LIBTENSORFLOW) += dnn/dnn_backend_tf.o
|
||||
DNN-OBJS-$(CONFIG_LIBOPENVINO) += dnn/dnn_backend_openvino.o
|
||||
|
@@ -1,561 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2018 Sergey Lavrushkin
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file
|
||||
* DNN native backend implementation.
|
||||
*/
|
||||
|
||||
#include "dnn_backend_native.h"
|
||||
#include "libavutil/avassert.h"
|
||||
#include "dnn_backend_native_layer_conv2d.h"
|
||||
#include "dnn_backend_native_layers.h"
|
||||
#include "dnn_io_proc.h"
|
||||
#include "dnn_backend_common.h"
|
||||
|
||||
/* AVOption plumbing for the native backend context. */
#define OFFSET(x) offsetof(NativeContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM
static const AVOption dnn_native_options[] = {
    { "conv2d_threads", "threads num for conv2d layer", OFFSET(options.conv2d_threads), AV_OPT_TYPE_INT, { .i64 = 0 }, INT_MIN, INT_MAX, FLAGS },
    { "async", "use DNN async inference", OFFSET(options.async), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
    { NULL },
};

/* AVClass so NativeContext can be used with av_log() and av_opt_*(). */
static const AVClass dnn_native_class = {
    .class_name = "dnn_native",
    .item_name  = av_default_item_name,
    .option     = dnn_native_options,
    .version    = LIBAVUTIL_VERSION_INT,
    .category   = AV_CLASS_CATEGORY_FILTER,
};

/* Forward declaration: needed by get_output_native() below. */
static int execute_model_native(Queue *lltask_queue);
|
||||
|
||||
/**
 * Wrap @p task in a LastLevelTaskItem and append it to @p lltask_queue.
 *
 * The native backend performs exactly one inference per task, so
 * inference_todo is set to 1 and inference_done reset to 0 here.
 *
 * @return 0 on success, AVERROR(ENOMEM) on allocation or queue failure.
 */
static int extract_lltask_from_task(TaskItem *task, Queue *lltask_queue)
{
    NativeModel *native_model = task->model;
    NativeContext *ctx = &native_model->ctx;
    LastLevelTaskItem *item;

    item = av_malloc(sizeof(*item));
    if (!item) {
        av_log(ctx, AV_LOG_ERROR, "Unable to allocate space for LastLevelTaskItem\n");
        return AVERROR(ENOMEM);
    }

    task->inference_todo = 1;
    task->inference_done = 0;
    item->task = task;

    if (ff_queue_push_back(lltask_queue, item) < 0) {
        av_log(ctx, AV_LOG_ERROR, "Failed to push back lltask_queue.\n");
        av_freep(&item);
        return AVERROR(ENOMEM);
    }

    return 0;
}
|
||||
|
||||
/**
 * Report the data type and NHWC dimensions of the model input operand
 * named @p input_name through @p input.
 *
 * @return 0 on success; AVERROR(EINVAL) when the name is absent from the
 *         model or refers to a non-input node.
 */
static int get_input_native(void *model, DNNData *input, const char *input_name)
{
    NativeModel *native_model = model;
    NativeContext *ctx = &native_model->ctx;
    DnnOperand *oprd = NULL;

    /* Locate the first operand whose name matches. */
    for (int i = 0; i < native_model->operands_num; ++i) {
        if (strcmp(native_model->operands[i].name, input_name) == 0) {
            oprd = &native_model->operands[i];
            break;
        }
    }

    if (!oprd) {
        av_log(ctx, AV_LOG_ERROR, "Could not find \"%s\" in model\n", input_name);
        return AVERROR(EINVAL);
    }

    if (oprd->type != DOT_INPUT) {
        av_log(ctx, AV_LOG_ERROR, "Found \"%s\" in model, but it is not input node\n", input_name);
        return AVERROR(EINVAL);
    }

    input->dt = oprd->data_type;
    /* The loader rejects input operands with a batch dimension != 1. */
    av_assert0(oprd->dims[0] == 1);
    input->height   = oprd->dims[1];
    input->width    = oprd->dims[2];
    input->channels = oprd->dims[3];
    return 0;
}
|
||||
|
||||
static int get_output_native(void *model, const char *input_name, int input_width, int input_height,
|
||||
const char *output_name, int *output_width, int *output_height)
|
||||
{
|
||||
int ret = 0;
|
||||
NativeModel *native_model = model;
|
||||
NativeContext *ctx = &native_model->ctx;
|
||||
TaskItem task;
|
||||
DNNExecBaseParams exec_params = {
|
||||
.input_name = input_name,
|
||||
.output_names = &output_name,
|
||||
.nb_output = 1,
|
||||
.in_frame = NULL,
|
||||
.out_frame = NULL,
|
||||
};
|
||||
|
||||
ret = ff_dnn_fill_gettingoutput_task(&task, &exec_params, native_model, input_height, input_width, ctx);
|
||||
if (ret != 0) {
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = extract_lltask_from_task(&task, native_model->lltask_queue);
|
||||
if (ret != 0) {
|
||||
av_log(ctx, AV_LOG_ERROR, "unable to extract last level task from task.\n");
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = execute_model_native(native_model->lltask_queue);
|
||||
*output_width = task.out_frame->width;
|
||||
*output_height = task.out_frame->height;
|
||||
|
||||
err:
|
||||
av_frame_free(&task.out_frame);
|
||||
av_frame_free(&task.in_frame);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Loads model and its parameters that are stored in a binary file with following structure:
// layers_num,layer_type,layer_parameters,layer_type,layer_parameters...
// For CONV layer: activation_function, input_num, output_num, kernel_size, kernel, biases
// For DEPTH_TO_SPACE layer: block_size
//
// Returns a fully initialized DNNModel, or NULL on any I/O, format or
// allocation error. The whole file must be consumed exactly (dnn_size ==
// file_size) or the model is rejected as malformed.
DNNModel *ff_dnn_load_model_native(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx)
{
#define DNN_NATIVE_MAGIC "FFMPEGDNNNATIVE"
    DNNModel *model = NULL;
    // sizeof - 1 to skip the terminating '\0' which is not written in the file
    char buf[sizeof(DNN_NATIVE_MAGIC) - 1];
    int version, header_size, major_version_expected = 1;
    NativeModel *native_model = NULL;
    AVIOContext *model_file_context;
    int file_size, dnn_size, parsed_size;
    int32_t layer;
    DNNLayerType layer_type;

    if (avio_open(&model_file_context, model_filename, AVIO_FLAG_READ) < 0){
        return NULL;
    }
    file_size = avio_size(model_file_context);

    model = av_mallocz(sizeof(DNNModel));
    if (!model){
        goto fail;
    }

    /**
     * check file header with string and version
     */
    if (avio_read(model_file_context, buf, sizeof(buf)) != sizeof(buf) ||
        memcmp(buf, DNN_NATIVE_MAGIC, sizeof(buf)))
        goto fail;
    // dnn_size tracks every byte consumed; it must equal file_size at the end.
    dnn_size = sizeof(buf);

    version = (int32_t)avio_rl32(model_file_context);
    dnn_size += 4;
    if (version != major_version_expected) {
        goto fail;
    }

    // currently no need to check minor version
    version = (int32_t)avio_rl32(model_file_context);
    dnn_size += 4;
    header_size = dnn_size;

    native_model = av_mallocz(sizeof(NativeModel));
    if (!native_model){
        goto fail;
    }
    model->model = native_model;

    // Hook up AVClass so option parsing and logging work on the context.
    native_model->ctx.class = &dnn_native_class;
    model->options = options;
    if (av_opt_set_from_string(&native_model->ctx, model->options, NULL, "=", "&") < 0)
        goto fail;
    native_model->model = model;

    // The native backend only supports synchronous inference.
    if (native_model->ctx.options.async) {
        av_log(&native_model->ctx, AV_LOG_WARNING, "Async not supported. Rolling back to sync\n");
        native_model->ctx.options.async = 0;
    }

#if !HAVE_PTHREAD_CANCEL
    if (native_model->ctx.options.conv2d_threads > 1){
        av_log(&native_model->ctx, AV_LOG_WARNING, "'conv2d_threads' option was set but it is not supported "
               "on this build (pthread support is required)\n");
    }
#endif

    // layers_num and operands_num are stored as a trailer in the last
    // 8 bytes of the file; read them, then seek back to the payload.
    avio_seek(model_file_context, file_size - 8, SEEK_SET);
    native_model->layers_num = (int32_t)avio_rl32(model_file_context);
    native_model->operands_num = (int32_t)avio_rl32(model_file_context);
    dnn_size += 8;
    avio_seek(model_file_context, header_size, SEEK_SET);

    native_model->layers = av_mallocz(native_model->layers_num * sizeof(Layer));
    if (!native_model->layers){
        goto fail;
    }

    native_model->operands = av_mallocz(native_model->operands_num * sizeof(DnnOperand));
    if (!native_model->operands){
        goto fail;
    }

    native_model->task_queue = ff_queue_create();
    if (!native_model->task_queue) {
        goto fail;
    }

    native_model->lltask_queue = ff_queue_create();
    if (!native_model->lltask_queue) {
        goto fail;
    }

    // Parse each layer: a type tag followed by type-specific parameters,
    // delegated to the per-type pf_load in ff_layer_funcs.
    for (layer = 0; layer < native_model->layers_num; ++layer){
        layer_type = (int32_t)avio_rl32(model_file_context);
        dnn_size += 4;

        if (layer_type >= DLT_COUNT) {
            goto fail;
        }

        native_model->layers[layer].type = layer_type;
        // pf_load returns the number of bytes it consumed, or 0 on error.
        parsed_size = ff_layer_funcs[layer_type].pf_load(&native_model->layers[layer], model_file_context, file_size, native_model->operands_num);
        if (!parsed_size) {
            goto fail;
        }
        dnn_size += parsed_size;
    }

    // Parse the operand table: index, name, type, data type and 4 dims.
    for (int32_t i = 0; i < native_model->operands_num; ++i){
        DnnOperand *oprd;
        int32_t name_len;
        int32_t operand_index = (int32_t)avio_rl32(model_file_context);
        dnn_size += 4;

        if (operand_index >= native_model->operands_num) {
            goto fail;
        }

        oprd = &native_model->operands[operand_index];
        name_len = (int32_t)avio_rl32(model_file_context);
        dnn_size += 4;

        avio_get_str(model_file_context, name_len, oprd->name, sizeof(oprd->name));
        dnn_size += name_len;

        oprd->type = (int32_t)avio_rl32(model_file_context);
        dnn_size += 4;

        oprd->data_type = (int32_t)avio_rl32(model_file_context);
        dnn_size += 4;

        for (int32_t dim = 0; dim < 4; ++dim) {
            oprd->dims[dim] = (int32_t)avio_rl32(model_file_context);
            dnn_size += 4;
        }
        // Input operands must have a batch dimension of exactly 1.
        if (oprd->type == DOT_INPUT && oprd->dims[0] != 1)
            goto fail;

        oprd->isNHWC = 1;
    }

    avio_closep(&model_file_context);

    // Trailing garbage or short read means a corrupt model file.
    if (dnn_size != file_size){
        ff_dnn_free_model_native(&model);
        return NULL;
    }

    model->get_input = &get_input_native;
    model->get_output = &get_output_native;
    model->filter_ctx = filter_ctx;
    model->func_type = func_type;

    return model;

fail:
    // ff_dnn_free_model_native handles partially constructed models.
    ff_dnn_free_model_native(&model);
    avio_closep(&model_file_context);
    return NULL;
}
|
||||
|
||||
/*
 * Pop one LastLevelTaskItem from @lltask_queue and run the model on it
 * synchronously: copy the input frame into the input operand, execute
 * every layer in file order, then copy each requested output operand
 * back to the output frame (or just report its dimensions when
 * do_ioproc is off). The popped lltask is freed on every path.
 */
static int execute_model_native(Queue *lltask_queue)
{
    NativeModel *native_model = NULL;
    NativeContext *ctx = NULL;
    int32_t layer;
    DNNData input, output;
    DnnOperand *oprd = NULL;
    LastLevelTaskItem *lltask = NULL;
    TaskItem *task = NULL;
    int ret = 0;

    lltask = ff_queue_pop_front(lltask_queue);
    if (!lltask) {
        // ctx is unknown at this point, hence the NULL log context.
        av_log(NULL, AV_LOG_ERROR, "Failed to get LastLevelTaskItem\n");
        ret = AVERROR(EINVAL);
        goto err;
    }
    task = lltask->task;
    native_model = task->model;
    ctx = &native_model->ctx;

    if (native_model->layers_num <= 0 || native_model->operands_num <= 0) {
        av_log(ctx, AV_LOG_ERROR, "No operands or layers in model\n");
        ret = AVERROR(EINVAL);
        goto err;
    }

    // Find the input operand by name; oprd is reset to NULL on mismatch
    // so a non-NULL value after the loop means a match was found.
    for (int i = 0; i < native_model->operands_num; ++i) {
        oprd = &native_model->operands[i];
        if (strcmp(oprd->name, task->input_name) == 0) {
            if (oprd->type != DOT_INPUT) {
                av_log(ctx, AV_LOG_ERROR, "Found \"%s\" in model, but it is not input node\n", task->input_name);
                ret = AVERROR(EINVAL);
                goto err;
            }
            break;
        }
        oprd = NULL;
    }
    if (!oprd) {
        av_log(ctx, AV_LOG_ERROR, "Could not find \"%s\" in model\n", task->input_name);
        ret = AVERROR(EINVAL);
        goto err;
    }

    // Adopt the incoming frame's dimensions (NHWC: dims[1]=H, dims[2]=W).
    oprd->dims[1] = task->in_frame->height;
    oprd->dims[2] = task->in_frame->width;

    // Re-allocate the input buffer for the (possibly changed) frame size.
    av_freep(&oprd->data);
    oprd->length = ff_calculate_operand_data_length(oprd);
    if (oprd->length <= 0) {
        av_log(ctx, AV_LOG_ERROR, "The input data length overflow\n");
        ret = AVERROR(EINVAL);
        goto err;
    }
    oprd->data = av_malloc(oprd->length);
    if (!oprd->data) {
        av_log(ctx, AV_LOG_ERROR, "Failed to malloc memory for input data\n");
        ret = AVERROR(ENOMEM);
        goto err;
    }

    input.height = oprd->dims[1];
    input.width = oprd->dims[2];
    input.channels = oprd->dims[3];
    input.data = oprd->data;
    input.dt = oprd->data_type;
    // Convert the AVFrame into the input operand, either via the caller's
    // custom pre-proc hook or the default frame-to-DNN conversion.
    if (task->do_ioproc) {
        if (native_model->model->frame_pre_proc != NULL) {
            native_model->model->frame_pre_proc(task->in_frame, &input, native_model->model->filter_ctx);
        } else {
            ff_proc_from_frame_to_dnn(task->in_frame, &input, ctx);
        }
    }

    if (task->nb_output != 1) {
        // currently, the filter does not need multiple outputs,
        // so we just pending the support until we really need it.
        avpriv_report_missing_feature(ctx, "multiple outputs");
        ret = AVERROR(ENOSYS);
        goto err;
    }

    // Execute every layer in stored order via its pf_exec function.
    for (layer = 0; layer < native_model->layers_num; ++layer){
        DNNLayerType layer_type = native_model->layers[layer].type;
        ret = ff_layer_funcs[layer_type].pf_exec(native_model->operands,
                                                 native_model->layers[layer].input_operand_indexes,
                                                 native_model->layers[layer].output_operand_index,
                                                 native_model->layers[layer].params,
                                                 &native_model->ctx);
        if (ret != 0) {
            av_log(ctx, AV_LOG_ERROR, "Failed to execute model\n");
            goto err;
        }
    }

    // Collect each requested output operand (nb_output is 1, see above).
    for (uint32_t i = 0; i < task->nb_output; ++i) {
        DnnOperand *oprd = NULL;
        const char *output_name = task->output_names[i];
        for (int j = 0; j < native_model->operands_num; ++j) {
            if (strcmp(native_model->operands[j].name, output_name) == 0) {
                oprd = &native_model->operands[j];
                break;
            }
        }

        if (oprd == NULL) {
            av_log(ctx, AV_LOG_ERROR, "Could not find output in model\n");
            ret = AVERROR(EINVAL);
            goto err;
        }

        output.data = oprd->data;
        output.height = oprd->dims[1];
        output.width = oprd->dims[2];
        output.channels = oprd->dims[3];
        output.dt = oprd->data_type;

        if (task->do_ioproc) {
            if (native_model->model->frame_post_proc != NULL) {
                native_model->model->frame_post_proc(task->out_frame, &output, native_model->model->filter_ctx);
            } else {
                ff_proc_from_dnn_to_frame(task->out_frame, &output, ctx);
            }
        } else {
            // Dimension-query mode: only report the output size.
            task->out_frame->width = output.width;
            task->out_frame->height = output.height;
        }
    }
    task->inference_done++;
err:
    av_freep(&lltask);
    return ret;
}
|
||||
|
||||
/*
 * Public entry point: validate @exec_params, create a TaskItem for the
 * request and run it synchronously through execute_model_native().
 * Ownership of the task passes to task_queue on successful push; it is
 * later released by ff_dnn_get_result_native()/ff_dnn_free_model_native().
 */
int ff_dnn_execute_model_native(const DNNModel *model, DNNExecBaseParams *exec_params)
{
    NativeModel *native_model = model->model;
    NativeContext *ctx = &native_model->ctx;
    TaskItem *task;
    int ret = 0;

    ret = ff_check_exec_params(ctx, DNN_NATIVE, model->func_type, exec_params);
    if (ret != 0) {
        return ret;
    }

    task = av_malloc(sizeof(*task));
    if (!task) {
        av_log(ctx, AV_LOG_ERROR, "unable to alloc memory for task item.\n");
        return AVERROR(ENOMEM);
    }

    // 1 = always one inference per task for the native backend.
    ret = ff_dnn_fill_task(task, exec_params, native_model, ctx->options.async, 1);
    if (ret != 0) {
        av_freep(&task);
        return ret;
    }

    if (ff_queue_push_back(native_model->task_queue, task) < 0) {
        av_freep(&task);
        av_log(ctx, AV_LOG_ERROR, "unable to push back task_queue.\n");
        return AVERROR(ENOMEM);
    }

    // Once pushed, the task is owned by task_queue; do not free it here.
    ret = extract_lltask_from_task(task, native_model->lltask_queue);
    if (ret != 0) {
        av_log(ctx, AV_LOG_ERROR, "unable to extract last level task from task.\n");
        return ret;
    }

    return execute_model_native(native_model->lltask_queue);
}
|
||||
|
||||
int ff_dnn_flush_native(const DNNModel *model)
|
||||
{
|
||||
NativeModel *native_model = model->model;
|
||||
|
||||
if (ff_queue_size(native_model->lltask_queue) == 0) {
|
||||
// no pending task need to flush
|
||||
return 0;
|
||||
}
|
||||
|
||||
// for now, use sync node with flush operation
|
||||
// Switch to async when it is supported
|
||||
return execute_model_native(native_model->lltask_queue);
|
||||
}
|
||||
|
||||
/**
 * Fetch the next finished (in, out) frame pair from the model's task queue.
 * Thin wrapper delegating to the backend-common implementation.
 */
DNNAsyncStatusType ff_dnn_get_result_native(const DNNModel *model, AVFrame **in, AVFrame **out)
{
    NativeModel *nm = model->model;

    return ff_dnn_get_result_common(nm->task_queue, in, out);
}
|
||||
|
||||
int32_t ff_calculate_operand_dims_count(const DnnOperand *oprd)
|
||||
{
|
||||
int32_t result = 1;
|
||||
for (int i = 0; i < 4; ++i)
|
||||
result *= oprd->dims[i];
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
int32_t ff_calculate_operand_data_length(const DnnOperand* oprd)
|
||||
{
|
||||
// currently, we just support DNN_FLOAT
|
||||
uint64_t len = sizeof(float);
|
||||
for (int i = 0; i < 4; i++) {
|
||||
len *= oprd->dims[i];
|
||||
if (len > INT32_MAX)
|
||||
return 0;
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
/*
 * Free a native model and everything it owns: per-layer params (plus
 * conv2d kernel/bias sub-allocations), operand data buffers, and both
 * queues including any items still pending in them. Safe on partially
 * constructed models (every member is checked before use) and sets
 * *model to NULL via av_freep.
 */
void ff_dnn_free_model_native(DNNModel **model)
{
    NativeModel *native_model;
    ConvolutionalParams *conv_params;
    int32_t layer;

    if (*model)
    {
        if ((*model)->model) {
            native_model = (*model)->model;
            if (native_model->layers) {
                for (layer = 0; layer < native_model->layers_num; ++layer){
                    // conv2d params own two nested allocations that must be
                    // released before the params struct itself.
                    if (native_model->layers[layer].type == DLT_CONV2D){
                        conv_params = (ConvolutionalParams *)native_model->layers[layer].params;
                        av_freep(&conv_params->kernel);
                        av_freep(&conv_params->biases);
                    }
                    av_freep(&native_model->layers[layer].params);
                }
                av_freep(&native_model->layers);
            }

            if (native_model->operands) {
                for (uint32_t operand = 0; operand < native_model->operands_num; ++operand)
                    av_freep(&native_model->operands[operand].data);
                av_freep(&native_model->operands);
            }

            // Drain and destroy the last-level task queue; lltask items
            // do not own their TaskItem, so a plain free suffices.
            while (ff_queue_size(native_model->lltask_queue) != 0) {
                LastLevelTaskItem *item = ff_queue_pop_front(native_model->lltask_queue);
                av_freep(&item);
            }
            ff_queue_destroy(native_model->lltask_queue);

            // Task items own their frames, so free those first.
            while (ff_queue_size(native_model->task_queue) != 0) {
                TaskItem *item = ff_queue_pop_front(native_model->task_queue);
                av_frame_free(&item->in_frame);
                av_frame_free(&item->out_frame);
                av_freep(&item);
            }
            ff_queue_destroy(native_model->task_queue);

            av_freep(&native_model);
        }
        av_freep(model);
    }
}
|
@@ -1,149 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2018 Sergey Lavrushkin
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file
|
||||
* DNN inference functions interface for native backend.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef AVFILTER_DNN_DNN_BACKEND_NATIVE_H
#define AVFILTER_DNN_DNN_BACKEND_NATIVE_H

#include "../dnn_interface.h"
#include "libavformat/avio.h"
#include "libavutil/opt.h"
#include "queue.h"

/**
 * the enum value of DNNLayerType should not be changed,
 * the same values are used in convert_from_tensorflow.py
 * and, it is used to index the layer execution/load function pointer.
 */
typedef enum {
    DLT_INPUT = 0,
    DLT_CONV2D = 1,
    DLT_DEPTH_TO_SPACE = 2,
    DLT_MIRROR_PAD = 3,
    DLT_MAXIMUM = 4,
    DLT_MATH_BINARY = 5,
    DLT_MATH_UNARY = 6,
    DLT_AVG_POOL = 7,
    DLT_DENSE = 8,
    DLT_COUNT
} DNNLayerType;

// DOT_INTERMEDIATE is both an input and an output of some layer.
typedef enum {DOT_INPUT = 1, DOT_OUTPUT = 2, DOT_INTERMEDIATE = DOT_INPUT | DOT_OUTPUT} DNNOperandType;
typedef enum {VALID, SAME, SAME_CLAMP_TO_EDGE} DNNPaddingParam;
typedef enum {RELU, TANH, SIGMOID, NONE, LEAKY_RELU} DNNActivationFunc;

typedef struct Layer{
    DNNLayerType type;
    /**
     * a layer can have multiple inputs and one output.
     * 4 is just a big enough number for input operands (increase it if necessary),
     * do not use 'int32_t *input_operand_indexes', so we don't worry about mem leaks.
     */
    int32_t input_operand_indexes[4];
    int32_t output_operand_index;
    // type-specific parameter struct, owned by the layer.
    void *params;
} Layer;

typedef struct DnnOperand{
    /**
     * there are two memory layouts, NHWC or NCHW, so we use dims,
     * dims[0] is Number.
     */
    int32_t dims[4];

    /**
     * input/output/intermediate operand of the network
     */
    DNNOperandType type;

    /**
     * support different kinds of data type such as float, half float, int8 etc,
     * first support float now.
     */
    DNNDataType data_type;

    /**
     * NHWC if 1, otherwise NCHW.
     * let's first support NHWC only, this flag is for extensive usage.
     */
    int8_t isNHWC;

    /**
     * to avoid possible memory leak, do not use char *name
     */
    char name[128];

    /**
     * data pointer with data length in bytes.
     * usedNumbersLeft is only valid for intermediate operand,
     * it means how many layers still depend on this operand,
     * todo: the memory can be reused when usedNumbersLeft is zero.
     */
    void *data;
    int32_t length;
    int32_t usedNumbersLeft;
}DnnOperand;

typedef struct InputParams{
    int height, width, channels;
} InputParams;

// User-facing options parsed from the filter's option string.
typedef struct NativeOptions{
    uint8_t async;
    uint32_t conv2d_threads;
} NativeOptions;

typedef struct NativeContext {
    const AVClass *class;
    NativeOptions options;
} NativeContext;

// Represents simple feed-forward convolutional network.
typedef struct NativeModel{
    NativeContext ctx;
    DNNModel *model;
    Layer *layers;
    int32_t layers_num;
    DnnOperand *operands;
    int32_t operands_num;
    Queue *task_queue;
    Queue *lltask_queue;
} NativeModel;

DNNModel *ff_dnn_load_model_native(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx);

int ff_dnn_execute_model_native(const DNNModel *model, DNNExecBaseParams *exec_params);

DNNAsyncStatusType ff_dnn_get_result_native(const DNNModel *model, AVFrame **in, AVFrame **out);

int ff_dnn_flush_native(const DNNModel *model);

void ff_dnn_free_model_native(DNNModel **model);

// NOTE: User must check for error (return value <= 0) to handle
// case like integer overflow.
int32_t ff_calculate_operand_data_length(const DnnOperand *oprd);
int32_t ff_calculate_operand_dims_count(const DnnOperand *oprd);
#endif
|
@@ -1,147 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2020
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file
|
||||
* DNN native backend implementation.
|
||||
*/
|
||||
|
||||
#include "libavutil/avassert.h"
|
||||
#include "dnn_backend_native_layer_avgpool.h"
|
||||
|
||||
/**
 * Load an Average Pooling layer's parameters from @p model_file_context.
 *
 * Reads strides, padding_method and kernel_size, followed by one input
 * operand index and the output operand index.
 *
 * @return the number of bytes consumed, or 0 on any error (allocation
 *         failure, invalid parameters, or out-of-range operand indices).
 */
int ff_dnn_load_layer_avg_pool(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num)
{
    AvgPoolParams *params = av_malloc(sizeof(*params));
    int read_size = 0;

    if (!params)
        return 0;

    params->strides        = (int32_t)avio_rl32(model_file_context);
    params->padding_method = (int32_t)avio_rl32(model_file_context);
    params->kernel_size    = (int32_t)avio_rl32(model_file_context);
    read_size += 12;

    if (read_size > file_size || params->kernel_size <= 0 || params->strides <= 0) {
        av_freep(&params);
        return 0;
    }

    /* From here on the params are owned by the layer and will be released
       by the model teardown even on the failure path below. */
    layer->params = params;
    layer->input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
    layer->output_operand_index     = (int32_t)avio_rl32(model_file_context);
    read_size += 8;

    if (layer->input_operand_indexes[0] >= operands_num ||
        layer->output_operand_index >= operands_num)
        return 0;

    return read_size;
}
|
||||
|
||||
/*
 * Execute an Average Pooling layer: average each kernel_size x kernel_size
 * window of the NHWC input operand (per channel, stepping by strides) into
 * the output operand, which is (re)allocated here to the computed size.
 * Returns 0 on success or a negative AVERROR code.
 */
int ff_dnn_execute_layer_avg_pool(DnnOperand *operands, const int32_t *input_operand_indexes,
                                  int32_t output_operand_index, const void *parameters, NativeContext *ctx)
{
    float *output;
    int height_end, width_end, height_radius, width_radius, output_height, output_width, kernel_area;
    int32_t input_operand_index = input_operand_indexes[0];
    int number = operands[input_operand_index].dims[0];
    int height = operands[input_operand_index].dims[1];
    int width = operands[input_operand_index].dims[2];
    int channel = operands[input_operand_index].dims[3];
    const float *input = operands[input_operand_index].data;
    const AvgPoolParams *avgpool_params = parameters;

    int kernel_strides = avgpool_params->strides;
    int src_linesize = width * channel;
    DnnOperand *output_operand = &operands[output_operand_index];

    /**
     * When padding_method = SAME, the tensorflow will only pad half the number of 0 pixels
     * except the remainders.
     * Eg: assuming the input height = 1080, the strides = 11, so the remainders = 1080 % 11 = 2
     * and if ksize = 5: it will fill (5 - 2) >> 1 = 1 line before the first line of input image,
     * and 5 - 2 - 1 = 2 lines after the last line of input image.
     * and if ksize = 7: it will fill (7 - 2) >> 1 = 2 lines before the first line of input image,
     * and 7 - 2 - 2 = 3 lines after the last line of input image.
     */
    if (avgpool_params->padding_method == SAME) {
        height_end = height;
        width_end = width;
        height_radius = avgpool_params->kernel_size - ((height - 1) % kernel_strides + 1);
        width_radius = avgpool_params->kernel_size - ((width - 1) % kernel_strides + 1);
        height_radius = height_radius < 0 ? 0 : height_radius >> 1;
        width_radius = width_radius < 0 ? 0 : width_radius >> 1;
        output_height = ceil(height / (kernel_strides * 1.0));
        output_width = ceil(width / (kernel_strides * 1.0));
    } else {
        // VALID padding: windows must lie entirely inside the input.
        av_assert0(avgpool_params->padding_method == VALID);
        height_end = height - avgpool_params->kernel_size + 1;
        width_end = width - avgpool_params->kernel_size + 1;
        height_radius = 0;
        width_radius = 0;
        output_height = ceil((height - avgpool_params->kernel_size + 1) / (kernel_strides * 1.0));
        output_width = ceil((width - avgpool_params->kernel_size + 1) / (kernel_strides * 1.0));
    }

    output_operand->dims[0] = number;
    output_operand->dims[1] = output_height;
    output_operand->dims[2] = output_width;
    // not support pooling in channel dimension now
    output_operand->dims[3] = channel;
    output_operand->data_type = operands[input_operand_index].data_type;
    output_operand->length = ff_calculate_operand_data_length(output_operand);
    if (output_operand->length <= 0) {
        av_log(ctx, AV_LOG_ERROR, "The output data length overflow\n");
        return AVERROR(EINVAL);
    }
    output_operand->data = av_realloc(output_operand->data, output_operand->length);
    if (!output_operand->data) {
        av_log(ctx, AV_LOG_ERROR, "Failed to reallocate memory for output\n");
        return AVERROR(ENOMEM);
    }
    output = output_operand->data;

    for (int y = 0; y < height_end; y += kernel_strides) {
        for (int x = 0; x < width_end; x += kernel_strides) {
            for (int n_channel = 0; n_channel < channel; ++n_channel) {
                output[n_channel] = 0.0;
                // kernel_area counts only in-bounds pixels so the divisor
                // matches TensorFlow's SAME-padding averaging behavior.
                kernel_area = 0;
                for (int kernel_y = 0; kernel_y < avgpool_params->kernel_size; ++kernel_y) {
                    for (int kernel_x = 0; kernel_x < avgpool_params->kernel_size; ++kernel_x) {
                        float input_pel;
                        int y_pos = y + (kernel_y - height_radius);
                        int x_pos = x + (kernel_x - width_radius);
                        if (x_pos < 0 || x_pos >= width || y_pos < 0 || y_pos >= height) {
                            input_pel = 0.0;
                        } else {
                            kernel_area++;
                            input_pel = input[y_pos * src_linesize + x_pos * channel + n_channel];
                        }
                        output[n_channel] += input_pel;
                    }
                }
                output[n_channel] /= kernel_area;
            }
            output += channel;
        }
    }

    return 0;
}
|
@@ -1,69 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2020
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file
|
||||
* DNN inference functions interface for native backend.
|
||||
*/
|
||||
|
||||
#ifndef AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_AVGPOOL_H
|
||||
#define AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_AVGPOOL_H
|
||||
|
||||
#include "dnn_backend_native.h"
|
||||
|
||||
typedef struct AvgPoolParams{
|
||||
int32_t strides, kernel_size;
|
||||
DNNPaddingParam padding_method;
|
||||
} AvgPoolParams;
|
||||
|
||||
/**
|
||||
* @brief Load Average Pooling Layer.
|
||||
*
|
||||
* It assigns the Average Pooling layer with AvgPoolParams
|
||||
* after parsing from the model file context.
|
||||
*
|
||||
* @param layer pointer to the DNN layer instance
|
||||
* @param model_file_context pointer to model file context
|
||||
* @param file_size model file size to check if data is read
|
||||
* correctly from the model file
|
||||
* @param operands_num operand count of the whole model to
|
||||
* check if data is read correctly from the model file
|
||||
* @return number of bytes read from the model file
|
||||
* @retval 0 if out of memory or an error occurs
|
||||
*/
|
||||
int ff_dnn_load_layer_avg_pool(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num);
|
||||
|
||||
/**
|
||||
* @brief Execute the Average Pooling Layer.
|
||||
* Padding in channel dimensions is currently not supported.
|
||||
*
|
||||
* @param operands all operands for the model
|
||||
* @param input_operand_indexes input operand indexes for this layer
|
||||
* @param output_operand_index output operand index for this layer
|
||||
* @param parameters average pooling parameters
|
||||
* @param ctx pointer to Native model context for logging
|
||||
* @retval 0 if the execution succeeds
|
||||
* @retval AVERROR(ENOMEM) if memory allocation fails
|
||||
* @retval AVERROR(EINVAL) for invalid arguments
|
||||
*/
|
||||
int ff_dnn_execute_layer_avg_pool(DnnOperand *operands, const int32_t *input_operand_indexes,
|
||||
int32_t output_operand_index, const void *parameters, NativeContext *ctx);
|
||||
|
||||
#endif
|
@@ -1,265 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2018 Sergey Lavrushkin
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/avassert.h"
|
||||
#include "libavutil/thread.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "dnn_backend_native_layer_conv2d.h"
|
||||
|
||||
#define CLAMP_TO_EDGE(x, w) ((x) < 0 ? 0 : ((x) >= (w) ? (w - 1) : (x)))
|
||||
|
||||
//struct to pass parameters
|
||||
typedef struct ThreadCommonParam{
|
||||
DnnOperand *operands;
|
||||
const int32_t *input_operand_indexes;
|
||||
int32_t output_operand_index;
|
||||
const void *parameters;
|
||||
NativeContext *ctx;
|
||||
float *output_data;
|
||||
} ThreadCommonParam;
|
||||
|
||||
typedef struct ThreadParam{
|
||||
ThreadCommonParam *thread_common_param;
|
||||
int thread_start, thread_end;
|
||||
#if HAVE_PTHREAD_CANCEL
|
||||
pthread_t thread;
|
||||
#endif
|
||||
} ThreadParam;
|
||||
|
||||
int ff_dnn_load_layer_conv2d(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num)
|
||||
{
|
||||
ConvolutionalParams *conv_params;
|
||||
int kernel_size;
|
||||
int dnn_size = 0;
|
||||
conv_params = av_malloc(sizeof(*conv_params));
|
||||
if (!conv_params)
|
||||
return 0;
|
||||
|
||||
conv_params->dilation = (int32_t)avio_rl32(model_file_context);
|
||||
conv_params->padding_method = (int32_t)avio_rl32(model_file_context);
|
||||
conv_params->activation = (int32_t)avio_rl32(model_file_context);
|
||||
conv_params->input_num = (int32_t)avio_rl32(model_file_context);
|
||||
conv_params->output_num = (int32_t)avio_rl32(model_file_context);
|
||||
conv_params->kernel_size = (int32_t)avio_rl32(model_file_context);
|
||||
conv_params->has_bias = (int32_t)avio_rl32(model_file_context);
|
||||
dnn_size += 28;
|
||||
|
||||
kernel_size = conv_params->input_num * conv_params->output_num *
|
||||
conv_params->kernel_size * conv_params->kernel_size;
|
||||
dnn_size += kernel_size * 4;
|
||||
if (conv_params->has_bias)
|
||||
dnn_size += conv_params->output_num * 4;
|
||||
|
||||
if (dnn_size > file_size || conv_params->input_num <= 0 ||
|
||||
conv_params->output_num <= 0 || conv_params->kernel_size <= 0){
|
||||
av_freep(&conv_params);
|
||||
return 0;
|
||||
}
|
||||
|
||||
conv_params->kernel = av_malloc_array(kernel_size, sizeof(*conv_params->kernel));
|
||||
if (!conv_params->kernel) {
|
||||
av_freep(&conv_params);
|
||||
return 0;
|
||||
}
|
||||
for (int i = 0; i < kernel_size; ++i) {
|
||||
conv_params->kernel[i] = av_int2float(avio_rl32(model_file_context));
|
||||
}
|
||||
|
||||
conv_params->biases = NULL;
|
||||
if (conv_params->has_bias) {
|
||||
conv_params->biases = av_malloc_array(conv_params->output_num, sizeof(*conv_params->biases));
|
||||
if (!conv_params->biases){
|
||||
av_freep(&conv_params->kernel);
|
||||
av_freep(&conv_params);
|
||||
return 0;
|
||||
}
|
||||
for (int i = 0; i < conv_params->output_num; ++i){
|
||||
conv_params->biases[i] = av_int2float(avio_rl32(model_file_context));
|
||||
}
|
||||
}
|
||||
|
||||
layer->params = conv_params;
|
||||
|
||||
layer->input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
|
||||
layer->output_operand_index = (int32_t)avio_rl32(model_file_context);
|
||||
dnn_size += 8;
|
||||
|
||||
if (layer->input_operand_indexes[0] >= operands_num || layer->output_operand_index >= operands_num) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return dnn_size;
|
||||
}
|
||||
|
||||
static void * dnn_execute_layer_conv2d_thread(void *threadarg)
|
||||
{
|
||||
//pass parameters
|
||||
ThreadParam *thread_param = threadarg;
|
||||
ThreadCommonParam *thread_common_param = thread_param->thread_common_param;
|
||||
DnnOperand *operands = thread_common_param->operands;
|
||||
int32_t input_operand_index = thread_common_param->input_operand_indexes[0];
|
||||
int height = operands[input_operand_index].dims[1];
|
||||
int width = operands[input_operand_index].dims[2];
|
||||
int channel = operands[input_operand_index].dims[3];
|
||||
const float *input = operands[input_operand_index].data;
|
||||
const ConvolutionalParams *conv_params = thread_common_param->parameters;
|
||||
|
||||
int radius = conv_params->kernel_size >> 1;
|
||||
int src_linesize = width * conv_params->input_num;
|
||||
int filter_linesize = conv_params->kernel_size * conv_params->input_num;
|
||||
int filter_size = conv_params->kernel_size * filter_linesize;
|
||||
int pad_size = (conv_params->padding_method == VALID) ? (conv_params->kernel_size - 1) / 2 * conv_params->dilation : 0;
|
||||
|
||||
float *output = thread_common_param->output_data;
|
||||
output += (conv_params->output_num) * (width - 2 * pad_size) * (thread_param->thread_start - pad_size);
|
||||
|
||||
av_assert0(channel == conv_params->input_num);
|
||||
|
||||
for (int y = thread_param->thread_start; y < thread_param->thread_end; ++y) {
|
||||
for (int x = pad_size; x < width - pad_size; ++x) {
|
||||
for (int n_filter = 0; n_filter < conv_params->output_num; ++n_filter) {
|
||||
if (conv_params->has_bias)
|
||||
output[n_filter] = conv_params->biases[n_filter];
|
||||
else
|
||||
output[n_filter] = 0.f;
|
||||
|
||||
for (int ch = 0; ch < conv_params->input_num; ++ch) {
|
||||
for (int kernel_y = 0; kernel_y < conv_params->kernel_size; ++kernel_y) {
|
||||
for (int kernel_x = 0; kernel_x < conv_params->kernel_size; ++kernel_x) {
|
||||
float input_pel;
|
||||
if (conv_params->padding_method == SAME_CLAMP_TO_EDGE) {
|
||||
int y_pos = CLAMP_TO_EDGE(y + (kernel_y - radius) * conv_params->dilation, height);
|
||||
int x_pos = CLAMP_TO_EDGE(x + (kernel_x - radius) * conv_params->dilation, width);
|
||||
input_pel = input[y_pos * src_linesize + x_pos * conv_params->input_num + ch];
|
||||
} else {
|
||||
int y_pos = y + (kernel_y - radius) * conv_params->dilation;
|
||||
int x_pos = x + (kernel_x - radius) * conv_params->dilation;
|
||||
input_pel = (x_pos < 0 || x_pos >= width || y_pos < 0 || y_pos >= height) ? 0.0 :
|
||||
input[y_pos * src_linesize + x_pos * conv_params->input_num + ch];
|
||||
}
|
||||
|
||||
|
||||
output[n_filter] += input_pel * conv_params->kernel[n_filter * filter_size + kernel_y * filter_linesize +
|
||||
kernel_x * conv_params->input_num + ch];
|
||||
}
|
||||
}
|
||||
}
|
||||
switch (conv_params->activation){
|
||||
case RELU:
|
||||
output[n_filter] = FFMAX(output[n_filter], 0.0);
|
||||
break;
|
||||
case TANH:
|
||||
output[n_filter] = 2.0f / (1.0f + exp(-2.0f * output[n_filter])) - 1.0f;
|
||||
break;
|
||||
case SIGMOID:
|
||||
output[n_filter] = 1.0f / (1.0f + exp(-output[n_filter]));
|
||||
break;
|
||||
case NONE:
|
||||
break;
|
||||
case LEAKY_RELU:
|
||||
output[n_filter] = FFMAX(output[n_filter], 0.0) + 0.2 * FFMIN(output[n_filter], 0.0);
|
||||
}
|
||||
}
|
||||
output += conv_params->output_num;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
int ff_dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_indexes,
|
||||
int32_t output_operand_index, const void *parameters, NativeContext *ctx)
|
||||
{
|
||||
#if HAVE_PTHREAD_CANCEL
|
||||
int thread_num = (ctx->options.conv2d_threads <= 0 || ctx->options.conv2d_threads > av_cpu_count())
|
||||
? (av_cpu_count() + 1) : (ctx->options.conv2d_threads);
|
||||
int ret = 0, thread_stride;
|
||||
ThreadParam *thread_param;
|
||||
#else
|
||||
ThreadParam thread_param = { 0 };
|
||||
#endif
|
||||
ThreadCommonParam thread_common_param;
|
||||
const ConvolutionalParams *conv_params = parameters;
|
||||
int height = operands[input_operand_indexes[0]].dims[1];
|
||||
int width = operands[input_operand_indexes[0]].dims[2];
|
||||
int pad_size = (conv_params->padding_method == VALID) ? (conv_params->kernel_size - 1) / 2 * conv_params->dilation : 0;
|
||||
DnnOperand *output_operand = &operands[output_operand_index];
|
||||
void *tmp;
|
||||
|
||||
output_operand->dims[0] = operands[input_operand_indexes[0]].dims[0];
|
||||
output_operand->dims[1] = height - pad_size * 2;
|
||||
output_operand->dims[2] = width - pad_size * 2;
|
||||
output_operand->dims[3] = conv_params->output_num;
|
||||
output_operand->data_type = operands[input_operand_indexes[0]].data_type;
|
||||
output_operand->length = ff_calculate_operand_data_length(output_operand);
|
||||
if (output_operand->length <= 0) {
|
||||
av_log(ctx, AV_LOG_ERROR, "The output data length overflow\n");
|
||||
return AVERROR(EINVAL);
|
||||
}
|
||||
tmp = av_realloc(output_operand->data, output_operand->length);
|
||||
if (!tmp) {
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to reallocate memory for output\n");
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
output_operand->data = tmp;
|
||||
thread_common_param.output_data = output_operand->data;
|
||||
thread_common_param.operands = operands;
|
||||
thread_common_param.input_operand_indexes = input_operand_indexes;
|
||||
thread_common_param.output_operand_index = output_operand_index;
|
||||
thread_common_param.parameters = parameters;
|
||||
thread_common_param.ctx = ctx;
|
||||
|
||||
#if HAVE_PTHREAD_CANCEL
|
||||
thread_param = av_malloc_array(thread_num, sizeof(*thread_param));
|
||||
if (!thread_param)
|
||||
return AVERROR(ENOMEM);
|
||||
thread_stride = (height - pad_size * 2) / thread_num;
|
||||
//create threads
|
||||
for (int i = 0; i < thread_num; i++){
|
||||
int thread_ret = 0;
|
||||
thread_param[i].thread_common_param = &thread_common_param;
|
||||
thread_param[i].thread_start = thread_stride * i + pad_size;
|
||||
thread_param[i].thread_end = (i == thread_num - 1) ? (height - pad_size) : (thread_param[i].thread_start + thread_stride);
|
||||
thread_ret = pthread_create(&thread_param[i].thread, NULL,
|
||||
dnn_execute_layer_conv2d_thread, &thread_param[i]);
|
||||
if (thread_ret) {
|
||||
thread_num = i;
|
||||
ret = AVERROR(thread_ret);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < thread_num; i++){
|
||||
pthread_join(thread_param[i].thread, NULL);
|
||||
}
|
||||
|
||||
//release memory
|
||||
av_freep(&thread_param);
|
||||
|
||||
return ret;
|
||||
#else
|
||||
thread_param.thread_common_param = &thread_common_param;
|
||||
thread_param.thread_start = pad_size;
|
||||
thread_param.thread_end = height - pad_size;
|
||||
dnn_execute_layer_conv2d_thread(&thread_param);
|
||||
|
||||
return 0;
|
||||
#endif
|
||||
}
|
@@ -1,68 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2018 Sergey Lavrushkin
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_CONV2D_H
|
||||
#define AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_CONV2D_H
|
||||
|
||||
#include "dnn_backend_native.h"
|
||||
|
||||
|
||||
typedef struct ConvolutionalParams{
|
||||
int32_t input_num, output_num, kernel_size;
|
||||
DNNActivationFunc activation;
|
||||
DNNPaddingParam padding_method;
|
||||
int32_t dilation;
|
||||
int32_t has_bias;
|
||||
float *kernel;
|
||||
float *biases;
|
||||
} ConvolutionalParams;
|
||||
|
||||
/**
|
||||
* @brief Load the 2D Convolution Layer.
|
||||
*
|
||||
* It assigns the 2D convolution layer with ConvolutionalParams
|
||||
* after parsing from the model file context.
|
||||
*
|
||||
* @param layer pointer to the DNN layer instance
|
||||
* @param model_file_context pointer to model file context
|
||||
* @param file_size model file size to check if data is read
|
||||
* correctly from the model file
|
||||
* @param operands_num operand count of the whole model to
|
||||
* check if data is read correctly from the model file
|
||||
* @return number of bytes read from the model file
|
||||
* @retval 0 if out of memory or an error occurs
|
||||
*/
|
||||
int ff_dnn_load_layer_conv2d(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num);
|
||||
|
||||
/**
|
||||
* @brief Execute the 2D Convolution Layer.
|
||||
*
|
||||
* @param operands all operands for the model
|
||||
* @param input_operand_indexes input operand indexes for this layer
|
||||
* @param output_operand_index output operand index for this layer
|
||||
* @param parameters convolution parameters
|
||||
* @param ctx pointer to Native model context for logging
|
||||
* @retval 0 if the execution succeeds
|
||||
* @retval AVERROR(ENOMEM) if memory allocation fails
|
||||
* @retval AVERROR(EINVAL) for invalid arguments
|
||||
*/
|
||||
int ff_dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_indexes,
|
||||
int32_t output_operand_index, const void *parameters, NativeContext *ctx);
|
||||
#endif
|
@@ -1,151 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2020
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/avassert.h"
|
||||
#include "dnn_backend_native_layer_dense.h"
|
||||
|
||||
int ff_dnn_load_layer_dense(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num)
|
||||
{
|
||||
DenseParams *dense_params;
|
||||
int kernel_size;
|
||||
int dnn_size = 0;
|
||||
dense_params = av_malloc(sizeof(*dense_params));
|
||||
if (!dense_params)
|
||||
return 0;
|
||||
|
||||
dense_params->activation = (int32_t)avio_rl32(model_file_context);
|
||||
dense_params->input_num = (int32_t)avio_rl32(model_file_context);
|
||||
dense_params->output_num = (int32_t)avio_rl32(model_file_context);
|
||||
dense_params->has_bias = (int32_t)avio_rl32(model_file_context);
|
||||
dnn_size += 16;
|
||||
|
||||
kernel_size = dense_params->input_num * dense_params->output_num;
|
||||
dnn_size += kernel_size * 4;
|
||||
if (dense_params->has_bias)
|
||||
dnn_size += dense_params->output_num * 4;
|
||||
|
||||
if (dnn_size > file_size || dense_params->input_num <= 0 ||
|
||||
dense_params->output_num <= 0){
|
||||
av_freep(&dense_params);
|
||||
return 0;
|
||||
}
|
||||
|
||||
dense_params->kernel = av_malloc(kernel_size * sizeof(float));
|
||||
if (!dense_params->kernel) {
|
||||
av_freep(&dense_params);
|
||||
return 0;
|
||||
}
|
||||
for (int i = 0; i < kernel_size; ++i) {
|
||||
dense_params->kernel[i] = av_int2float(avio_rl32(model_file_context));
|
||||
}
|
||||
|
||||
dense_params->biases = NULL;
|
||||
if (dense_params->has_bias) {
|
||||
dense_params->biases = av_malloc(dense_params->output_num * sizeof(float));
|
||||
if (!dense_params->biases){
|
||||
av_freep(&dense_params->kernel);
|
||||
av_freep(&dense_params);
|
||||
return 0;
|
||||
}
|
||||
for (int i = 0; i < dense_params->output_num; ++i){
|
||||
dense_params->biases[i] = av_int2float(avio_rl32(model_file_context));
|
||||
}
|
||||
}
|
||||
|
||||
layer->params = dense_params;
|
||||
|
||||
layer->input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
|
||||
layer->output_operand_index = (int32_t)avio_rl32(model_file_context);
|
||||
dnn_size += 8;
|
||||
|
||||
if (layer->input_operand_indexes[0] >= operands_num || layer->output_operand_index >= operands_num) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return dnn_size;
|
||||
}
|
||||
|
||||
int ff_dnn_execute_layer_dense(DnnOperand *operands, const int32_t *input_operand_indexes,
|
||||
int32_t output_operand_index, const void *parameters, NativeContext *ctx)
|
||||
{
|
||||
float *output;
|
||||
int32_t input_operand_index = input_operand_indexes[0];
|
||||
int number = operands[input_operand_index].dims[0];
|
||||
int height = operands[input_operand_index].dims[1];
|
||||
int width = operands[input_operand_index].dims[2];
|
||||
int channel = operands[input_operand_index].dims[3];
|
||||
const float *input = operands[input_operand_index].data;
|
||||
const DenseParams *dense_params = parameters;
|
||||
|
||||
int src_linesize = width * channel;
|
||||
DnnOperand *output_operand = &operands[output_operand_index];
|
||||
output_operand->dims[0] = number;
|
||||
output_operand->dims[1] = height;
|
||||
output_operand->dims[2] = width;
|
||||
output_operand->dims[3] = dense_params->output_num;
|
||||
output_operand->data_type = operands[input_operand_index].data_type;
|
||||
output_operand->length = ff_calculate_operand_data_length(output_operand);
|
||||
if (output_operand->length <= 0) {
|
||||
av_log(ctx, AV_LOG_ERROR, "The output data length overflow\n");
|
||||
return AVERROR(EINVAL);
|
||||
}
|
||||
output_operand->data = av_realloc(output_operand->data, output_operand->length);
|
||||
if (!output_operand->data) {
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to reallocate memory for output\n");
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
output = output_operand->data;
|
||||
|
||||
av_assert0(channel == dense_params->input_num);
|
||||
|
||||
for (int y = 0; y < height; ++y) {
|
||||
for (int x = 0; x < width; ++x) {
|
||||
for (int n_filter = 0; n_filter < dense_params->output_num; ++n_filter) {
|
||||
if (dense_params->has_bias)
|
||||
output[n_filter] = dense_params->biases[n_filter];
|
||||
else
|
||||
output[n_filter] = 0.f;
|
||||
|
||||
for (int ch = 0; ch < dense_params->input_num; ++ch) {
|
||||
float input_pel;
|
||||
input_pel = input[y * src_linesize + x * dense_params->input_num + ch];
|
||||
output[n_filter] += input_pel * dense_params->kernel[n_filter*dense_params->input_num + ch];
|
||||
}
|
||||
switch (dense_params->activation){
|
||||
case RELU:
|
||||
output[n_filter] = FFMAX(output[n_filter], 0.0);
|
||||
break;
|
||||
case TANH:
|
||||
output[n_filter] = 2.0f / (1.0f + exp(-2.0f * output[n_filter])) - 1.0f;
|
||||
break;
|
||||
case SIGMOID:
|
||||
output[n_filter] = 1.0f / (1.0f + exp(-output[n_filter]));
|
||||
break;
|
||||
case NONE:
|
||||
break;
|
||||
case LEAKY_RELU:
|
||||
output[n_filter] = FFMAX(output[n_filter], 0.0) + 0.2 * FFMIN(output[n_filter], 0.0);
|
||||
}
|
||||
}
|
||||
output += dense_params->output_num;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
@@ -1,65 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2020
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_DENSE_H
|
||||
#define AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_DENSE_H
|
||||
|
||||
#include "dnn_backend_native.h"
|
||||
|
||||
typedef struct DenseParams{
|
||||
int32_t input_num, output_num;
|
||||
DNNActivationFunc activation;
|
||||
int32_t has_bias;
|
||||
float *kernel;
|
||||
float *biases;
|
||||
} DenseParams;
|
||||
|
||||
/**
|
||||
* @brief Load the Densely-Connected Layer.
|
||||
*
|
||||
* It assigns the densely connected layer with DenseParams
|
||||
* after parsing from the model file context.
|
||||
*
|
||||
* @param layer pointer to the DNN layer instance
|
||||
* @param model_file_context pointer to model file context
|
||||
* @param file_size model file size to check if data is read
|
||||
* correctly from the model file
|
||||
* @param operands_num operand count of the whole model to
|
||||
* check if data is read correctly from the model file
|
||||
* @return number of bytes read from the model file
|
||||
* @retval 0 if out of memory or an error occurs
|
||||
*/
|
||||
int ff_dnn_load_layer_dense(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num);
|
||||
|
||||
/**
|
||||
* @brief Execute the Densely-Connected Layer.
|
||||
*
|
||||
* @param operands all operands for the model
|
||||
* @param input_operand_indexes input operand indexes for this layer
|
||||
* @param output_operand_index output operand index for this layer
|
||||
* @param parameters dense layer parameters
|
||||
* @param ctx pointer to Native model context for logging
|
||||
* @retval 0 if the execution succeeds
|
||||
* @retval AVERROR(ENOMEM) if memory allocation fails
|
||||
* @retval AVERROR(EINVAL) for invalid arguments
|
||||
*/
|
||||
int ff_dnn_execute_layer_dense(DnnOperand *operands, const int32_t *input_operand_indexes,
|
||||
int32_t output_operand_index, const void *parameters, NativeContext *ctx);
|
||||
#endif
|
@@ -1,102 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2018 Sergey Lavrushkin
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file
|
||||
* DNN native backend implementation.
|
||||
*/
|
||||
|
||||
#include "dnn_backend_native.h"
|
||||
#include "dnn_backend_native_layer_depth2space.h"
|
||||
|
||||
int ff_dnn_load_layer_depth2space(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num)
|
||||
{
|
||||
DepthToSpaceParams *params;
|
||||
int dnn_size = 0;
|
||||
params = av_malloc(sizeof(*params));
|
||||
if (!params)
|
||||
return 0;
|
||||
|
||||
params->block_size = (int32_t)avio_rl32(model_file_context);
|
||||
dnn_size += 4;
|
||||
layer->input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
|
||||
layer->output_operand_index = (int32_t)avio_rl32(model_file_context);
|
||||
dnn_size += 8;
|
||||
layer->params = params;
|
||||
|
||||
if (layer->input_operand_indexes[0] >= operands_num || layer->output_operand_index >= operands_num) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return dnn_size;
|
||||
}
|
||||
|
||||
int ff_dnn_execute_layer_depth2space(DnnOperand *operands, const int32_t *input_operand_indexes,
|
||||
int32_t output_operand_index, const void *parameters, NativeContext *ctx)
|
||||
{
|
||||
float *output;
|
||||
const DepthToSpaceParams *params = parameters;
|
||||
int block_size = params->block_size;
|
||||
int32_t input_operand_index = input_operand_indexes[0];
|
||||
int number = operands[input_operand_index].dims[0];
|
||||
int height = operands[input_operand_index].dims[1];
|
||||
int width = operands[input_operand_index].dims[2];
|
||||
int channels = operands[input_operand_index].dims[3];
|
||||
const float *input = operands[input_operand_index].data;
|
||||
|
||||
int y, x, by, bx, ch;
|
||||
int new_channels = channels / (block_size * block_size);
|
||||
int output_linesize = width * channels;
|
||||
int by_linesize = output_linesize / block_size;
|
||||
int x_linesize = new_channels * block_size;
|
||||
|
||||
DnnOperand *output_operand = &operands[output_operand_index];
|
||||
output_operand->dims[0] = number;
|
||||
output_operand->dims[1] = height * block_size;
|
||||
output_operand->dims[2] = width * block_size;
|
||||
output_operand->dims[3] = new_channels;
|
||||
output_operand->data_type = operands[input_operand_index].data_type;
|
||||
output_operand->length = ff_calculate_operand_data_length(output_operand);
|
||||
if (output_operand->length <= 0) {
|
||||
av_log(ctx, AV_LOG_ERROR, "The output data length overflow\n");
|
||||
return AVERROR(EINVAL);
|
||||
}
|
||||
output_operand->data = av_realloc(output_operand->data, output_operand->length);
|
||||
if (!output_operand->data) {
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to reallocate memory for output\n");
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
output = output_operand->data;
|
||||
|
||||
for (y = 0; y < height; ++y){
|
||||
for (x = 0; x < width; ++x){
|
||||
for (by = 0; by < block_size; ++by){
|
||||
for (bx = 0; bx < block_size; ++bx){
|
||||
for (ch = 0; ch < new_channels; ++ch){
|
||||
output[by * by_linesize + x * x_linesize + bx * new_channels + ch] = input[ch];
|
||||
}
|
||||
input += new_channels;
|
||||
}
|
||||
}
|
||||
}
|
||||
output += output_linesize;
|
||||
}
|
||||
return 0;
|
||||
}
|
@@ -1,72 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2018 Sergey Lavrushkin
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file
|
||||
* DNN inference functions interface for native backend.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_DEPTH2SPACE_H
|
||||
#define AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_DEPTH2SPACE_H
|
||||
|
||||
#include "../dnn_interface.h"
|
||||
#include "libavformat/avio.h"
|
||||
|
||||
typedef struct DepthToSpaceParams{
|
||||
int block_size;
|
||||
} DepthToSpaceParams;
|
||||
|
||||
/**
|
||||
* @brief Load the Depth to Space Layer.
|
||||
*
|
||||
* It assigns the depth to space layer with DepthToSpaceParams
|
||||
* after parsing from the model file context.
|
||||
*
|
||||
* @param layer pointer to the DNN layer instance
|
||||
* @param model_file_context pointer to model file context
|
||||
* @param file_size model file size to check if data is read
|
||||
* correctly from the model file
|
||||
* @param operands_num operand count of the whole model to
|
||||
* check if data is read correctly from the model file
|
||||
* @return number of bytes read from the model file
|
||||
* @retval 0 if an error occurs or out of memory
|
||||
*/
|
||||
int ff_dnn_load_layer_depth2space(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num);
|
||||
|
||||
/**
|
||||
* @brief Execute the Depth to Space Layer.
|
||||
*
|
||||
* It rearranges the input data from depth into spatial
|
||||
* form by applying Depth to Space transformation.
|
||||
*
|
||||
* @param operands all operands for the model
|
||||
* @param input_operand_indexes input operand indexes for this layer
|
||||
* @param output_operand_index output operand index for this layer
|
||||
* @param parameters depth to space layer parameters
|
||||
* @param ctx pointer to Native model context for logging
|
||||
* @retval 0 if the execution succeeds
|
||||
* @retval AVERROR(ENOMEM) if memory allocation fails
|
||||
* @retval AVERROR(EINVAL) for invalid arguments
|
||||
*/
|
||||
int ff_dnn_execute_layer_depth2space(DnnOperand *operands, const int32_t *input_operand_indexes,
|
||||
int32_t output_operand_index, const void *parameters, NativeContext *ctx);
|
||||
|
||||
#endif
|
@@ -1,193 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2020
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file
|
||||
* DNN native backend implementation.
|
||||
*/
|
||||
|
||||
#include "dnn_backend_native.h"
|
||||
#include "dnn_backend_native_layer_mathbinary.h"
|
||||
|
||||
typedef float (*FunType)(float src0, float src1);
|
||||
|
||||
static float sub(float src0, float src1)
|
||||
{
|
||||
return src0 - src1;
|
||||
}
|
||||
static float add(float src0, float src1)
|
||||
{
|
||||
return src0 + src1;
|
||||
}
|
||||
static float mul(float src0, float src1)
|
||||
{
|
||||
return src0 * src1;
|
||||
}
|
||||
static float realdiv(float src0, float src1)
|
||||
{
|
||||
return src0 / src1;
|
||||
}
|
||||
static float minimum(float src0, float src1)
|
||||
{
|
||||
return FFMIN(src0, src1);
|
||||
}
|
||||
static float floormod(float src0, float src1)
|
||||
{
|
||||
return (float)((int)(src0) % (int)(src1));
|
||||
}
|
||||
|
||||
static void math_binary_commutative(FunType pfun, const DnnLayerMathBinaryParams *params, const DnnOperand *input, DnnOperand *output, DnnOperand *operands, const int32_t *input_operand_indexes)
|
||||
{
|
||||
int dims_count;
|
||||
const float *src;
|
||||
float *dst;
|
||||
dims_count = ff_calculate_operand_dims_count(output);
|
||||
src = input->data;
|
||||
dst = output->data;
|
||||
if (params->input0_broadcast || params->input1_broadcast) {
|
||||
for (int i = 0; i < dims_count; ++i) {
|
||||
dst[i] = pfun(params->v, src[i]);
|
||||
}
|
||||
} else {
|
||||
const DnnOperand *input1 = &operands[input_operand_indexes[1]];
|
||||
const float *src1 = input1->data;
|
||||
for (int i = 0; i < dims_count; ++i) {
|
||||
dst[i] = pfun(src[i], src1[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
static void math_binary_not_commutative(FunType pfun, const DnnLayerMathBinaryParams *params, const DnnOperand *input, DnnOperand *output, DnnOperand *operands, const int32_t *input_operand_indexes)
|
||||
{
|
||||
int dims_count;
|
||||
const float *src;
|
||||
float *dst;
|
||||
dims_count = ff_calculate_operand_dims_count(output);
|
||||
src = input->data;
|
||||
dst = output->data;
|
||||
if (params->input0_broadcast) {
|
||||
for (int i = 0; i < dims_count; ++i) {
|
||||
dst[i] = pfun(params->v, src[i]);
|
||||
}
|
||||
} else if (params->input1_broadcast) {
|
||||
for (int i = 0; i < dims_count; ++i) {
|
||||
dst[i] = pfun(src[i], params->v);
|
||||
}
|
||||
} else {
|
||||
const DnnOperand *input1 = &operands[input_operand_indexes[1]];
|
||||
const float *src1 = input1->data;
|
||||
for (int i = 0; i < dims_count; ++i) {
|
||||
dst[i] = pfun(src[i], src1[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
int ff_dnn_load_layer_math_binary(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num)
|
||||
{
|
||||
DnnLayerMathBinaryParams params = { 0 };
|
||||
int dnn_size = 0;
|
||||
int input_index = 0;
|
||||
|
||||
params.bin_op = (int32_t)avio_rl32(model_file_context);
|
||||
dnn_size += 4;
|
||||
|
||||
params.input0_broadcast = (int32_t)avio_rl32(model_file_context);
|
||||
dnn_size += 4;
|
||||
if (params.input0_broadcast) {
|
||||
params.v = av_int2float(avio_rl32(model_file_context));
|
||||
} else {
|
||||
layer->input_operand_indexes[input_index] = (int32_t)avio_rl32(model_file_context);
|
||||
if (layer->input_operand_indexes[input_index] >= operands_num) {
|
||||
return 0;
|
||||
}
|
||||
input_index++;
|
||||
}
|
||||
dnn_size += 4;
|
||||
|
||||
params.input1_broadcast = (int32_t)avio_rl32(model_file_context);
|
||||
dnn_size += 4;
|
||||
if (params.input1_broadcast) {
|
||||
params.v = av_int2float(avio_rl32(model_file_context));
|
||||
} else {
|
||||
layer->input_operand_indexes[input_index] = (int32_t)avio_rl32(model_file_context);
|
||||
if (layer->input_operand_indexes[input_index] >= operands_num) {
|
||||
return 0;
|
||||
}
|
||||
input_index++;
|
||||
}
|
||||
dnn_size += 4;
|
||||
|
||||
layer->output_operand_index = (int32_t)avio_rl32(model_file_context);
|
||||
dnn_size += 4;
|
||||
|
||||
if (layer->output_operand_index >= operands_num) {
|
||||
return 0;
|
||||
}
|
||||
layer->params = av_memdup(¶ms, sizeof(params));
|
||||
if (!layer->params)
|
||||
return 0;
|
||||
|
||||
return dnn_size;
|
||||
}
|
||||
|
||||
int ff_dnn_execute_layer_math_binary(DnnOperand *operands, const int32_t *input_operand_indexes,
|
||||
int32_t output_operand_index, const void *parameters, NativeContext *ctx)
|
||||
{
|
||||
const DnnOperand *input = &operands[input_operand_indexes[0]];
|
||||
DnnOperand *output = &operands[output_operand_index];
|
||||
const DnnLayerMathBinaryParams *params = parameters;
|
||||
|
||||
for (int i = 0; i < 4; ++i)
|
||||
output->dims[i] = input->dims[i];
|
||||
|
||||
output->data_type = input->data_type;
|
||||
output->length = ff_calculate_operand_data_length(output);
|
||||
if (output->length <= 0) {
|
||||
av_log(ctx, AV_LOG_ERROR, "The output data length overflow\n");
|
||||
return AVERROR(EINVAL);
|
||||
}
|
||||
output->data = av_realloc(output->data, output->length);
|
||||
if (!output->data) {
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to reallocate memory for output\n");
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
|
||||
switch (params->bin_op) {
|
||||
case DMBO_SUB:
|
||||
math_binary_not_commutative(sub, params, input, output, operands, input_operand_indexes);
|
||||
return 0;
|
||||
case DMBO_ADD:
|
||||
math_binary_commutative(add, params, input, output, operands, input_operand_indexes);
|
||||
return 0;
|
||||
case DMBO_MUL:
|
||||
math_binary_commutative(mul, params, input, output, operands, input_operand_indexes);
|
||||
return 0;
|
||||
case DMBO_REALDIV:
|
||||
math_binary_not_commutative(realdiv, params, input, output, operands, input_operand_indexes);
|
||||
return 0;
|
||||
case DMBO_MINIMUM:
|
||||
math_binary_commutative(minimum, params, input, output, operands, input_operand_indexes);
|
||||
return 0;
|
||||
case DMBO_FLOORMOD:
|
||||
math_binary_not_commutative(floormod, params, input, output, operands, input_operand_indexes);
|
||||
return 0;
|
||||
default:
|
||||
av_log(ctx, AV_LOG_ERROR, "Unmatch math binary operator\n");
|
||||
return AVERROR(EINVAL);
|
||||
}
|
||||
}
|
@@ -1,54 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2020
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file
|
||||
* DNN inference functions interface for native backend.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_MATHBINARY_H
|
||||
#define AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_MATHBINARY_H
|
||||
|
||||
#include "libavformat/avio.h"
|
||||
#include "dnn_backend_native.h"
|
||||
|
||||
typedef enum {
|
||||
DMBO_SUB = 0,
|
||||
DMBO_ADD = 1,
|
||||
DMBO_MUL = 2,
|
||||
DMBO_REALDIV = 3,
|
||||
DMBO_MINIMUM = 4,
|
||||
DMBO_FLOORMOD = 5,
|
||||
DMBO_COUNT
|
||||
} DNNMathBinaryOperation;
|
||||
|
||||
typedef struct DnnLayerMathBinaryParams{
|
||||
DNNMathBinaryOperation bin_op;
|
||||
int input0_broadcast;
|
||||
int input1_broadcast;
|
||||
float v;
|
||||
} DnnLayerMathBinaryParams;
|
||||
|
||||
int ff_dnn_load_layer_math_binary(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num);
|
||||
int ff_dnn_execute_layer_math_binary(DnnOperand *operands, const int32_t *input_operand_indexes,
|
||||
int32_t output_operand_index, const void *parameters, NativeContext *ctx);
|
||||
|
||||
#endif
|
@@ -1,156 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2020
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file
|
||||
* DNN native backend implementation.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "dnn_backend_native.h"
|
||||
#include "dnn_backend_native_layer_mathunary.h"
|
||||
|
||||
int ff_dnn_load_layer_math_unary(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num)
|
||||
{
|
||||
DnnLayerMathUnaryParams *params;
|
||||
int dnn_size = 0;
|
||||
params = av_malloc(sizeof(*params));
|
||||
if(!params)
|
||||
return 0;
|
||||
|
||||
params->un_op = (int32_t)avio_rl32(model_file_context);
|
||||
dnn_size += 4;
|
||||
layer->params = params;
|
||||
layer->input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
|
||||
layer->output_operand_index = (int32_t)avio_rl32(model_file_context);
|
||||
dnn_size += 8;
|
||||
|
||||
if (layer->input_operand_indexes[0] >= operands_num || layer->output_operand_index >= operands_num) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return dnn_size;
|
||||
|
||||
}
|
||||
|
||||
int ff_dnn_execute_layer_math_unary(DnnOperand *operands, const int32_t *input_operand_indexes,
|
||||
int32_t output_operand_index, const void *parameters, NativeContext *ctx)
|
||||
{
|
||||
const DnnOperand *input = &operands[input_operand_indexes[0]];
|
||||
DnnOperand *output = &operands[output_operand_index];
|
||||
const DnnLayerMathUnaryParams *params = parameters;
|
||||
int dims_count;
|
||||
const float *src;
|
||||
float *dst;
|
||||
|
||||
for (int i = 0; i < 4; ++i)
|
||||
output->dims[i] = input->dims[i];
|
||||
|
||||
output->data_type = input->data_type;
|
||||
output->length = ff_calculate_operand_data_length(output);
|
||||
if (output->length <= 0) {
|
||||
av_log(ctx, AV_LOG_ERROR, "The output data length overflow\n");
|
||||
return AVERROR(EINVAL);
|
||||
}
|
||||
output->data = av_realloc(output->data, output->length);
|
||||
if (!output->data) {
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to reallocate memory for output\n");
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
|
||||
dims_count = ff_calculate_operand_dims_count(output);
|
||||
src = input->data;
|
||||
dst = output->data;
|
||||
|
||||
switch (params->un_op) {
|
||||
case DMUO_ABS:
|
||||
for (int i = 0; i < dims_count; ++i)
|
||||
dst[i] = FFABS(src[i]);
|
||||
return 0;
|
||||
case DMUO_SIN:
|
||||
for (int i = 0; i < dims_count; ++i)
|
||||
dst[i] = sin(src[i]);
|
||||
return 0;
|
||||
case DMUO_COS:
|
||||
for (int i = 0; i < dims_count; ++i)
|
||||
dst[i] = cos(src[i]);
|
||||
return 0;
|
||||
case DMUO_TAN:
|
||||
for (int i = 0; i < dims_count; ++i)
|
||||
dst[i] = tan(src[i]);
|
||||
return 0;
|
||||
case DMUO_ASIN:
|
||||
for (int i = 0; i < dims_count; ++i)
|
||||
dst[i] = asin(src[i]);
|
||||
return 0;
|
||||
case DMUO_ACOS:
|
||||
for (int i = 0; i < dims_count; ++i)
|
||||
dst[i] = acos(src[i]);
|
||||
return 0;
|
||||
case DMUO_ATAN:
|
||||
for (int i = 0; i < dims_count; ++i)
|
||||
dst[i] = atan(src[i]);
|
||||
return 0;
|
||||
case DMUO_SINH:
|
||||
for (int i = 0; i < dims_count; ++i)
|
||||
dst[i] = sinh(src[i]);
|
||||
return 0;
|
||||
case DMUO_COSH:
|
||||
for (int i = 0; i < dims_count; ++i)
|
||||
dst[i] = cosh(src[i]);
|
||||
return 0;
|
||||
case DMUO_TANH:
|
||||
for (int i = 0; i < dims_count; ++i)
|
||||
dst[i] = tanh(src[i]);
|
||||
return 0;
|
||||
case DMUO_ASINH:
|
||||
for (int i = 0; i < dims_count; ++i)
|
||||
dst[i] = asinh(src[i]);
|
||||
return 0;
|
||||
case DMUO_ACOSH:
|
||||
for (int i = 0; i < dims_count; ++i)
|
||||
dst[i] = acosh(src[i]);
|
||||
return 0;
|
||||
case DMUO_ATANH:
|
||||
for (int i = 0; i < dims_count; ++i)
|
||||
dst[i] = atanh(src[i]);
|
||||
return 0;
|
||||
case DMUO_CEIL:
|
||||
for (int i = 0; i < dims_count; ++i)
|
||||
dst[i] = ceil(src[i]);
|
||||
return 0;
|
||||
case DMUO_FLOOR:
|
||||
for (int i = 0; i < dims_count; ++i)
|
||||
dst[i] = floor(src[i]);
|
||||
return 0;
|
||||
case DMUO_ROUND:
|
||||
for (int i = 0; i < dims_count; ++i)
|
||||
dst[i] = round(src[i]);
|
||||
return 0;
|
||||
case DMUO_EXP:
|
||||
for (int i = 0; i < dims_count; ++i)
|
||||
dst[i] = exp(src[i]);
|
||||
return 0;
|
||||
default:
|
||||
av_log(ctx, AV_LOG_ERROR, "Unmatch math unary operator\n");
|
||||
return AVERROR(EINVAL);
|
||||
}
|
||||
}
|
@@ -1,92 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2020
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file
|
||||
* DNN inference functions interface for native backend.
|
||||
*/
|
||||
|
||||
#ifndef AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_MATHUNARY_H
|
||||
#define AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_MATHUNARY_H
|
||||
|
||||
#include "libavformat/avio.h"
|
||||
#include "dnn_backend_native.h"
|
||||
|
||||
typedef enum {
|
||||
DMUO_ABS = 0,
|
||||
DMUO_SIN = 1,
|
||||
DMUO_COS = 2,
|
||||
DMUO_TAN = 3,
|
||||
DMUO_ASIN = 4,
|
||||
DMUO_ACOS = 5,
|
||||
DMUO_ATAN = 6,
|
||||
DMUO_SINH = 7,
|
||||
DMUO_COSH = 8,
|
||||
DMUO_TANH = 9,
|
||||
DMUO_ASINH = 10,
|
||||
DMUO_ACOSH = 11,
|
||||
DMUO_ATANH = 12,
|
||||
DMUO_CEIL = 13,
|
||||
DMUO_FLOOR = 14,
|
||||
DMUO_ROUND = 15,
|
||||
DMUO_EXP = 16,
|
||||
DMUO_COUNT
|
||||
} DNNMathUnaryOperation;
|
||||
|
||||
typedef struct DnnLayerMathUnaryParams{
|
||||
DNNMathUnaryOperation un_op;
|
||||
} DnnLayerMathUnaryParams;
|
||||
|
||||
/**
|
||||
* @brief Load the Unary Math Layer.
|
||||
*
|
||||
* It assigns the unary math layer with DnnLayerMathUnaryParams
|
||||
* after parsing from the model file context.
|
||||
*
|
||||
* @param layer pointer to the DNN layer instance
|
||||
* @param model_file_context pointer to model file context
|
||||
* @param file_size model file size to check if data is read
|
||||
* correctly from the model file
|
||||
* @param operands_num operand count of the whole model to
|
||||
* check if data is read correctly from the model file
|
||||
* @return number of bytes read from the model file
|
||||
* @retval 0 if out of memory or an error occurs
|
||||
*/
|
||||
int ff_dnn_load_layer_math_unary(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num);
|
||||
|
||||
/**
|
||||
* @brief Execute the Unary Math Layer.
|
||||
*
|
||||
* It applies the unary operator parsed while
|
||||
* loading to the given input operands.
|
||||
*
|
||||
* @param operands all operands for the model
|
||||
* @param input_operand_indexes input operand indexes for this layer
|
||||
* @param output_operand_index output operand index for this layer
|
||||
* @param parameters unary math layer parameters
|
||||
* @param ctx pointer to Native model context for logging
|
||||
* @retval 0 if the execution succeeds
|
||||
* @retval AVERROR(ENOMEM) if memory allocation fails
|
||||
* @retval AVERROR(EINVAL) for invalid arguments
|
||||
*/
|
||||
int ff_dnn_execute_layer_math_unary(DnnOperand *operands, const int32_t *input_operand_indexes,
|
||||
int32_t output_operand_index, const void *parameters, NativeContext *ctx);
|
||||
|
||||
#endif
|
@@ -1,83 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2019 Guo Yejun
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file
|
||||
* DNN native backend implementation.
|
||||
*/
|
||||
|
||||
#include "dnn_backend_native.h"
|
||||
#include "dnn_backend_native_layer_maximum.h"
|
||||
|
||||
int ff_dnn_load_layer_maximum(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num)
|
||||
{
|
||||
DnnLayerMaximumParams *params;
|
||||
int dnn_size = 0;
|
||||
params = av_malloc(sizeof(*params));
|
||||
if (!params)
|
||||
return 0;
|
||||
|
||||
params->val.u32 = avio_rl32(model_file_context);
|
||||
dnn_size += 4;
|
||||
layer->params = params;
|
||||
layer->input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
|
||||
layer->output_operand_index = (int32_t)avio_rl32(model_file_context);
|
||||
dnn_size += 8;
|
||||
|
||||
if (layer->input_operand_indexes[0] >= operands_num || layer->output_operand_index >= operands_num) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return dnn_size;
|
||||
}
|
||||
|
||||
int ff_dnn_execute_layer_maximum(DnnOperand *operands, const int32_t *input_operand_indexes,
|
||||
int32_t output_operand_index, const void *parameters, NativeContext *ctx)
|
||||
{
|
||||
const DnnOperand *input = &operands[input_operand_indexes[0]];
|
||||
DnnOperand *output = &operands[output_operand_index];
|
||||
const DnnLayerMaximumParams *params = parameters;
|
||||
int dims_count;
|
||||
const float *src;
|
||||
float *dst;
|
||||
|
||||
for (int i = 0; i < 4; ++i)
|
||||
output->dims[i] = input->dims[i];
|
||||
|
||||
output->data_type = input->data_type;
|
||||
output->length = ff_calculate_operand_data_length(output);
|
||||
if (output->length <= 0) {
|
||||
av_log(ctx, AV_LOG_ERROR, "The output data length overflow\n");
|
||||
return AVERROR(EINVAL);
|
||||
}
|
||||
output->data = av_realloc(output->data, output->length);
|
||||
if (!output->data) {
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to reallocate memory for output\n");
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
|
||||
dims_count = ff_calculate_operand_dims_count(output);
|
||||
src = input->data;
|
||||
dst = output->data;
|
||||
for (int i = 0; i < dims_count; ++i)
|
||||
dst[i] = FFMAX(src[i], params->val.y);
|
||||
|
||||
return 0;
|
||||
}
|
@@ -1,44 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2019 Guo Yejun
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file
|
||||
* DNN inference functions interface for native backend.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_MAXIMUM_H
|
||||
#define AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_MAXIMUM_H
|
||||
|
||||
#include "libavformat/avio.h"
|
||||
#include "dnn_backend_native.h"
|
||||
|
||||
typedef struct DnnLayerMaximumParams{
|
||||
union {
|
||||
uint32_t u32;
|
||||
float y;
|
||||
}val;
|
||||
} DnnLayerMaximumParams;
|
||||
|
||||
int ff_dnn_load_layer_maximum(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num);
|
||||
int ff_dnn_execute_layer_maximum(DnnOperand *operands, const int32_t *input_operand_indexes,
|
||||
int32_t output_operand_index, const void *parameters, NativeContext *ctx);
|
||||
|
||||
#endif
|
@@ -1,268 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2019 Guo Yejun
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "libavutil/avassert.h"
|
||||
#include "dnn_backend_native_layer_pad.h"
|
||||
|
||||
int ff_dnn_load_layer_pad(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num)
|
||||
{
|
||||
LayerPadParams *params;
|
||||
int dnn_size = 0;
|
||||
params = av_malloc(sizeof(*params));
|
||||
if (!params)
|
||||
return 0;
|
||||
|
||||
params->mode = (int32_t)avio_rl32(model_file_context);
|
||||
dnn_size += 4;
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
params->paddings[i][0] = avio_rl32(model_file_context);
|
||||
params->paddings[i][1] = avio_rl32(model_file_context);
|
||||
dnn_size += 8;
|
||||
}
|
||||
layer->input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
|
||||
layer->output_operand_index = (int32_t)avio_rl32(model_file_context);
|
||||
dnn_size += 8;
|
||||
layer->params = params;
|
||||
|
||||
if (layer->input_operand_indexes[0] >= operands_num || layer->output_operand_index >= operands_num) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return dnn_size;
|
||||
}
|
||||
|
||||
static int before_get_buddy(int given, int paddings, LayerPadModeParam mode)
|
||||
{
|
||||
if (mode == LPMP_SYMMETRIC) {
|
||||
return (2 * paddings - 1 - given);
|
||||
} else if (mode == LPMP_REFLECT) {
|
||||
return (2 * paddings - given);
|
||||
} else {
|
||||
av_assert0(!"should not reach here");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int after_get_buddy(int given, int border, LayerPadModeParam mode)
|
||||
{
|
||||
if (mode == LPMP_SYMMETRIC) {
|
||||
int offset = given - border;
|
||||
return (border - 1 - offset);
|
||||
} else if (mode == LPMP_REFLECT) {
|
||||
int offset = given - border;
|
||||
return (border - 2 - offset);
|
||||
} else {
|
||||
av_assert0(!"should not reach here");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
int ff_dnn_execute_layer_pad(DnnOperand *operands, const int32_t *input_operand_indexes,
|
||||
int32_t output_operand_index, const void *parameters, NativeContext *ctx)
|
||||
{
|
||||
int32_t before_paddings;
|
||||
int32_t after_paddings;
|
||||
float* output;
|
||||
const LayerPadParams *params = parameters;
|
||||
|
||||
// suppose format is <N, H, W, C>
|
||||
int32_t input_operand_index = input_operand_indexes[0];
|
||||
int number = operands[input_operand_index].dims[0];
|
||||
int height = operands[input_operand_index].dims[1];
|
||||
int width = operands[input_operand_index].dims[2];
|
||||
int channel = operands[input_operand_index].dims[3];
|
||||
const float *input = operands[input_operand_index].data;
|
||||
|
||||
int new_number = number + params->paddings[0][0] + params->paddings[0][1];
|
||||
int new_height = height + params->paddings[1][0] + params->paddings[1][1];
|
||||
int new_width = width + params->paddings[2][0] + params->paddings[2][1];
|
||||
int new_channel = channel + params->paddings[3][0] + params->paddings[3][1];
|
||||
|
||||
int c_stride = channel;
|
||||
int wc_stride = c_stride * width;
|
||||
int hwc_stride = wc_stride * height;
|
||||
|
||||
int new_c_stride = new_channel;
|
||||
int new_wc_stride = new_c_stride * new_width;
|
||||
int new_hwc_stride = new_wc_stride * new_height;
|
||||
|
||||
DnnOperand *output_operand = &operands[output_operand_index];
|
||||
output_operand->dims[0] = new_number;
|
||||
output_operand->dims[1] = new_height;
|
||||
output_operand->dims[2] = new_width;
|
||||
output_operand->dims[3] = new_channel;
|
||||
output_operand->data_type = operands[input_operand_index].data_type;
|
||||
output_operand->length = ff_calculate_operand_data_length(output_operand);
|
||||
if (output_operand->length <= 0) {
|
||||
av_log(ctx, AV_LOG_ERROR, "The output data length overflow\n");
|
||||
return AVERROR(EINVAL);
|
||||
}
|
||||
output_operand->data = av_realloc(output_operand->data, output_operand->length);
|
||||
if (!output_operand->data) {
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to reallocate memory for output\n");
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
output = output_operand->data;
|
||||
|
||||
// copy the original data
|
||||
for (int n = 0; n < number; n++) {
|
||||
for (int h = 0; h < height; h++) {
|
||||
for (int w = 0; w < width; w++) {
|
||||
const float *src = input + n * hwc_stride + h * wc_stride + w * c_stride;
|
||||
float *dst = output + (n + params->paddings[0][0]) * new_hwc_stride
|
||||
+ (h + params->paddings[1][0]) * new_wc_stride
|
||||
+ (w + params->paddings[2][0]) * new_c_stride
|
||||
+ params->paddings[3][0];
|
||||
memcpy(dst, src, channel * sizeof(float));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// handle the first dimension
|
||||
before_paddings = params->paddings[0][0];
|
||||
after_paddings = params->paddings[0][1];
|
||||
for (int n = 0; n < before_paddings; n++) {
|
||||
float *dst = output + n * new_hwc_stride;
|
||||
if (params->mode == LPMP_CONSTANT) {
|
||||
for (int i = 0; i < new_hwc_stride; i++) {
|
||||
dst[i] = params->constant_values;
|
||||
}
|
||||
}
|
||||
else {
|
||||
int buddy = before_get_buddy(n, before_paddings, params->mode);
|
||||
float *src = output + buddy * new_hwc_stride;
|
||||
memcpy(dst, src, new_hwc_stride * sizeof(float));
|
||||
}
|
||||
}
|
||||
for (int n = 0; n < after_paddings; n++) {
|
||||
int given = number + before_paddings + n;
|
||||
float *dst = output + given * new_hwc_stride;
|
||||
if (params->mode == LPMP_CONSTANT) {
|
||||
for (int i = 0; i < new_hwc_stride; i++) {
|
||||
dst[i] = params->constant_values;
|
||||
}
|
||||
} else {
|
||||
int buddy = after_get_buddy(given, number + before_paddings, params->mode);
|
||||
float *src = output + buddy * new_hwc_stride;
|
||||
memcpy(dst, src, new_hwc_stride * sizeof(float));
|
||||
}
|
||||
}
|
||||
|
||||
// handle the second dimension
|
||||
before_paddings = params->paddings[1][0];
|
||||
after_paddings = params->paddings[1][1];
|
||||
for (int n = 0; n < new_number; n++) {
|
||||
float *start = output + n * new_hwc_stride;
|
||||
for (int h = 0; h < before_paddings; h++) {
|
||||
float *dst = start + h * new_wc_stride;
|
||||
if (params->mode == LPMP_CONSTANT) {
|
||||
for (int i = 0; i < new_wc_stride; i++) {
|
||||
dst[i] = params->constant_values;
|
||||
}
|
||||
} else {
|
||||
int buddy = before_get_buddy(h, before_paddings, params->mode);
|
||||
float *src = start + buddy * new_wc_stride;
|
||||
memcpy(dst, src, new_wc_stride * sizeof(float));
|
||||
}
|
||||
}
|
||||
for (int h = 0; h < after_paddings; h++) {
|
||||
int given = height + before_paddings + h;
|
||||
float *dst = start + given * new_wc_stride;
|
||||
if (params->mode == LPMP_CONSTANT) {
|
||||
for (int i = 0; i < new_wc_stride; i++) {
|
||||
dst[i] = params->constant_values;
|
||||
}
|
||||
} else {
|
||||
int buddy = after_get_buddy(given, height + before_paddings, params->mode);
|
||||
float *src = start + buddy * new_wc_stride;
|
||||
memcpy(dst, src, new_wc_stride * sizeof(float));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// handle the third dimension
|
||||
before_paddings = params->paddings[2][0];
|
||||
after_paddings = params->paddings[2][1];
|
||||
for (int n = 0; n < new_number; n++) {
|
||||
for (int h = 0; h < new_height; h++) {
|
||||
float *start = output + n * new_hwc_stride + h * new_wc_stride;
|
||||
for (int w = 0; w < before_paddings; w++) {
|
||||
float *dst = start + w * new_c_stride;
|
||||
if (params->mode == LPMP_CONSTANT) {
|
||||
for (int i = 0; i < new_c_stride; i++) {
|
||||
dst[i] = params->constant_values;
|
||||
}
|
||||
} else {
|
||||
int buddy = before_get_buddy(w, before_paddings, params->mode);
|
||||
float *src = start + buddy * new_c_stride;
|
||||
memcpy(dst, src, new_c_stride * sizeof(float));
|
||||
}
|
||||
}
|
||||
for (int w = 0; w < after_paddings; w++) {
|
||||
int given = width + before_paddings + w;
|
||||
float *dst = start + given * new_c_stride;
|
||||
if (params->mode == LPMP_CONSTANT) {
|
||||
for (int i = 0; i < new_c_stride; i++) {
|
||||
dst[i] = params->constant_values;
|
||||
}
|
||||
} else {
|
||||
int buddy = after_get_buddy(given, width + before_paddings, params->mode);
|
||||
float *src = start + buddy * new_c_stride;
|
||||
memcpy(dst, src, new_c_stride * sizeof(float));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// handle the fourth dimension
|
||||
before_paddings = params->paddings[3][0];
|
||||
after_paddings = params->paddings[3][1];
|
||||
for (int n = 0; n < new_number; n++) {
|
||||
for (int h = 0; h < new_height; h++) {
|
||||
for (int w = 0; w < new_width; w++) {
|
||||
float *start = output + n * new_hwc_stride + h * new_wc_stride + w * new_c_stride;
|
||||
for (int c = 0; c < before_paddings; c++) {
|
||||
float *dst = start + c;
|
||||
if (params->mode == LPMP_CONSTANT) {
|
||||
*dst = params->constant_values;
|
||||
} else {
|
||||
int buddy = before_get_buddy(c, before_paddings, params->mode);
|
||||
float *src = start + buddy;
|
||||
*dst = *src;
|
||||
}
|
||||
}
|
||||
for (int c = 0; c < after_paddings; c++) {
|
||||
int given = channel + before_paddings + c;
|
||||
float *dst = start + given;
|
||||
if (params->mode == LPMP_CONSTANT) {
|
||||
*dst = params->constant_values;
|
||||
} else {
|
||||
int buddy = after_get_buddy(given, channel + before_paddings, params->mode);
|
||||
float *src = start + buddy;
|
||||
*dst = *src;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
@@ -1,43 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2019 Guo Yejun
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file
|
||||
* layer pad (equivalent to tf.pad) for native backend.
|
||||
*/
|
||||
#ifndef AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_PAD_H
|
||||
#define AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_PAD_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "dnn_backend_native.h"
|
||||
|
||||
typedef enum {LPMP_CONSTANT, LPMP_REFLECT, LPMP_SYMMETRIC} LayerPadModeParam;
|
||||
|
||||
typedef struct LayerPadParams{
|
||||
int32_t paddings[4][2];
|
||||
LayerPadModeParam mode;
|
||||
float constant_values;
|
||||
} LayerPadParams;
|
||||
|
||||
int ff_dnn_load_layer_pad(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num);
|
||||
int ff_dnn_execute_layer_pad(DnnOperand *operands, const int32_t *input_operand_indexes,
|
||||
int32_t output_operand_index, const void *parameters, NativeContext *ctx);
|
||||
|
||||
#endif
|
@@ -1,42 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2019 Guo Yejun
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include "dnn_backend_native_layers.h"
|
||||
#include "dnn_backend_native_layer_pad.h"
|
||||
#include "dnn_backend_native_layer_conv2d.h"
|
||||
#include "dnn_backend_native_layer_depth2space.h"
|
||||
#include "dnn_backend_native_layer_maximum.h"
|
||||
#include "dnn_backend_native_layer_mathbinary.h"
|
||||
#include "dnn_backend_native_layer_mathunary.h"
|
||||
#include "dnn_backend_native_layer_avgpool.h"
|
||||
#include "dnn_backend_native_layer_dense.h"
|
||||
|
||||
const LayerFunc ff_layer_funcs[DLT_COUNT] = {
|
||||
{NULL, NULL},
|
||||
{ff_dnn_execute_layer_conv2d, ff_dnn_load_layer_conv2d},
|
||||
{ff_dnn_execute_layer_depth2space, ff_dnn_load_layer_depth2space},
|
||||
{ff_dnn_execute_layer_pad, ff_dnn_load_layer_pad},
|
||||
{ff_dnn_execute_layer_maximum, ff_dnn_load_layer_maximum},
|
||||
{ff_dnn_execute_layer_math_binary, ff_dnn_load_layer_math_binary},
|
||||
{ff_dnn_execute_layer_math_unary, ff_dnn_load_layer_math_unary},
|
||||
{ff_dnn_execute_layer_avg_pool, ff_dnn_load_layer_avg_pool},
|
||||
{ff_dnn_execute_layer_dense, ff_dnn_load_layer_dense},
|
||||
};
|
@@ -1,38 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2019 Guo Yejun
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYERS_H
|
||||
#define AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYERS_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "dnn_backend_native.h"
|
||||
|
||||
typedef int (*LAYER_EXEC_FUNC)(DnnOperand *operands, const int32_t *input_operand_indexes,
|
||||
int32_t output_operand_index, const void *parameters, NativeContext *ctx);
|
||||
typedef int (*LAYER_LOAD_FUNC)(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num);
|
||||
|
||||
typedef struct LayerFunc {
|
||||
LAYER_EXEC_FUNC pf_exec;
|
||||
LAYER_LOAD_FUNC pf_load;
|
||||
}LayerFunc;
|
||||
|
||||
extern const LayerFunc ff_layer_funcs[DLT_COUNT];
|
||||
|
||||
#endif
|
@@ -24,17 +24,13 @@
|
||||
*/
|
||||
|
||||
#include "dnn_backend_tf.h"
|
||||
#include "dnn_backend_native.h"
|
||||
#include "dnn_backend_native_layer_conv2d.h"
|
||||
#include "dnn_backend_native_layer_depth2space.h"
|
||||
#include "libavformat/avio.h"
|
||||
#include "libavutil/avassert.h"
|
||||
#include "libavutil/avstring.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/opt.h"
|
||||
#include "libavcodec/defs.h"
|
||||
#include "../internal.h"
|
||||
#include "dnn_backend_native_layer_pad.h"
|
||||
#include "dnn_backend_native_layer_maximum.h"
|
||||
#include "dnn_io_proc.h"
|
||||
#include "dnn_backend_common.h"
|
||||
#include "safe_queue.h"
|
||||
@@ -481,363 +477,6 @@ static int load_tf_model(TFModel *tf_model, const char *model_filename)
|
||||
|
||||
#define NAME_BUFFER_SIZE 256
|
||||
|
||||
static int add_conv_layer(TFModel *tf_model, TF_Operation *transpose_op, TF_Operation **cur_op,
|
||||
ConvolutionalParams* params, const int layer)
|
||||
{
|
||||
TFContext *ctx = &tf_model->ctx;
|
||||
TF_Operation *op;
|
||||
TF_OperationDescription *op_desc;
|
||||
TF_Output input;
|
||||
int64_t strides[] = {1, 1, 1, 1};
|
||||
TF_Tensor *kernel_tensor = NULL, *biases_tensor = NULL;
|
||||
int64_t dims[4];
|
||||
int dims_len;
|
||||
char name_buffer[NAME_BUFFER_SIZE];
|
||||
int32_t size;
|
||||
|
||||
size = params->input_num * params->output_num * params->kernel_size * params->kernel_size;
|
||||
input.index = 0;
|
||||
|
||||
snprintf(name_buffer, NAME_BUFFER_SIZE, "conv_kernel%d", layer);
|
||||
op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
|
||||
TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
|
||||
dims[0] = params->output_num;
|
||||
dims[1] = params->kernel_size;
|
||||
dims[2] = params->kernel_size;
|
||||
dims[3] = params->input_num;
|
||||
dims_len = 4;
|
||||
kernel_tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, size * sizeof(float));
|
||||
memcpy(TF_TensorData(kernel_tensor), params->kernel, size * sizeof(float));
|
||||
TF_SetAttrTensor(op_desc, "value", kernel_tensor, tf_model->status);
|
||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||
goto err;
|
||||
}
|
||||
op = TF_FinishOperation(op_desc, tf_model->status);
|
||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||
goto err;
|
||||
}
|
||||
|
||||
snprintf(name_buffer, NAME_BUFFER_SIZE, "transpose%d", layer);
|
||||
op_desc = TF_NewOperation(tf_model->graph, "Transpose", name_buffer);
|
||||
input.oper = op;
|
||||
TF_AddInput(op_desc, input);
|
||||
input.oper = transpose_op;
|
||||
TF_AddInput(op_desc, input);
|
||||
TF_SetAttrType(op_desc, "T", TF_FLOAT);
|
||||
TF_SetAttrType(op_desc, "Tperm", TF_INT32);
|
||||
op = TF_FinishOperation(op_desc, tf_model->status);
|
||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||
goto err;
|
||||
}
|
||||
|
||||
snprintf(name_buffer, NAME_BUFFER_SIZE, "conv2d%d", layer);
|
||||
op_desc = TF_NewOperation(tf_model->graph, "Conv2D", name_buffer);
|
||||
input.oper = *cur_op;
|
||||
TF_AddInput(op_desc, input);
|
||||
input.oper = op;
|
||||
TF_AddInput(op_desc, input);
|
||||
TF_SetAttrType(op_desc, "T", TF_FLOAT);
|
||||
TF_SetAttrIntList(op_desc, "strides", strides, 4);
|
||||
TF_SetAttrString(op_desc, "padding", "VALID", 5);
|
||||
*cur_op = TF_FinishOperation(op_desc, tf_model->status);
|
||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||
goto err;
|
||||
}
|
||||
|
||||
snprintf(name_buffer, NAME_BUFFER_SIZE, "conv_biases%d", layer);
|
||||
op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
|
||||
TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
|
||||
dims[0] = params->output_num;
|
||||
dims_len = 1;
|
||||
biases_tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, params->output_num * sizeof(float));
|
||||
memcpy(TF_TensorData(biases_tensor), params->biases, params->output_num * sizeof(float));
|
||||
TF_SetAttrTensor(op_desc, "value", biases_tensor, tf_model->status);
|
||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||
goto err;
|
||||
}
|
||||
op = TF_FinishOperation(op_desc, tf_model->status);
|
||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||
goto err;
|
||||
}
|
||||
|
||||
snprintf(name_buffer, NAME_BUFFER_SIZE, "bias_add%d", layer);
|
||||
op_desc = TF_NewOperation(tf_model->graph, "BiasAdd", name_buffer);
|
||||
input.oper = *cur_op;
|
||||
TF_AddInput(op_desc, input);
|
||||
input.oper = op;
|
||||
TF_AddInput(op_desc, input);
|
||||
TF_SetAttrType(op_desc, "T", TF_FLOAT);
|
||||
*cur_op = TF_FinishOperation(op_desc, tf_model->status);
|
||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||
goto err;
|
||||
}
|
||||
|
||||
snprintf(name_buffer, NAME_BUFFER_SIZE, "activation%d", layer);
|
||||
switch (params->activation){
|
||||
case RELU:
|
||||
op_desc = TF_NewOperation(tf_model->graph, "Relu", name_buffer);
|
||||
break;
|
||||
case TANH:
|
||||
op_desc = TF_NewOperation(tf_model->graph, "Tanh", name_buffer);
|
||||
break;
|
||||
case SIGMOID:
|
||||
op_desc = TF_NewOperation(tf_model->graph, "Sigmoid", name_buffer);
|
||||
break;
|
||||
default:
|
||||
avpriv_report_missing_feature(ctx, "convolutional activation function %d", params->activation);
|
||||
return AVERROR(ENOSYS);
|
||||
}
|
||||
input.oper = *cur_op;
|
||||
TF_AddInput(op_desc, input);
|
||||
TF_SetAttrType(op_desc, "T", TF_FLOAT);
|
||||
*cur_op = TF_FinishOperation(op_desc, tf_model->status);
|
||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||
goto err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
err:
|
||||
TF_DeleteTensor(kernel_tensor);
|
||||
TF_DeleteTensor(biases_tensor);
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to add conv layer %d\n", layer);
|
||||
return DNN_GENERIC_ERROR;
|
||||
}
|
||||
|
||||
static int add_depth_to_space_layer(TFModel *tf_model, TF_Operation **cur_op,
|
||||
DepthToSpaceParams *params, const int layer)
|
||||
{
|
||||
TFContext *ctx = &tf_model->ctx;
|
||||
TF_OperationDescription *op_desc;
|
||||
TF_Output input;
|
||||
char name_buffer[NAME_BUFFER_SIZE];
|
||||
|
||||
snprintf(name_buffer, NAME_BUFFER_SIZE, "depth_to_space%d", layer);
|
||||
op_desc = TF_NewOperation(tf_model->graph, "DepthToSpace", name_buffer);
|
||||
input.oper = *cur_op;
|
||||
input.index = 0;
|
||||
TF_AddInput(op_desc, input);
|
||||
TF_SetAttrType(op_desc, "T", TF_FLOAT);
|
||||
TF_SetAttrInt(op_desc, "block_size", params->block_size);
|
||||
*cur_op = TF_FinishOperation(op_desc, tf_model->status);
|
||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to add depth_to_space to layer %d\n", layer);
|
||||
return DNN_GENERIC_ERROR;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int add_pad_layer(TFModel *tf_model, TF_Operation **cur_op,
|
||||
LayerPadParams *params, const int layer)
|
||||
{
|
||||
TFContext *ctx = &tf_model->ctx;
|
||||
TF_Operation *op;
|
||||
TF_Tensor *tensor;
|
||||
TF_OperationDescription *op_desc;
|
||||
TF_Output input;
|
||||
int32_t *pads;
|
||||
int64_t pads_shape[] = {4, 2};
|
||||
|
||||
char name_buffer[NAME_BUFFER_SIZE];
|
||||
snprintf(name_buffer, NAME_BUFFER_SIZE, "pad%d", layer);
|
||||
|
||||
op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
|
||||
TF_SetAttrType(op_desc, "dtype", TF_INT32);
|
||||
tensor = TF_AllocateTensor(TF_INT32, pads_shape, 2, 4 * 2 * sizeof(int32_t));
|
||||
pads = (int32_t *)TF_TensorData(tensor);
|
||||
pads[0] = params->paddings[0][0];
|
||||
pads[1] = params->paddings[0][1];
|
||||
pads[2] = params->paddings[1][0];
|
||||
pads[3] = params->paddings[1][1];
|
||||
pads[4] = params->paddings[2][0];
|
||||
pads[5] = params->paddings[2][1];
|
||||
pads[6] = params->paddings[3][0];
|
||||
pads[7] = params->paddings[3][1];
|
||||
TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
|
||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||
TF_DeleteTensor(tensor);
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to set value for pad of layer %d\n", layer);
|
||||
return DNN_GENERIC_ERROR;
|
||||
}
|
||||
op = TF_FinishOperation(op_desc, tf_model->status);
|
||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||
TF_DeleteTensor(tensor);
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to add pad to layer %d\n", layer);
|
||||
return DNN_GENERIC_ERROR;
|
||||
}
|
||||
|
||||
op_desc = TF_NewOperation(tf_model->graph, "MirrorPad", "mirror_pad");
|
||||
input.oper = *cur_op;
|
||||
input.index = 0;
|
||||
TF_AddInput(op_desc, input);
|
||||
input.oper = op;
|
||||
TF_AddInput(op_desc, input);
|
||||
TF_SetAttrType(op_desc, "T", TF_FLOAT);
|
||||
TF_SetAttrType(op_desc, "Tpaddings", TF_INT32);
|
||||
TF_SetAttrString(op_desc, "mode", "SYMMETRIC", 9);
|
||||
*cur_op = TF_FinishOperation(op_desc, tf_model->status);
|
||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||
TF_DeleteTensor(tensor);
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to add mirror_pad to layer %d\n", layer);
|
||||
return DNN_GENERIC_ERROR;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int add_maximum_layer(TFModel *tf_model, TF_Operation **cur_op,
|
||||
DnnLayerMaximumParams *params, const int layer)
|
||||
{
|
||||
TFContext *ctx = &tf_model->ctx;
|
||||
TF_Operation *op;
|
||||
TF_Tensor *tensor;
|
||||
TF_OperationDescription *op_desc;
|
||||
TF_Output input;
|
||||
float *y;
|
||||
|
||||
char name_buffer[NAME_BUFFER_SIZE];
|
||||
snprintf(name_buffer, NAME_BUFFER_SIZE, "maximum/y%d", layer);
|
||||
|
||||
op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
|
||||
TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
|
||||
tensor = TF_AllocateTensor(TF_FLOAT, NULL, 0, TF_DataTypeSize(TF_FLOAT));
|
||||
y = (float *)TF_TensorData(tensor);
|
||||
*y = params->val.y;
|
||||
TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
|
||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||
TF_DeleteTensor(tensor);
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to set value for maximum/y of layer %d", layer);
|
||||
return DNN_GENERIC_ERROR;
|
||||
}
|
||||
op = TF_FinishOperation(op_desc, tf_model->status);
|
||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||
TF_DeleteTensor(tensor);
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to add maximum/y to layer %d\n", layer);
|
||||
return DNN_GENERIC_ERROR;
|
||||
}
|
||||
|
||||
snprintf(name_buffer, NAME_BUFFER_SIZE, "maximum%d", layer);
|
||||
op_desc = TF_NewOperation(tf_model->graph, "Maximum", name_buffer);
|
||||
input.oper = *cur_op;
|
||||
input.index = 0;
|
||||
TF_AddInput(op_desc, input);
|
||||
input.oper = op;
|
||||
TF_AddInput(op_desc, input);
|
||||
TF_SetAttrType(op_desc, "T", TF_FLOAT);
|
||||
*cur_op = TF_FinishOperation(op_desc, tf_model->status);
|
||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||
TF_DeleteTensor(tensor);
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to add maximum to layer %d\n", layer);
|
||||
return DNN_GENERIC_ERROR;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int load_native_model(TFModel *tf_model, const char *model_filename)
|
||||
{
|
||||
TFContext *ctx = &tf_model->ctx;
|
||||
int32_t layer;
|
||||
TF_OperationDescription *op_desc;
|
||||
TF_Operation *op;
|
||||
TF_Operation *transpose_op;
|
||||
TF_Tensor *tensor = NULL;
|
||||
TF_Output input;
|
||||
int32_t *transpose_perm;
|
||||
int64_t transpose_perm_shape[] = {4};
|
||||
int64_t input_shape[] = {1, -1, -1, -1};
|
||||
int layer_add_res;
|
||||
DNNModel *model = NULL;
|
||||
NativeModel *native_model;
|
||||
|
||||
model = ff_dnn_load_model_native(model_filename, DFT_PROCESS_FRAME, NULL, NULL);
|
||||
if (!model){
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to load native model\n");
|
||||
return AVERROR(EINVAL);
|
||||
}
|
||||
|
||||
native_model = model->model;
|
||||
tf_model->graph = TF_NewGraph();
|
||||
tf_model->status = TF_NewStatus();
|
||||
|
||||
#define CLEANUP_ON_ERROR(tf_model) \
|
||||
{ \
|
||||
TF_DeleteTensor(tensor); \
|
||||
TF_DeleteGraph(tf_model->graph); \
|
||||
TF_DeleteStatus(tf_model->status); \
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to set value or add operator to layer\n"); \
|
||||
return DNN_GENERIC_ERROR; \
|
||||
}
|
||||
|
||||
op_desc = TF_NewOperation(tf_model->graph, "Placeholder", "x");
|
||||
TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
|
||||
TF_SetAttrShape(op_desc, "shape", input_shape, 4);
|
||||
op = TF_FinishOperation(op_desc, tf_model->status);
|
||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||
CLEANUP_ON_ERROR(tf_model);
|
||||
}
|
||||
|
||||
op_desc = TF_NewOperation(tf_model->graph, "Const", "transpose_perm");
|
||||
TF_SetAttrType(op_desc, "dtype", TF_INT32);
|
||||
tensor = TF_AllocateTensor(TF_INT32, transpose_perm_shape, 1, 4 * sizeof(int32_t));
|
||||
transpose_perm = (int32_t *)TF_TensorData(tensor);
|
||||
transpose_perm[0] = 1;
|
||||
transpose_perm[1] = 2;
|
||||
transpose_perm[2] = 3;
|
||||
transpose_perm[3] = 0;
|
||||
TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
|
||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||
CLEANUP_ON_ERROR(tf_model);
|
||||
}
|
||||
transpose_op = TF_FinishOperation(op_desc, tf_model->status);
|
||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||
CLEANUP_ON_ERROR(tf_model);
|
||||
}
|
||||
|
||||
for (layer = 0; layer < native_model->layers_num; ++layer){
|
||||
switch (native_model->layers[layer].type){
|
||||
case DLT_INPUT:
|
||||
layer_add_res = 0;
|
||||
break;
|
||||
case DLT_CONV2D:
|
||||
layer_add_res = add_conv_layer(tf_model, transpose_op, &op,
|
||||
(ConvolutionalParams *)native_model->layers[layer].params, layer);
|
||||
break;
|
||||
case DLT_DEPTH_TO_SPACE:
|
||||
layer_add_res = add_depth_to_space_layer(tf_model, &op,
|
||||
(DepthToSpaceParams *)native_model->layers[layer].params, layer);
|
||||
break;
|
||||
case DLT_MIRROR_PAD:
|
||||
layer_add_res = add_pad_layer(tf_model, &op,
|
||||
(LayerPadParams *)native_model->layers[layer].params, layer);
|
||||
break;
|
||||
case DLT_MAXIMUM:
|
||||
layer_add_res = add_maximum_layer(tf_model, &op,
|
||||
(DnnLayerMaximumParams *)native_model->layers[layer].params, layer);
|
||||
break;
|
||||
default:
|
||||
CLEANUP_ON_ERROR(tf_model);
|
||||
}
|
||||
|
||||
if (layer_add_res != 0){
|
||||
CLEANUP_ON_ERROR(tf_model);
|
||||
}
|
||||
}
|
||||
|
||||
op_desc = TF_NewOperation(tf_model->graph, "Identity", "y");
|
||||
input.oper = op;
|
||||
input.index = 0;
|
||||
TF_AddInput(op_desc, input);
|
||||
TF_FinishOperation(op_desc, tf_model->status);
|
||||
if (TF_GetCode(tf_model->status) != TF_OK){
|
||||
CLEANUP_ON_ERROR(tf_model);
|
||||
}
|
||||
|
||||
ff_dnn_free_model_native(&model);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DNNModel *ff_dnn_load_model_tf(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx)
|
||||
{
|
||||
DNNModel *model = NULL;
|
||||
@@ -867,9 +506,8 @@ DNNModel *ff_dnn_load_model_tf(const char *model_filename, DNNFunctionType func_
|
||||
}
|
||||
|
||||
if (load_tf_model(tf_model, model_filename) != 0){
|
||||
if (load_native_model(tf_model, model_filename) != 0){
|
||||
goto err;
|
||||
}
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to load TensorFlow model: \"%s\"\n", model_filename);
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (ctx->options.nireq <= 0) {
|
||||
|
Reference in New Issue
Block a user