diff --git a/libavfilter/dnn/dnn_backend_native.c b/libavfilter/dnn/dnn_backend_native.c
index c8fb956dec..06b010d90e 100644
--- a/libavfilter/dnn/dnn_backend_native.c
+++ b/libavfilter/dnn/dnn_backend_native.c
@@ -25,10 +25,7 @@
 
 #include "dnn_backend_native.h"
 #include "libavutil/avassert.h"
-#include "dnn_backend_native_layer_pad.h"
 #include "dnn_backend_native_layer_conv2d.h"
-#include "dnn_backend_native_layer_depth2space.h"
-#include "dnn_backend_native_layer_maximum.h"
 #include "dnn_backend_native_layers.h"
 
 static DNNReturnType set_input_output_native(void *model, DNNInputData *input, const char *input_name, const char **output_names, uint32_t nb_output)
@@ -104,13 +101,9 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
     int version, header_size, major_version_expected = 0;
     ConvolutionalNetwork *network = NULL;
     AVIOContext *model_file_context;
-    int file_size, dnn_size, kernel_size, i;
+    int file_size, dnn_size, parsed_size;
     int32_t layer;
     DNNLayerType layer_type;
-    ConvolutionalParams *conv_params;
-    DepthToSpaceParams *depth_to_space_params;
-    LayerPadParams *pad_params;
-    DnnLayerMaximumParams *maximum_params;
 
     model = av_malloc(sizeof(DNNModel));
     if (!model){
@@ -189,104 +182,21 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
     for (layer = 0; layer < network->layers_num; ++layer){
         layer_type = (int32_t)avio_rl32(model_file_context);
         dnn_size += 4;
-        network->layers[layer].type = layer_type;
-        switch (layer_type){
-        case DLT_CONV2D:
-            conv_params = av_malloc(sizeof(ConvolutionalParams));
-            if (!conv_params){
-                avio_closep(&model_file_context);
-                ff_dnn_free_model_native(&model);
-                return NULL;
-            }
-            conv_params->dilation = (int32_t)avio_rl32(model_file_context);
-            conv_params->padding_method = (int32_t)avio_rl32(model_file_context);
-            conv_params->activation = (int32_t)avio_rl32(model_file_context);
-            conv_params->input_num = (int32_t)avio_rl32(model_file_context);
-            conv_params->output_num = (int32_t)avio_rl32(model_file_context);
-            conv_params->kernel_size = (int32_t)avio_rl32(model_file_context);
-            kernel_size = conv_params->input_num * conv_params->output_num *
-                          conv_params->kernel_size * conv_params->kernel_size;
-            dnn_size += 24 + (kernel_size + conv_params->output_num << 2);
-            if (dnn_size > file_size || conv_params->input_num <= 0 ||
-                conv_params->output_num <= 0 || conv_params->kernel_size <= 0){
-                avio_closep(&model_file_context);
-                av_freep(&conv_params);
-                ff_dnn_free_model_native(&model);
-                return NULL;
-            }
-            conv_params->kernel = av_malloc(kernel_size * sizeof(float));
-            conv_params->biases = av_malloc(conv_params->output_num * sizeof(float));
-            if (!conv_params->kernel || !conv_params->biases){
-                avio_closep(&model_file_context);
-                av_freep(&conv_params->kernel);
-                av_freep(&conv_params->biases);
-                av_freep(&conv_params);
-                ff_dnn_free_model_native(&model);
-                return NULL;
-            }
-            for (i = 0; i < kernel_size; ++i){
-                conv_params->kernel[i] = av_int2float(avio_rl32(model_file_context));
-            }
-            for (i = 0; i < conv_params->output_num; ++i){
-                conv_params->biases[i] = av_int2float(avio_rl32(model_file_context));
-            }
-            network->layers[layer].input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
-            network->layers[layer].output_operand_index = (int32_t)avio_rl32(model_file_context);
-            dnn_size += 8;
-            network->layers[layer].params = conv_params;
-            break;
-        case DLT_DEPTH_TO_SPACE:
-            depth_to_space_params = av_malloc(sizeof(DepthToSpaceParams));
-            if (!depth_to_space_params){
-                avio_closep(&model_file_context);
-                ff_dnn_free_model_native(&model);
-                return NULL;
-            }
-            depth_to_space_params->block_size = (int32_t)avio_rl32(model_file_context);
-            dnn_size += 4;
-            network->layers[layer].input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
-            network->layers[layer].output_operand_index = (int32_t)avio_rl32(model_file_context);
-            dnn_size += 8;
-            network->layers[layer].params = depth_to_space_params;
-            break;
-        case DLT_MIRROR_PAD:
-            pad_params = av_malloc(sizeof(LayerPadParams));
-            if (!pad_params){
-                avio_closep(&model_file_context);
-                ff_dnn_free_model_native(&model);
-                return NULL;
-            }
-            pad_params->mode = (int32_t)avio_rl32(model_file_context);
-            dnn_size += 4;
-            for (i = 0; i < 4; ++i) {
-                pad_params->paddings[i][0] = avio_rl32(model_file_context);
-                pad_params->paddings[i][1] = avio_rl32(model_file_context);
-                dnn_size += 8;
-            }
-            network->layers[layer].input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
-            network->layers[layer].output_operand_index = (int32_t)avio_rl32(model_file_context);
-            dnn_size += 8;
-            network->layers[layer].params = pad_params;
-            break;
-        case DLT_MAXIMUM:
-            maximum_params = av_malloc(sizeof(*maximum_params));
-            if (!maximum_params){
-                avio_closep(&model_file_context);
-                ff_dnn_free_model_native(&model);
-                return NULL;
-            }
-            maximum_params->val.u32 = avio_rl32(model_file_context);
-            dnn_size += 4;
-            network->layers[layer].params = maximum_params;
-            network->layers[layer].input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
-            network->layers[layer].output_operand_index = (int32_t)avio_rl32(model_file_context);
-            dnn_size += 8;
-            break;
-        default:
+
+        if (layer_type >= DLT_COUNT) {
             avio_closep(&model_file_context);
             ff_dnn_free_model_native(&model);
             return NULL;
         }
+
+        network->layers[layer].type = layer_type;
+        parsed_size = layer_funcs[layer_type].pf_load(&network->layers[layer], model_file_context, file_size);
+        if (!parsed_size) {
+            avio_closep(&model_file_context);
+            ff_dnn_free_model_native(&model);
+            return NULL;
+        }
+        dnn_size += parsed_size;
     }
 
     for (int32_t i = 0; i < network->operands_num; ++i){
@@ -341,7 +251,7 @@ DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *output
 
     for (layer = 0; layer < network->layers_num; ++layer){
         DNNLayerType layer_type = network->layers[layer].type;
-        layer_funcs[layer_type](network->operands,
+        layer_funcs[layer_type].pf_exec(network->operands,
                                 network->layers[layer].input_operand_indexes,
                                 network->layers[layer].output_operand_index,
                                 network->layers[layer].params);
diff --git a/libavfilter/dnn/dnn_backend_native.h b/libavfilter/dnn/dnn_backend_native.h
index 9821390194..53ed22c5e2 100644
--- a/libavfilter/dnn/dnn_backend_native.h
+++ b/libavfilter/dnn/dnn_backend_native.h
@@ -33,7 +33,7 @@
 /**
  * the enum value of DNNLayerType should not be changed,
  * the same values are used in convert_from_tensorflow.py
- * and, it is used to index the layer execution function pointer.
+ * and, it is used to index the layer execution/load function pointer.
  */
 typedef enum {
     DLT_INPUT = 0,
diff --git a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
index 594187f5b1..0de890217d 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
@@ -23,6 +23,52 @@
 
 #define CLAMP_TO_EDGE(x, w) ((x) < 0 ? 0 : ((x) >= (w) ? (w - 1) : (x)))
 
+int dnn_load_layer_conv2d(Layer *layer, AVIOContext *model_file_context, int file_size)
+{
+    ConvolutionalParams *conv_params;
+    int kernel_size;
+    int dnn_size = 0;
+    conv_params = av_malloc(sizeof(*conv_params));
+    if (!conv_params)
+        return 0;
+
+    conv_params->dilation = (int32_t)avio_rl32(model_file_context);
+    conv_params->padding_method = (int32_t)avio_rl32(model_file_context);
+    conv_params->activation = (int32_t)avio_rl32(model_file_context);
+    conv_params->input_num = (int32_t)avio_rl32(model_file_context);
+    conv_params->output_num = (int32_t)avio_rl32(model_file_context);
+    conv_params->kernel_size = (int32_t)avio_rl32(model_file_context);
+    kernel_size = conv_params->input_num * conv_params->output_num *
+                  conv_params->kernel_size * conv_params->kernel_size;
+    dnn_size += 24 + (kernel_size + conv_params->output_num << 2);
+    if (dnn_size > file_size || conv_params->input_num <= 0 ||
+        conv_params->output_num <= 0 || conv_params->kernel_size <= 0){
+        av_freep(&conv_params);
+        return 0;
+    }
+    conv_params->kernel = av_malloc(kernel_size * sizeof(float));
+    conv_params->biases = av_malloc(conv_params->output_num * sizeof(float));
+    if (!conv_params->kernel || !conv_params->biases){
+        av_freep(&conv_params->kernel);
+        av_freep(&conv_params->biases);
+        av_freep(&conv_params);
+        return 0;
+    }
+    for (int i = 0; i < kernel_size; ++i){
+        conv_params->kernel[i] = av_int2float(avio_rl32(model_file_context));
+    }
+    for (int i = 0; i < conv_params->output_num; ++i){
+        conv_params->biases[i] = av_int2float(avio_rl32(model_file_context));
+    }
+
+    layer->params = conv_params;
+
+    layer->input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
+    layer->output_operand_index = (int32_t)avio_rl32(model_file_context);
+    dnn_size += 8;
+    return dnn_size;
+}
+
 int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_indexes,
                              int32_t output_operand_index, const void *parameters)
 {
diff --git a/libavfilter/dnn/dnn_backend_native_layer_conv2d.h b/libavfilter/dnn/dnn_backend_native_layer_conv2d.h
index 1dd84cb8f6..db90b2b6f6 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_conv2d.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_conv2d.h
@@ -35,6 +35,7 @@ typedef struct ConvolutionalParams{
     float *biases;
 } ConvolutionalParams;
 
+int dnn_load_layer_conv2d(Layer *layer, AVIOContext *model_file_context, int file_size);
 int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_indexes,
                              int32_t output_operand_index, const void *parameters);
 
 #endif
diff --git a/libavfilter/dnn/dnn_backend_native_layer_depth2space.c b/libavfilter/dnn/dnn_backend_native_layer_depth2space.c
index 37200607b2..174676e14a 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_depth2space.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_depth2space.c
@@ -27,6 +27,24 @@
 #include "libavutil/avassert.h"
 #include "dnn_backend_native_layer_depth2space.h"
 
+int dnn_load_layer_depth2space(Layer *layer, AVIOContext *model_file_context, int file_size)
+{
+    DepthToSpaceParams *params;
+    int dnn_size = 0;
+    params = av_malloc(sizeof(*params));
+    if (!params)
+        return 0;
+
+    params->block_size = (int32_t)avio_rl32(model_file_context);
+    dnn_size += 4;
+    layer->input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
+    layer->output_operand_index = (int32_t)avio_rl32(model_file_context);
+    dnn_size += 8;
+    layer->params = params;
+
+    return dnn_size;
+}
+
 int dnn_execute_layer_depth2space(DnnOperand *operands, const int32_t *input_operand_indexes,
                                   int32_t output_operand_index, const void *parameters)
 {
diff --git a/libavfilter/dnn/dnn_backend_native_layer_depth2space.h b/libavfilter/dnn/dnn_backend_native_layer_depth2space.h
index c481bf1e5c..e5465f1cb4 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_depth2space.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_depth2space.h
@@ -34,6 +34,7 @@ typedef struct DepthToSpaceParams{
     int block_size;
 } DepthToSpaceParams;
 
+int dnn_load_layer_depth2space(Layer *layer, AVIOContext *model_file_context, int file_size);
 int dnn_execute_layer_depth2space(DnnOperand *operands, const int32_t *input_operand_indexes,
                                   int32_t output_operand_index, const void *parameters);
 
diff --git a/libavfilter/dnn/dnn_backend_native_layer_maximum.c b/libavfilter/dnn/dnn_backend_native_layer_maximum.c
index 6add170319..19f0e8da01 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_maximum.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_maximum.c
@@ -27,6 +27,24 @@
 #include "libavutil/avassert.h"
 #include "dnn_backend_native_layer_maximum.h"
 
+int dnn_load_layer_maximum(Layer *layer, AVIOContext *model_file_context, int file_size)
+{
+    DnnLayerMaximumParams *params;
+    int dnn_size = 0;
+    params = av_malloc(sizeof(*params));
+    if (!params)
+        return 0;
+
+    params->val.u32 = avio_rl32(model_file_context);
+    dnn_size += 4;
+    layer->params = params;
+    layer->input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
+    layer->output_operand_index = (int32_t)avio_rl32(model_file_context);
+    dnn_size += 8;
+
+    return dnn_size;
+}
+
 int dnn_execute_layer_maximum(DnnOperand *operands, const int32_t *input_operand_indexes,
                               int32_t output_operand_index, const void *parameters)
 {
diff --git a/libavfilter/dnn/dnn_backend_native_layer_maximum.h b/libavfilter/dnn/dnn_backend_native_layer_maximum.h
index 87f3bf5a80..601158b1be 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_maximum.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_maximum.h
@@ -37,6 +37,7 @@ typedef struct DnnLayerMaximumParams{
     }val;
 } DnnLayerMaximumParams;
 
+int dnn_load_layer_maximum(Layer *layer, AVIOContext *model_file_context, int file_size);
 int dnn_execute_layer_maximum(DnnOperand *operands, const int32_t *input_operand_indexes,
                               int32_t output_operand_index, const void *parameters);
 
diff --git a/libavfilter/dnn/dnn_backend_native_layer_pad.c b/libavfilter/dnn/dnn_backend_native_layer_pad.c
index f5c572728f..8fa35de196 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_pad.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_pad.c
@@ -22,6 +22,29 @@
 #include "libavutil/avassert.h"
 #include "dnn_backend_native_layer_pad.h"
 
+int dnn_load_layer_pad(Layer *layer, AVIOContext *model_file_context, int file_size)
+{
+    LayerPadParams *params;
+    int dnn_size = 0;
+    params = av_malloc(sizeof(*params));
+    if (!params)
+        return 0;
+
+    params->mode = (int32_t)avio_rl32(model_file_context);
+    dnn_size += 4;
+    for (int i = 0; i < 4; ++i) {
+        params->paddings[i][0] = avio_rl32(model_file_context);
+        params->paddings[i][1] = avio_rl32(model_file_context);
+        dnn_size += 8;
+    }
+    layer->input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
+    layer->output_operand_index = (int32_t)avio_rl32(model_file_context);
+    dnn_size += 8;
+    layer->params = params;
+
+    return dnn_size;
+}
+
 static int before_get_buddy(int given, int paddings, LayerPadModeParam mode)
 {
     if (mode == LPMP_SYMMETRIC) {
diff --git a/libavfilter/dnn/dnn_backend_native_layer_pad.h b/libavfilter/dnn/dnn_backend_native_layer_pad.h
index 036ff7b86f..936a9bd010 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_pad.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_pad.h
@@ -36,6 +36,7 @@ typedef struct LayerPadParams{
     float constant_values;
 } LayerPadParams;
 
+int dnn_load_layer_pad(Layer *layer, AVIOContext *model_file_context, int file_size);
 int dnn_execute_layer_pad(DnnOperand *operands, const int32_t *input_operand_indexes,
                           int32_t output_operand_index, const void *parameters);
 
diff --git a/libavfilter/dnn/dnn_backend_native_layers.c b/libavfilter/dnn/dnn_backend_native_layers.c
index 17b91bb7ab..d659667de1 100644
--- a/libavfilter/dnn/dnn_backend_native_layers.c
+++ b/libavfilter/dnn/dnn_backend_native_layers.c
@@ -25,10 +25,10 @@
 #include "dnn_backend_native_layer_depth2space.h"
 #include "dnn_backend_native_layer_maximum.h"
 
-LAYER_EXEC_FUNC layer_funcs[DLT_COUNT] = {
-    NULL,
-    dnn_execute_layer_conv2d,
-    dnn_execute_layer_depth2space,
-    dnn_execute_layer_pad,
-    dnn_execute_layer_maximum,
+LayerFunc layer_funcs[DLT_COUNT] = {
+    {NULL, NULL},
+    {dnn_execute_layer_conv2d, dnn_load_layer_conv2d},
+    {dnn_execute_layer_depth2space, dnn_load_layer_depth2space},
+    {dnn_execute_layer_pad, dnn_load_layer_pad},
+    {dnn_execute_layer_maximum, dnn_load_layer_maximum},
 };
diff --git a/libavfilter/dnn/dnn_backend_native_layers.h b/libavfilter/dnn/dnn_backend_native_layers.h
index 3276aeea92..2df0ce9953 100644
--- a/libavfilter/dnn/dnn_backend_native_layers.h
+++ b/libavfilter/dnn/dnn_backend_native_layers.h
@@ -26,7 +26,13 @@
 
 typedef int (*LAYER_EXEC_FUNC)(DnnOperand *operands, const int32_t *input_operand_indexes,
                                int32_t output_operand_index, const void *parameters);
+typedef int (*LAYER_LOAD_FUNC)(Layer *layer, AVIOContext *model_file_context, int file_size);
 
-extern LAYER_EXEC_FUNC layer_funcs[DLT_COUNT];
+typedef struct LayerFunc {
+    LAYER_EXEC_FUNC pf_exec;
+    LAYER_LOAD_FUNC pf_load;
+}LayerFunc;
+
+extern LayerFunc layer_funcs[DLT_COUNT];
 
 #endif
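
With the LayerFunc table above, adding a new native layer only requires a load/exec pair plus one table entry; the loader returns the number of bytes it consumed from the model file, or 0 on failure, which ff_dnn_load_model_native() then adds to dnn_size. A minimal sketch of such a loader follows; DLT_FOO, FooParams and dnn_load_layer_foo are hypothetical names used only for illustration and are not part of this patch:

    /* hypothetical dnn_backend_native_layer_foo.c */
    typedef struct FooParams {
        int32_t some_field;
    } FooParams;

    int dnn_load_layer_foo(Layer *layer, AVIOContext *model_file_context, int file_size)
    {
        FooParams *params;
        int dnn_size = 0;
        params = av_malloc(sizeof(*params));
        if (!params)
            return 0;                    /* 0 signals an alloc/parse failure to the caller */

        /* layer-specific payload, read little-endian from the model file */
        params->some_field = (int32_t)avio_rl32(model_file_context);
        dnn_size += 4;

        /* every layer ends with its input/output operand indexes */
        layer->input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
        layer->output_operand_index = (int32_t)avio_rl32(model_file_context);
        dnn_size += 8;
        layer->params = params;

        return dnn_size;                 /* bytes consumed from the model file */
    }

    /* registered as one more row in layer_funcs[] (dnn_backend_native_layers.c),
     * indexed by the new DLT_FOO enum value:
     *     {dnn_execute_layer_foo, dnn_load_layer_foo},
     */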