dnn: add tf.nn.conv2d support for native model

Unlike other tf.*.conv2d layers, tf.nn.conv2d does not create many nodes (within a scope) in the graph; it just acts like other layers. tf.nn.conv2d only creates one node in the graph, and no internal nodes such as 'kernel' are created. The format of the native model file is also changed: a flag named has_bias is added, so the major version number is increased. Signed-off-by: Guo, Yejun <yejun.guo@intel.com> Signed-off-by: Pedro Arthur <bygrandao@gmail.com>
2025-07-16 22:42:38 +02:00 · 2019-10-21 20:38:03 +08:00
parent a269fa044b
commit dff39ea9f0
6 changed files with 82 additions and 18 deletions
--- a/libavfilter/dnn/dnn_backend_native.c
+++ b/libavfilter/dnn/dnn_backend_native.c
@ -98,7 +98,7 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
    char header_expected[] = "FFMPEGDNNNATIVE";
    char *buf;
    size_t size;
-    int version, header_size, major_version_expected = 0;
+    int version, header_size, major_version_expected = 1;
    ConvolutionalNetwork *network = NULL;
    AVIOContext *model_file_context;
    int file_size, dnn_size, parsed_size;
--- a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
@ -38,28 +38,42 @@ int dnn_load_layer_conv2d(Layer *layer, AVIOContext *model_file_context, int fil
    conv_params->input_num = (int32_t)avio_rl32(model_file_context);
    conv_params->output_num = (int32_t)avio_rl32(model_file_context);
    conv_params->kernel_size = (int32_t)avio_rl32(model_file_context);
+    conv_params->has_bias = (int32_t)avio_rl32(model_file_context);
+    dnn_size += 28;
+
    kernel_size = conv_params->input_num * conv_params->output_num *
                      conv_params->kernel_size * conv_params->kernel_size;
-    dnn_size += 24 + (kernel_size + conv_params->output_num << 2);
+    dnn_size += kernel_size * 4;
+    if (conv_params->has_bias)
+        dnn_size += conv_params->output_num * 4;
+
    if (dnn_size > file_size || conv_params->input_num <= 0 ||
        conv_params->output_num <= 0 || conv_params->kernel_size <= 0){
        av_freep(&conv_params);
        return 0;
    }
+
    conv_params->kernel = av_malloc(kernel_size * sizeof(float));
-    conv_params->biases = av_malloc(conv_params->output_num * sizeof(float));
-    if (!conv_params->kernel || !conv_params->biases){
-        av_freep(&conv_params->kernel);
-        av_freep(&conv_params->biases);
+    if (!conv_params->kernel) {
        av_freep(&conv_params);
        return 0;
    }
-    for (int i = 0; i < kernel_size; ++i){
+    for (int i = 0; i < kernel_size; ++i) {
        conv_params->kernel[i] = av_int2float(avio_rl32(model_file_context));
    }
+
+    conv_params->biases = NULL;
+    if (conv_params->has_bias) {
+        conv_params->biases = av_malloc(conv_params->output_num * sizeof(float));
+        if (!conv_params->biases){
+            av_freep(&conv_params->kernel);
+            av_freep(&conv_params);
+            return 0;
+        }
        for (int i = 0; i < conv_params->output_num; ++i){
            conv_params->biases[i] = av_int2float(avio_rl32(model_file_context));
        }
+    }

    layer->params = conv_params;

@ -103,7 +117,10 @@ int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_
    for (int y = pad_size; y < height - pad_size; ++y) {
        for (int x = pad_size; x < width - pad_size; ++x) {
            for (int n_filter = 0; n_filter < conv_params->output_num; ++n_filter) {
+                if (conv_params->has_bias)
                    output[n_filter] = conv_params->biases[n_filter];
+                else
+                    output[n_filter] = 0.f;

                for (int ch = 0; ch < conv_params->input_num; ++ch) {
                    for (int kernel_y = 0; kernel_y < conv_params->kernel_size; ++kernel_y) {
--- a/libavfilter/dnn/dnn_backend_native_layer_conv2d.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_conv2d.h
@ -31,6 +31,7 @@ typedef struct ConvolutionalParams{
    DNNActivationFunc activation;
    DNNConvPaddingParam padding_method;
    int32_t dilation;
+    int32_t has_bias;
    float *kernel;
    float *biases;
 } ConvolutionalParams;
--- a/tests/dnn/dnn-layer-conv2d-test.c
+++ b/tests/dnn/dnn-layer-conv2d-test.c
@ -97,6 +97,7 @@ static int test_with_same_dilate(void)
    float bias[2] = { -1.6574852, -0.72915393 };

    params.activation = TANH;
+    params.has_bias = 1;
    params.biases = bias;
    params.dilation = 2;
    params.input_num = 3;
@ -196,6 +197,7 @@ static int test_with_valid(void)
    float bias[2] = { -0.4773722, -0.19620377 };

    params.activation = TANH;
+    params.has_bias = 1;
    params.biases = bias;
    params.dilation = 1;
    params.input_num = 3;
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@ -118,7 +118,7 @@ class TFConverter:
        return knode, bnode, dnode, anode


-    def dump_conv2d_to_file(self, node, f):
+    def dump_complex_conv2d_to_file(self, node, f):
        assert(node.op == 'Conv2D')
        self.layer_number = self.layer_number + 1
        self.converted_nodes.add(node.name)
@ -153,7 +153,8 @@ class TFConverter:
        kernel = kernel.reshape(filter_height, filter_width, in_channels, out_channels)
        kernel = np.transpose(kernel, [3, 0, 1, 2])

-        np.array([self.op2code[node.op], dilation, padding, self.conv_activations[activation], in_channels, out_channels, filter_height], dtype=np.uint32).tofile(f)
+        has_bias = 1
+        np.array([self.op2code[node.op], dilation, padding, self.conv_activations[activation], in_channels, out_channels, filter_height, has_bias], dtype=np.uint32).tofile(f)
        kernel.tofile(f)

        btensor = bnode.attr['value'].tensor
@ -173,6 +174,41 @@ class TFConverter:
        np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)


+    def dump_simple_conv2d_to_file(self, node, f):
+        assert(node.op == 'Conv2D')
+        self.layer_number = self.layer_number + 1
+        self.converted_nodes.add(node.name)
+
+        node0 = self.name_node_dict[node.input[0]]
+        node1 = self.name_node_dict[node.input[1]]
+        if node0.op == 'Const':
+            knode = node0
+            input_name = node.input[1]
+        else:
+            knode = node1
+            input_name = node.input[0]
+
+        ktensor = knode.attr['value'].tensor
+        filter_height = ktensor.tensor_shape.dim[0].size
+        filter_width = ktensor.tensor_shape.dim[1].size
+        in_channels = ktensor.tensor_shape.dim[2].size
+        out_channels = ktensor.tensor_shape.dim[3].size
+        kernel = np.frombuffer(ktensor.tensor_content, dtype=np.float32)
+        kernel = kernel.reshape(filter_height, filter_width, in_channels, out_channels)
+        kernel = np.transpose(kernel, [3, 0, 1, 2])
+
+        has_bias = 0
+        dilation = 1
+        padding = node.attr['padding'].s.decode("utf-8")
+        np.array([self.op2code[node.op], dilation, self.conv_paddings[padding], self.conv_activations['None'],
+                  in_channels, out_channels, filter_height, has_bias], dtype=np.uint32).tofile(f)
+        kernel.tofile(f)
+
+        input_operand_index = self.add_operand(input_name, Operand.IOTYPE_INPUT)
+        output_operand_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT)
+        np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
+
+
    def dump_depth2space_to_file(self, node, f):
        assert(node.op == 'DepthToSpace')
        self.layer_number = self.layer_number + 1
@ -222,10 +258,12 @@ class TFConverter:
            scope_name = TFConverter.get_scope_name(node.name)
            if scope_name in self.conv2d_scope_names:
                if node.op == 'Conv2D':
-                    self.dump_conv2d_to_file(node, f)
+                    self.dump_complex_conv2d_to_file(node, f)
                continue

-            if node.op == 'DepthToSpace':
+            if node.op == 'Conv2D':
+                self.dump_simple_conv2d_to_file(node, f)
+            elif node.op == 'DepthToSpace':
                self.dump_depth2space_to_file(node, f)
            elif node.op == 'MirrorPad':
                self.dump_mirrorpad_to_file(node, f)
@ -312,10 +350,16 @@ class TFConverter:


    def generate_conv2d_scope_info(self):
-        # conv2d is a sub block in graph, get the scope name
+        # mostly, conv2d is a sub block in graph, get the scope name
        for node in self.nodes:
            if node.op == 'Conv2D':
                scope = TFConverter.get_scope_name(node.name)
+                # for the case tf.nn.conv2d is called directly
+                if scope == '':
+                    continue
+                # for the case tf.nn.conv2d is called within a scope
+                if scope + '/kernel' not in self.name_node_dict:
+                    continue
                self.conv2d_scope_names.add(scope)

        # get the input name to the conv2d sub block
--- a/tools/python/convert_header.py
+++ b/tools/python/convert_header.py
@ -20,7 +20,7 @@
 str = 'FFMPEGDNNNATIVE'

 # increase major and reset minor when we have to re-convert the model file
-major = 0
+major = 1

 # increase minor when we don't have to re-convert the model file
-minor = 2
+minor = 0