diff --git a/libavfilter/dnn/dnn_backend_native.c b/libavfilter/dnn/dnn_backend_native.c index 06b010d90e..ff280b5506 100644 --- a/libavfilter/dnn/dnn_backend_native.c +++ b/libavfilter/dnn/dnn_backend_native.c @@ -98,7 +98,7 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename) char header_expected[] = "FFMPEGDNNNATIVE"; char *buf; size_t size; - int version, header_size, major_version_expected = 0; + int version, header_size, major_version_expected = 1; ConvolutionalNetwork *network = NULL; AVIOContext *model_file_context; int file_size, dnn_size, parsed_size; diff --git a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c index 0de890217d..6ec0fa7a99 100644 --- a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c +++ b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c @@ -38,27 +38,41 @@ int dnn_load_layer_conv2d(Layer *layer, AVIOContext *model_file_context, int fil conv_params->input_num = (int32_t)avio_rl32(model_file_context); conv_params->output_num = (int32_t)avio_rl32(model_file_context); conv_params->kernel_size = (int32_t)avio_rl32(model_file_context); + conv_params->has_bias = (int32_t)avio_rl32(model_file_context); + dnn_size += 28; + kernel_size = conv_params->input_num * conv_params->output_num * - conv_params->kernel_size * conv_params->kernel_size; - dnn_size += 24 + (kernel_size + conv_params->output_num << 2); + conv_params->kernel_size * conv_params->kernel_size; + dnn_size += kernel_size * 4; + if (conv_params->has_bias) + dnn_size += conv_params->output_num * 4; + if (dnn_size > file_size || conv_params->input_num <= 0 || conv_params->output_num <= 0 || conv_params->kernel_size <= 0){ av_freep(&conv_params); return 0; } + conv_params->kernel = av_malloc(kernel_size * sizeof(float)); - conv_params->biases = av_malloc(conv_params->output_num * sizeof(float)); - if (!conv_params->kernel || !conv_params->biases){ - av_freep(&conv_params->kernel); - av_freep(&conv_params->biases); + if (!conv_params->kernel) { av_freep(&conv_params); return 0; } - for (int i = 0; i < kernel_size; ++i){ + for (int i = 0; i < kernel_size; ++i) { conv_params->kernel[i] = av_int2float(avio_rl32(model_file_context)); } - for (int i = 0; i < conv_params->output_num; ++i){ - conv_params->biases[i] = av_int2float(avio_rl32(model_file_context)); + + conv_params->biases = NULL; + if (conv_params->has_bias) { + conv_params->biases = av_malloc(conv_params->output_num * sizeof(float)); + if (!conv_params->biases){ + av_freep(&conv_params->kernel); + av_freep(&conv_params); + return 0; + } + for (int i = 0; i < conv_params->output_num; ++i){ + conv_params->biases[i] = av_int2float(avio_rl32(model_file_context)); + } } layer->params = conv_params; @@ -103,7 +117,10 @@ int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_ for (int y = pad_size; y < height - pad_size; ++y) { for (int x = pad_size; x < width - pad_size; ++x) { for (int n_filter = 0; n_filter < conv_params->output_num; ++n_filter) { - output[n_filter] = conv_params->biases[n_filter]; + if (conv_params->has_bias) + output[n_filter] = conv_params->biases[n_filter]; + else + output[n_filter] = 0.f; for (int ch = 0; ch < conv_params->input_num; ++ch) { for (int kernel_y = 0; kernel_y < conv_params->kernel_size; ++kernel_y) { diff --git a/libavfilter/dnn/dnn_backend_native_layer_conv2d.h b/libavfilter/dnn/dnn_backend_native_layer_conv2d.h index db90b2b6f6..bf872642dd 100644 --- a/libavfilter/dnn/dnn_backend_native_layer_conv2d.h +++ b/libavfilter/dnn/dnn_backend_native_layer_conv2d.h @@ -31,6 +31,7 @@ typedef struct ConvolutionalParams{ DNNActivationFunc activation; DNNConvPaddingParam padding_method; int32_t dilation; + int32_t has_bias; float *kernel; float *biases; } ConvolutionalParams; diff --git a/tests/dnn/dnn-layer-conv2d-test.c b/tests/dnn/dnn-layer-conv2d-test.c index 9d13da37c8..2da01e5372 100644 --- a/tests/dnn/dnn-layer-conv2d-test.c +++ b/tests/dnn/dnn-layer-conv2d-test.c @@ -97,6 +97,7 @@ static int test_with_same_dilate(void) float bias[2] = { -1.6574852, -0.72915393 }; params.activation = TANH; + params.has_bias = 1; params.biases = bias; params.dilation = 2; params.input_num = 3; @@ -196,6 +197,7 @@ static int test_with_valid(void) float bias[2] = { -0.4773722, -0.19620377 }; params.activation = TANH; + params.has_bias = 1; params.biases = bias; params.dilation = 1; params.input_num = 3; diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py index a663b34004..605158a32e 100644 --- a/tools/python/convert_from_tensorflow.py +++ b/tools/python/convert_from_tensorflow.py @@ -118,7 +118,7 @@ class TFConverter: return knode, bnode, dnode, anode - def dump_conv2d_to_file(self, node, f): + def dump_complex_conv2d_to_file(self, node, f): assert(node.op == 'Conv2D') self.layer_number = self.layer_number + 1 self.converted_nodes.add(node.name) @@ -153,7 +153,8 @@ class TFConverter: kernel = kernel.reshape(filter_height, filter_width, in_channels, out_channels) kernel = np.transpose(kernel, [3, 0, 1, 2]) - np.array([self.op2code[node.op], dilation, padding, self.conv_activations[activation], in_channels, out_channels, filter_height], dtype=np.uint32).tofile(f) + has_bias = 1 + np.array([self.op2code[node.op], dilation, padding, self.conv_activations[activation], in_channels, out_channels, filter_height, has_bias], dtype=np.uint32).tofile(f) kernel.tofile(f) btensor = bnode.attr['value'].tensor @@ -173,6 +174,41 @@ class TFConverter: np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f) + def dump_simple_conv2d_to_file(self, node, f): + assert(node.op == 'Conv2D') + self.layer_number = self.layer_number + 1 + self.converted_nodes.add(node.name) + + node0 = self.name_node_dict[node.input[0]] + node1 = self.name_node_dict[node.input[1]] + if node0.op == 'Const': + knode = node0 + input_name = node.input[1] + else: + knode = node1 + input_name = node.input[0] + + ktensor = knode.attr['value'].tensor + filter_height = ktensor.tensor_shape.dim[0].size + filter_width = ktensor.tensor_shape.dim[1].size + in_channels = ktensor.tensor_shape.dim[2].size + out_channels = ktensor.tensor_shape.dim[3].size + kernel = np.frombuffer(ktensor.tensor_content, dtype=np.float32) + kernel = kernel.reshape(filter_height, filter_width, in_channels, out_channels) + kernel = np.transpose(kernel, [3, 0, 1, 2]) + + has_bias = 0 + dilation = 1 + padding = node.attr['padding'].s.decode("utf-8") + np.array([self.op2code[node.op], dilation, self.conv_paddings[padding], self.conv_activations['None'], + in_channels, out_channels, filter_height, has_bias], dtype=np.uint32).tofile(f) + kernel.tofile(f) + + input_operand_index = self.add_operand(input_name, Operand.IOTYPE_INPUT) + output_operand_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT) + np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f) + + def dump_depth2space_to_file(self, node, f): assert(node.op == 'DepthToSpace') self.layer_number = self.layer_number + 1 @@ -222,10 +258,12 @@ class TFConverter: scope_name = TFConverter.get_scope_name(node.name) if scope_name in self.conv2d_scope_names: if node.op == 'Conv2D': - self.dump_conv2d_to_file(node, f) + self.dump_complex_conv2d_to_file(node, f) continue - if node.op == 'DepthToSpace': + if node.op == 'Conv2D': + self.dump_simple_conv2d_to_file(node, f) + elif node.op == 'DepthToSpace': self.dump_depth2space_to_file(node, f) elif node.op == 'MirrorPad': self.dump_mirrorpad_to_file(node, f) @@ -312,10 +350,16 @@ class TFConverter: def generate_conv2d_scope_info(self): - # conv2d is a sub block in graph, get the scope name + # mostly, conv2d is a sub block in graph, get the scope name for node in self.nodes: if node.op == 'Conv2D': scope = TFConverter.get_scope_name(node.name) + # for the case tf.nn.conv2d is called directly + if scope == '': + continue + # for the case tf.nn.conv2d is called within a scope + if scope + '/kernel' not in self.name_node_dict: + continue self.conv2d_scope_names.add(scope) # get the input name to the conv2d sub block diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py index 3c2acd5b15..67672b2785 100644 --- a/tools/python/convert_header.py +++ b/tools/python/convert_header.py @@ -20,7 +20,7 @@ str = 'FFMPEGDNNNATIVE' # increase major and reset minor when we have to re-convert the model file -major = 0 +major = 1 # increase minor when we don't have to re-convert the model file -minor = 2 +minor = 0