1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2024-12-23 12:43:46 +02:00

dnn: add tf.nn.conv2d support for native model

Unlike other tf.*.conv2d layers, tf.nn.conv2d does not create many
nodes (within a scope) in the graph; it just acts like other layers.
tf.nn.conv2d creates only one node in the graph, and no internal
nodes such as 'kernel' are created.

The format of the native model file is also changed: a flag named
has_bias is added, so the version number is changed.

Signed-off-by: Guo, Yejun <yejun.guo@intel.com>
Signed-off-by: Pedro Arthur <bygrandao@gmail.com>
This commit is contained in:
Guo, Yejun 2019-10-21 20:38:03 +08:00 committed by Pedro Arthur
parent a269fa044b
commit dff39ea9f0
6 changed files with 82 additions and 18 deletions

View File

@ -98,7 +98,7 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
char header_expected[] = "FFMPEGDNNNATIVE";
char *buf;
size_t size;
int version, header_size, major_version_expected = 0;
int version, header_size, major_version_expected = 1;
ConvolutionalNetwork *network = NULL;
AVIOContext *model_file_context;
int file_size, dnn_size, parsed_size;

View File

@ -38,28 +38,42 @@ int dnn_load_layer_conv2d(Layer *layer, AVIOContext *model_file_context, int fil
conv_params->input_num = (int32_t)avio_rl32(model_file_context);
conv_params->output_num = (int32_t)avio_rl32(model_file_context);
conv_params->kernel_size = (int32_t)avio_rl32(model_file_context);
conv_params->has_bias = (int32_t)avio_rl32(model_file_context);
dnn_size += 28;
kernel_size = conv_params->input_num * conv_params->output_num *
conv_params->kernel_size * conv_params->kernel_size;
dnn_size += 24 + (kernel_size + conv_params->output_num << 2);
dnn_size += kernel_size * 4;
if (conv_params->has_bias)
dnn_size += conv_params->output_num * 4;
if (dnn_size > file_size || conv_params->input_num <= 0 ||
conv_params->output_num <= 0 || conv_params->kernel_size <= 0){
av_freep(&conv_params);
return 0;
}
conv_params->kernel = av_malloc(kernel_size * sizeof(float));
conv_params->biases = av_malloc(conv_params->output_num * sizeof(float));
if (!conv_params->kernel || !conv_params->biases){
av_freep(&conv_params->kernel);
av_freep(&conv_params->biases);
if (!conv_params->kernel) {
av_freep(&conv_params);
return 0;
}
for (int i = 0; i < kernel_size; ++i) {
conv_params->kernel[i] = av_int2float(avio_rl32(model_file_context));
}
conv_params->biases = NULL;
if (conv_params->has_bias) {
conv_params->biases = av_malloc(conv_params->output_num * sizeof(float));
if (!conv_params->biases){
av_freep(&conv_params->kernel);
av_freep(&conv_params);
return 0;
}
for (int i = 0; i < conv_params->output_num; ++i){
conv_params->biases[i] = av_int2float(avio_rl32(model_file_context));
}
}
layer->params = conv_params;
@ -103,7 +117,10 @@ int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_
for (int y = pad_size; y < height - pad_size; ++y) {
for (int x = pad_size; x < width - pad_size; ++x) {
for (int n_filter = 0; n_filter < conv_params->output_num; ++n_filter) {
if (conv_params->has_bias)
output[n_filter] = conv_params->biases[n_filter];
else
output[n_filter] = 0.f;
for (int ch = 0; ch < conv_params->input_num; ++ch) {
for (int kernel_y = 0; kernel_y < conv_params->kernel_size; ++kernel_y) {

View File

@ -31,6 +31,7 @@ typedef struct ConvolutionalParams{
DNNActivationFunc activation;
DNNConvPaddingParam padding_method;
int32_t dilation;
int32_t has_bias;
float *kernel;
float *biases;
} ConvolutionalParams;

View File

@ -97,6 +97,7 @@ static int test_with_same_dilate(void)
float bias[2] = { -1.6574852, -0.72915393 };
params.activation = TANH;
params.has_bias = 1;
params.biases = bias;
params.dilation = 2;
params.input_num = 3;
@ -196,6 +197,7 @@ static int test_with_valid(void)
float bias[2] = { -0.4773722, -0.19620377 };
params.activation = TANH;
params.has_bias = 1;
params.biases = bias;
params.dilation = 1;
params.input_num = 3;

View File

@ -118,7 +118,7 @@ class TFConverter:
return knode, bnode, dnode, anode
def dump_conv2d_to_file(self, node, f):
def dump_complex_conv2d_to_file(self, node, f):
assert(node.op == 'Conv2D')
self.layer_number = self.layer_number + 1
self.converted_nodes.add(node.name)
@ -153,7 +153,8 @@ class TFConverter:
kernel = kernel.reshape(filter_height, filter_width, in_channels, out_channels)
kernel = np.transpose(kernel, [3, 0, 1, 2])
np.array([self.op2code[node.op], dilation, padding, self.conv_activations[activation], in_channels, out_channels, filter_height], dtype=np.uint32).tofile(f)
has_bias = 1
np.array([self.op2code[node.op], dilation, padding, self.conv_activations[activation], in_channels, out_channels, filter_height, has_bias], dtype=np.uint32).tofile(f)
kernel.tofile(f)
btensor = bnode.attr['value'].tensor
@ -173,6 +174,41 @@ class TFConverter:
np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
def dump_simple_conv2d_to_file(self, node, f):
    """Write a stand-alone Conv2D node to the model file as a bias-less layer.

    The node is expected to have two inputs, one of which is a 'Const'
    node carrying the kernel; the other one is taken as the data input.

    Written to `f`, in order:
      1. a uint32 header: op code, dilation (always 1 here), padding code,
         activation code (always the 'None' activation here), input channel
         count, output channel count, filter height, has_bias (always 0 here);
      2. the float32 kernel, transposed from
         (height, width, in_channels, out_channels) to
         (out_channels, height, width, in_channels);
      3. a uint32 pair: input operand index, output operand index.

    node: graph node whose op must be 'Conv2D'.
    f: file object opened for binary writing.
    """
    assert(node.op == 'Conv2D')
    self.layer_number += 1
    self.converted_nodes.add(node.name)

    # Resolve both inputs first, then decide which one is the constant kernel.
    first_input = self.name_node_dict[node.input[0]]
    second_input = self.name_node_dict[node.input[1]]
    if first_input.op == 'Const':
        kernel_node, input_name = first_input, node.input[1]
    else:
        kernel_node, input_name = second_input, node.input[0]

    kernel_tensor = kernel_node.attr['value'].tensor
    shape_dims = kernel_tensor.tensor_shape.dim
    filter_height = shape_dims[0].size
    filter_width = shape_dims[1].size
    in_channels = shape_dims[2].size
    out_channels = shape_dims[3].size

    # Reinterpret the raw tensor bytes as float32 and move the output-channel
    # axis to the front before dumping.
    weights = np.frombuffer(kernel_tensor.tensor_content, dtype=np.float32)
    weights = weights.reshape(filter_height, filter_width, in_channels, out_channels)
    weights = np.transpose(weights, [3, 0, 1, 2])

    has_bias = 0
    dilation = 1
    padding = node.attr['padding'].s.decode("utf-8")

    # NOTE(review): only filter_height is stored as the kernel size;
    # presumably kernels are square -- confirm against the loader.
    header = np.array(
        [
            self.op2code[node.op],
            dilation,
            self.conv_paddings[padding],
            self.conv_activations['None'],
            in_channels,
            out_channels,
            filter_height,
            has_bias,
        ],
        dtype=np.uint32,
    )
    header.tofile(f)
    weights.tofile(f)

    input_operand_index = self.add_operand(input_name, Operand.IOTYPE_INPUT)
    output_operand_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT)
    operand_indexes = np.array([input_operand_index, output_operand_index], dtype=np.uint32)
    operand_indexes.tofile(f)
def dump_depth2space_to_file(self, node, f):
assert(node.op == 'DepthToSpace')
self.layer_number = self.layer_number + 1
@ -222,10 +258,12 @@ class TFConverter:
scope_name = TFConverter.get_scope_name(node.name)
if scope_name in self.conv2d_scope_names:
if node.op == 'Conv2D':
self.dump_conv2d_to_file(node, f)
self.dump_complex_conv2d_to_file(node, f)
continue
if node.op == 'DepthToSpace':
if node.op == 'Conv2D':
self.dump_simple_conv2d_to_file(node, f)
elif node.op == 'DepthToSpace':
self.dump_depth2space_to_file(node, f)
elif node.op == 'MirrorPad':
self.dump_mirrorpad_to_file(node, f)
@ -312,10 +350,16 @@ class TFConverter:
def generate_conv2d_scope_info(self):
# conv2d is a sub block in graph, get the scope name
# mostly, conv2d is a sub block in graph, get the scope name
for node in self.nodes:
if node.op == 'Conv2D':
scope = TFConverter.get_scope_name(node.name)
# for the case tf.nn.conv2d is called directly
if scope == '':
continue
# for the case tf.nn.conv2d is called within a scope
if scope + '/kernel' not in self.name_node_dict:
continue
self.conv2d_scope_names.add(scope)
# get the input name to the conv2d sub block

View File

@ -20,7 +20,7 @@
str = 'FFMPEGDNNNATIVE'
# increase major and reset minor when we have to re-convert the model file
major = 0
major = 1
# increase minor when we don't have to re-convert the model file
minor = 2
minor = 0