[FFmpeg-devel] [PATCH V2 1/4] dnn: add tf.nn.conv2d support for native model
Pedro Arthur
bygrandao at gmail.com
Wed Oct 30 16:48:42 EET 2019
On Mon, Oct 21, 2019 at 09:44, Guo, Yejun <yejun.guo at intel.com> wrote:
> Unlike other tf.*.conv2d layers, tf.nn.conv2d does not create many
> nodes (within a scope) in the graph; it just acts like other layers.
> tf.nn.conv2d creates only one node in the graph, and no internal
> nodes such as 'kernel' are created.
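[An aside for readers new to the distinction: a minimal TF 1.x sketch,
not part of the patch; names and shapes are illustrative.]

    import tensorflow as tf

    x = tf.placeholder(tf.float32, [1, 8, 8, 3])

    # tf.layers.conv2d builds a sub-block in the graph: a scope
    # holding internal nodes such as 'conv2d/kernel' and 'conv2d/bias'.
    y1 = tf.layers.conv2d(x, 2, 3, name='conv2d')

    # tf.nn.conv2d takes the filter as an explicit input and adds a
    # single Conv2D node; no 'kernel' node is created under any scope.
    w = tf.constant(0.1, shape=[3, 3, 3, 2])
    y2 = tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='VALID')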
>
> The format of the native model file is also changed: a flag named
> has_bias is added, so the version number is bumped.
>
> Signed-off-by: Guo, Yejun <yejun.guo at intel.com>
> ---
> libavfilter/dnn/dnn_backend_native.c | 2 +-
> libavfilter/dnn/dnn_backend_native_layer_conv2d.c | 37 +++++++++++-----
> libavfilter/dnn/dnn_backend_native_layer_conv2d.h | 1 +
> tests/dnn/dnn-layer-conv2d-test.c | 2 +
> tools/python/convert_from_tensorflow.py | 54 ++++++++++++++++++++---
> tools/python/convert_header.py | 4 +-
> 6 files changed, 82 insertions(+), 18 deletions(-)
>
> diff --git a/libavfilter/dnn/dnn_backend_native.c b/libavfilter/dnn/dnn_backend_native.c
> index 06b010d..ff280b5 100644
> --- a/libavfilter/dnn/dnn_backend_native.c
> +++ b/libavfilter/dnn/dnn_backend_native.c
> @@ -98,7 +98,7 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
> char header_expected[] = "FFMPEGDNNNATIVE";
> char *buf;
> size_t size;
> - int version, header_size, major_version_expected = 0;
> + int version, header_size, major_version_expected = 1;
> ConvolutionalNetwork *network = NULL;
> AVIOContext *model_file_context;
> int file_size, dnn_size, parsed_size;
> diff --git a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
> index 0de8902..6ec0fa7 100644
> --- a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
> +++ b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
> @@ -38,27 +38,41 @@ int dnn_load_layer_conv2d(Layer *layer, AVIOContext *model_file_context, int fil
> conv_params->input_num = (int32_t)avio_rl32(model_file_context);
> conv_params->output_num = (int32_t)avio_rl32(model_file_context);
> conv_params->kernel_size = (int32_t)avio_rl32(model_file_context);
> + conv_params->has_bias = (int32_t)avio_rl32(model_file_context);
> + dnn_size += 28;
> +
> kernel_size = conv_params->input_num * conv_params->output_num *
> - conv_params->kernel_size * conv_params->kernel_size;
> - dnn_size += 24 + (kernel_size + conv_params->output_num << 2);
> + conv_params->kernel_size * conv_params->kernel_size;
> + dnn_size += kernel_size * 4;
> + if (conv_params->has_bias)
> + dnn_size += conv_params->output_num * 4;
> +
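[Side note: the accounting here is sound. The conv2d layer header is now
seven uint32 fields ending with has_bias, hence the flat 28 bytes; the
removed expression '24 + (kernel_size + conv_params->output_num << 2)'
was correct only because '+' binds tighter than '<<' in C, so the
explicit multiplications are a welcome readability fix.]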
> if (dnn_size > file_size || conv_params->input_num <= 0 ||
> conv_params->output_num <= 0 || conv_params->kernel_size <= 0){
> av_freep(&conv_params);
> return 0;
> }
> +
> conv_params->kernel = av_malloc(kernel_size * sizeof(float));
> - conv_params->biases = av_malloc(conv_params->output_num * sizeof(float));
> - if (!conv_params->kernel || !conv_params->biases){
> - av_freep(&conv_params->kernel);
> - av_freep(&conv_params->biases);
> + if (!conv_params->kernel) {
> av_freep(&conv_params);
> return 0;
> }
> - for (int i = 0; i < kernel_size; ++i){
> + for (int i = 0; i < kernel_size; ++i) {
> conv_params->kernel[i] = av_int2float(avio_rl32(model_file_context));
> }
> - for (int i = 0; i < conv_params->output_num; ++i){
> - conv_params->biases[i] = av_int2float(avio_rl32(model_file_context));
> +
> + conv_params->biases = NULL;
> + if (conv_params->has_bias) {
> + conv_params->biases = av_malloc(conv_params->output_num * sizeof(float));
> + if (!conv_params->biases){
> + av_freep(&conv_params->kernel);
> + av_freep(&conv_params);
> + return 0;
> + }
> + for (int i = 0; i < conv_params->output_num; ++i){
> + conv_params->biases[i] = av_int2float(avio_rl32(model_file_context));
> + }
> }
>
> layer->params = conv_params;
> @@ -103,7 +117,10 @@ int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_
> for (int y = pad_size; y < height - pad_size; ++y) {
> for (int x = pad_size; x < width - pad_size; ++x) {
> for (int n_filter = 0; n_filter < conv_params->output_num; ++n_filter) {
> - output[n_filter] = conv_params->biases[n_filter];
> + if (conv_params->has_bias)
> + output[n_filter] = conv_params->biases[n_filter];
> + else
> + output[n_filter] = 0.f;
>
> for (int ch = 0; ch < conv_params->input_num; ++ch) {
> for (int kernel_y = 0; kernel_y < conv_params->kernel_size; ++kernel_y) {
> diff --git a/libavfilter/dnn/dnn_backend_native_layer_conv2d.h b/libavfilter/dnn/dnn_backend_native_layer_conv2d.h
> index db90b2b..bf87264 100644
> --- a/libavfilter/dnn/dnn_backend_native_layer_conv2d.h
> +++ b/libavfilter/dnn/dnn_backend_native_layer_conv2d.h
> @@ -31,6 +31,7 @@ typedef struct ConvolutionalParams{
> DNNActivationFunc activation;
> DNNConvPaddingParam padding_method;
> int32_t dilation;
> + int32_t has_bias;
> float *kernel;
> float *biases;
> } ConvolutionalParams;
> diff --git a/tests/dnn/dnn-layer-conv2d-test.c b/tests/dnn/dnn-layer-conv2d-test.c
> index 9d13da3..2da01e5 100644
> --- a/tests/dnn/dnn-layer-conv2d-test.c
> +++ b/tests/dnn/dnn-layer-conv2d-test.c
> @@ -97,6 +97,7 @@ static int test_with_same_dilate(void)
> float bias[2] = { -1.6574852, -0.72915393 };
>
> params.activation = TANH;
> + params.has_bias = 1;
> params.biases = bias;
> params.dilation = 2;
> params.input_num = 3;
> @@ -196,6 +197,7 @@ static int test_with_valid(void)
> float bias[2] = { -0.4773722, -0.19620377 };
>
> params.activation = TANH;
> + params.has_bias = 1;
> params.biases = bias;
> params.dilation = 1;
> params.input_num = 3;
> diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py
> index a663b34..605158a 100644
> --- a/tools/python/convert_from_tensorflow.py
> +++ b/tools/python/convert_from_tensorflow.py
> @@ -118,7 +118,7 @@ class TFConverter:
> return knode, bnode, dnode, anode
>
>
> - def dump_conv2d_to_file(self, node, f):
> + def dump_complex_conv2d_to_file(self, node, f):
> assert(node.op == 'Conv2D')
> self.layer_number = self.layer_number + 1
> self.converted_nodes.add(node.name)
> @@ -153,7 +153,8 @@ class TFConverter:
> kernel = kernel.reshape(filter_height, filter_width, in_channels, out_channels)
> kernel = np.transpose(kernel, [3, 0, 1, 2])
>
> - np.array([self.op2code[node.op], dilation, padding, self.conv_activations[activation], in_channels, out_channels, filter_height], dtype=np.uint32).tofile(f)
> + has_bias = 1
> + np.array([self.op2code[node.op], dilation, padding, self.conv_activations[activation], in_channels, out_channels, filter_height, has_bias], dtype=np.uint32).tofile(f)
> kernel.tofile(f)
>
> btensor = bnode.attr['value'].tensor
> @@ -173,6 +174,41 @@ class TFConverter:
> np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
>
>
> + def dump_simple_conv2d_to_file(self, node, f):
> + assert(node.op == 'Conv2D')
> + self.layer_number = self.layer_number + 1
> + self.converted_nodes.add(node.name)
> +
> + node0 = self.name_node_dict[node.input[0]]
> + node1 = self.name_node_dict[node.input[1]]
> + if node0.op == 'Const':
> + knode = node0
> + input_name = node.input[1]
> + else:
> + knode = node1
> + input_name = node.input[0]
> +
> + ktensor = knode.attr['value'].tensor
> + filter_height = ktensor.tensor_shape.dim[0].size
> + filter_width = ktensor.tensor_shape.dim[1].size
> + in_channels = ktensor.tensor_shape.dim[2].size
> + out_channels = ktensor.tensor_shape.dim[3].size
> + kernel = np.frombuffer(ktensor.tensor_content, dtype=np.float32)
> + kernel = kernel.reshape(filter_height, filter_width, in_channels, out_channels)
> + kernel = np.transpose(kernel, [3, 0, 1, 2])
> +
> + has_bias = 0
> + dilation = 1
> + padding = node.attr['padding'].s.decode("utf-8")
> + np.array([self.op2code[node.op], dilation, self.conv_paddings[padding], self.conv_activations['None'],
> + in_channels, out_channels, filter_height, has_bias], dtype=np.uint32).tofile(f)
> + kernel.tofile(f)
> +
> + input_operand_index = self.add_operand(input_name, Operand.IOTYPE_INPUT)
> + output_operand_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT)
> + np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
> +
> +
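[For anyone cross-checking this writer against the C loader: a rough
Python sketch of reading one such conv2d record back. The layout mirrors
dnn_load_layer_conv2d above; the function and field names are mine, and
the layer op code written first here is consumed by the generic layer
dispatcher before the conv2d-specific fields.]

    import struct
    import numpy as np

    def read_conv2d_record(f):
        # Seven little-endian uint32 header fields (28 bytes), then the
        # kernel as float32, then biases only when has_bias is set.
        dilation, padding, activation, in_ch, out_ch, ksize, has_bias = \
            struct.unpack('<7I', f.read(28))
        n = out_ch * in_ch * ksize * ksize
        kernel = np.frombuffer(f.read(n * 4), dtype='<f4')
        biases = None
        if has_bias:
            biases = np.frombuffer(f.read(out_ch * 4), dtype='<f4')
        return dilation, padding, activation, kernel, biases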
> def dump_depth2space_to_file(self, node, f):
> assert(node.op == 'DepthToSpace')
> self.layer_number = self.layer_number + 1
> @@ -222,10 +258,12 @@ class TFConverter:
> scope_name = TFConverter.get_scope_name(node.name)
> if scope_name in self.conv2d_scope_names:
> if node.op == 'Conv2D':
> - self.dump_conv2d_to_file(node, f)
> + self.dump_complex_conv2d_to_file(node, f)
> continue
>
> - if node.op == 'DepthToSpace':
> + if node.op == 'Conv2D':
> + self.dump_simple_conv2d_to_file(node, f)
> + elif node.op == 'DepthToSpace':
> self.dump_depth2space_to_file(node, f)
> elif node.op == 'MirrorPad':
> self.dump_mirrorpad_to_file(node, f)
> @@ -312,10 +350,16 @@ class TFConverter:
>
>
> def generate_conv2d_scope_info(self):
> - # conv2d is a sub block in graph, get the scope name
> + # mostly, conv2d is a sub block in graph, get the scope name
> for node in self.nodes:
> if node.op == 'Conv2D':
> scope = TFConverter.get_scope_name(node.name)
> + # for the case tf.nn.conv2d is called directly
> + if scope == '':
> + continue
> + # for the case tf.nn.conv2d is called within a scope
> + if scope + '/kernel' not in self.name_node_dict:
> + continue
> self.conv2d_scope_names.add(scope)
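[To make the two skipped cases concrete, with illustrative node names:
tf.layers.conv2d produces a scope such as 'conv2d' that holds a
'conv2d/kernel' Const node, so the lookup above succeeds and the scope
is kept; a direct tf.nn.conv2d call either yields a bare 'Conv2D' node
(empty scope) or, inside a name scope, something like 'sr/Conv2D' with
no 'sr/kernel' sibling. Both of the latter now fall through to
dump_simple_conv2d_to_file.]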
>
> # get the input name to the conv2d sub block
> diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py
> index 3c2acd5..67672b2 100644
> --- a/tools/python/convert_header.py
> +++ b/tools/python/convert_header.py
> @@ -20,7 +20,7 @@
> str = 'FFMPEGDNNNATIVE'
>
> # increase major and reset minor when we have to re-convert the model file
> -major = 0
> +major = 1
>
> # increase minor when we don't have to re-convert the model file
> -minor = 2
> +minor = 0
> --
> 2.7.4
>
LGTM
Should push soon.