[FFmpeg-devel] [PATCH V3 3/3] avfilter/dnn: unify the layer load function in native mode
Guo, Yejun
yejun.guo at intel.com
Wed Oct 9 17:08:18 EEST 2019
Signed-off-by: Guo, Yejun <yejun.guo at intel.com>
---
libavfilter/dnn/dnn_backend_native.c | 114 +++------------------
libavfilter/dnn/dnn_backend_native.h | 2 +-
libavfilter/dnn/dnn_backend_native_layer_conv2d.c | 46 +++++++++
libavfilter/dnn/dnn_backend_native_layer_conv2d.h | 1 +
.../dnn/dnn_backend_native_layer_depth2space.c | 18 ++++
.../dnn/dnn_backend_native_layer_depth2space.h | 1 +
libavfilter/dnn/dnn_backend_native_layer_maximum.c | 18 ++++
libavfilter/dnn/dnn_backend_native_layer_maximum.h | 1 +
libavfilter/dnn/dnn_backend_native_layer_pad.c | 23 +++++
libavfilter/dnn/dnn_backend_native_layer_pad.h | 1 +
libavfilter/dnn/dnn_backend_native_layers.c | 12 +--
libavfilter/dnn/dnn_backend_native_layers.h | 8 +-
12 files changed, 135 insertions(+), 110 deletions(-)
diff --git a/libavfilter/dnn/dnn_backend_native.c b/libavfilter/dnn/dnn_backend_native.c
index c8fb956..06b010d 100644
--- a/libavfilter/dnn/dnn_backend_native.c
+++ b/libavfilter/dnn/dnn_backend_native.c
@@ -25,10 +25,7 @@
#include "dnn_backend_native.h"
#include "libavutil/avassert.h"
-#include "dnn_backend_native_layer_pad.h"
#include "dnn_backend_native_layer_conv2d.h"
-#include "dnn_backend_native_layer_depth2space.h"
-#include "dnn_backend_native_layer_maximum.h"
#include "dnn_backend_native_layers.h"
static DNNReturnType set_input_output_native(void *model, DNNInputData *input, const char *input_name, const char **output_names, uint32_t nb_output)
@@ -104,13 +101,9 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
int version, header_size, major_version_expected = 0;
ConvolutionalNetwork *network = NULL;
AVIOContext *model_file_context;
- int file_size, dnn_size, kernel_size, i;
+ int file_size, dnn_size, parsed_size;
int32_t layer;
DNNLayerType layer_type;
- ConvolutionalParams *conv_params;
- DepthToSpaceParams *depth_to_space_params;
- LayerPadParams *pad_params;
- DnnLayerMaximumParams *maximum_params;
model = av_malloc(sizeof(DNNModel));
if (!model){
@@ -189,104 +182,21 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
for (layer = 0; layer < network->layers_num; ++layer){
layer_type = (int32_t)avio_rl32(model_file_context);
dnn_size += 4;
+
+ if (layer_type >= DLT_COUNT) {
+ avio_closep(&model_file_context);
+ ff_dnn_free_model_native(&model);
+ return NULL;
+ }
+
network->layers[layer].type = layer_type;
- switch (layer_type){
- case DLT_CONV2D:
- conv_params = av_malloc(sizeof(ConvolutionalParams));
- if (!conv_params){
- avio_closep(&model_file_context);
- ff_dnn_free_model_native(&model);
- return NULL;
- }
- conv_params->dilation = (int32_t)avio_rl32(model_file_context);
- conv_params->padding_method = (int32_t)avio_rl32(model_file_context);
- conv_params->activation = (int32_t)avio_rl32(model_file_context);
- conv_params->input_num = (int32_t)avio_rl32(model_file_context);
- conv_params->output_num = (int32_t)avio_rl32(model_file_context);
- conv_params->kernel_size = (int32_t)avio_rl32(model_file_context);
- kernel_size = conv_params->input_num * conv_params->output_num *
- conv_params->kernel_size * conv_params->kernel_size;
- dnn_size += 24 + (kernel_size + conv_params->output_num << 2);
- if (dnn_size > file_size || conv_params->input_num <= 0 ||
- conv_params->output_num <= 0 || conv_params->kernel_size <= 0){
- avio_closep(&model_file_context);
- av_freep(&conv_params);
- ff_dnn_free_model_native(&model);
- return NULL;
- }
- conv_params->kernel = av_malloc(kernel_size * sizeof(float));
- conv_params->biases = av_malloc(conv_params->output_num * sizeof(float));
- if (!conv_params->kernel || !conv_params->biases){
- avio_closep(&model_file_context);
- av_freep(&conv_params->kernel);
- av_freep(&conv_params->biases);
- av_freep(&conv_params);
- ff_dnn_free_model_native(&model);
- return NULL;
- }
- for (i = 0; i < kernel_size; ++i){
- conv_params->kernel[i] = av_int2float(avio_rl32(model_file_context));
- }
- for (i = 0; i < conv_params->output_num; ++i){
- conv_params->biases[i] = av_int2float(avio_rl32(model_file_context));
- }
- network->layers[layer].input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
- network->layers[layer].output_operand_index = (int32_t)avio_rl32(model_file_context);
- dnn_size += 8;
- network->layers[layer].params = conv_params;
- break;
- case DLT_DEPTH_TO_SPACE:
- depth_to_space_params = av_malloc(sizeof(DepthToSpaceParams));
- if (!depth_to_space_params){
- avio_closep(&model_file_context);
- ff_dnn_free_model_native(&model);
- return NULL;
- }
- depth_to_space_params->block_size = (int32_t)avio_rl32(model_file_context);
- dnn_size += 4;
- network->layers[layer].input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
- network->layers[layer].output_operand_index = (int32_t)avio_rl32(model_file_context);
- dnn_size += 8;
- network->layers[layer].params = depth_to_space_params;
- break;
- case DLT_MIRROR_PAD:
- pad_params = av_malloc(sizeof(LayerPadParams));
- if (!pad_params){
- avio_closep(&model_file_context);
- ff_dnn_free_model_native(&model);
- return NULL;
- }
- pad_params->mode = (int32_t)avio_rl32(model_file_context);
- dnn_size += 4;
- for (i = 0; i < 4; ++i) {
- pad_params->paddings[i][0] = avio_rl32(model_file_context);
- pad_params->paddings[i][1] = avio_rl32(model_file_context);
- dnn_size += 8;
- }
- network->layers[layer].input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
- network->layers[layer].output_operand_index = (int32_t)avio_rl32(model_file_context);
- dnn_size += 8;
- network->layers[layer].params = pad_params;
- break;
- case DLT_MAXIMUM:
- maximum_params = av_malloc(sizeof(*maximum_params));
- if (!maximum_params){
- avio_closep(&model_file_context);
- ff_dnn_free_model_native(&model);
- return NULL;
- }
- maximum_params->val.u32 = avio_rl32(model_file_context);
- dnn_size += 4;
- network->layers[layer].params = maximum_params;
- network->layers[layer].input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
- network->layers[layer].output_operand_index = (int32_t)avio_rl32(model_file_context);
- dnn_size += 8;
- break;
- default:
+ parsed_size = layer_funcs[layer_type].pf_load(&network->layers[layer], model_file_context, file_size);
+ if (!parsed_size) {
avio_closep(&model_file_context);
ff_dnn_free_model_native(&model);
return NULL;
}
+ dnn_size += parsed_size;
}
for (int32_t i = 0; i < network->operands_num; ++i){
@@ -341,7 +251,7 @@ DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *output
for (layer = 0; layer < network->layers_num; ++layer){
DNNLayerType layer_type = network->layers[layer].type;
- layer_funcs[layer_type](network->operands,
+ layer_funcs[layer_type].pf_exec(network->operands,
network->layers[layer].input_operand_indexes,
network->layers[layer].output_operand_index,
network->layers[layer].params);
diff --git a/libavfilter/dnn/dnn_backend_native.h b/libavfilter/dnn/dnn_backend_native.h
index 9821390..53ed22c 100644
--- a/libavfilter/dnn/dnn_backend_native.h
+++ b/libavfilter/dnn/dnn_backend_native.h
@@ -33,7 +33,7 @@
/**
* the enum value of DNNLayerType should not be changed,
* the same values are used in convert_from_tensorflow.py
- * and, it is used to index the layer execution function pointer.
+ * and they also index the table of layer execution/load function pointers.
*/
typedef enum {
DLT_INPUT = 0,
diff --git a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
index 594187f..0de8902 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
@@ -23,6 +23,52 @@
#define CLAMP_TO_EDGE(x, w) ((x) < 0 ? 0 : ((x) >= (w) ? (w - 1) : (x)))
+/**
+ * Load the parameters of a conv2d layer from a native model file.
+ *
+ * Reads six 32-bit fields (dilation, padding method, activation, input_num,
+ * output_num, kernel_size), then the kernel weights and the biases as 32-bit
+ * floats, and finally the input and output operand indexes of the layer.
+ *
+ * @param layer              layer to fill in; on success layer->params points
+ *                           to a newly allocated ConvolutionalParams
+ * @param model_file_context I/O context positioned at the layer parameters
+ * @param file_size          size of the model file in bytes as given by the
+ *                           caller; used as an upper bound for this layer
+ * @return number of bytes consumed from the file, or 0 on failure, in which
+ *         case everything allocated here has been freed again
+ */
+int dnn_load_layer_conv2d(Layer *layer, AVIOContext *model_file_context, int file_size)
+{
+    ConvolutionalParams *conv_params;
+    /* every weight takes 4 bytes, so the file cannot hold more floats than this */
+    const int64_t max_floats = file_size / 4;
+    int64_t kernel_size;
+    int64_t dnn_size;
+
+    conv_params = av_malloc(sizeof(*conv_params));
+    if (!conv_params)
+        return 0;
+    /* lets the shared failure path free both buffers unconditionally */
+    conv_params->kernel = NULL;
+    conv_params->biases = NULL;
+
+    conv_params->dilation = (int32_t)avio_rl32(model_file_context);
+    conv_params->padding_method = (int32_t)avio_rl32(model_file_context);
+    conv_params->activation = (int32_t)avio_rl32(model_file_context);
+    conv_params->input_num = (int32_t)avio_rl32(model_file_context);
+    conv_params->output_num = (int32_t)avio_rl32(model_file_context);
+    conv_params->kernel_size = (int32_t)avio_rl32(model_file_context);
+
+    /* The dimensions come straight from the file: validate them before
+     * doing any arithmetic with them. */
+    if (conv_params->input_num <= 0 || conv_params->output_num <= 0 ||
+        conv_params->kernel_size <= 0)
+        goto fail;
+
+    /* Multiply step by step in 64 bits and stop as soon as the running
+     * product exceeds what the file could hold, so nothing can overflow. */
+    kernel_size = (int64_t)conv_params->input_num * conv_params->output_num;
+    if (kernel_size <= max_floats)
+        kernel_size *= conv_params->kernel_size;
+    if (kernel_size <= max_floats)
+        kernel_size *= conv_params->kernel_size;
+    if (kernel_size > max_floats)
+        goto fail;
+
+    /* 6 header fields (24 bytes), the weights and biases (4 bytes each) and
+     * the 2 operand indexes (8 bytes) read at the end of this function */
+    dnn_size = 24 + 4 * (kernel_size + conv_params->output_num) + 8;
+    if (dnn_size > file_size)
+        goto fail;
+
+    conv_params->kernel = av_malloc(kernel_size * sizeof(float));
+    conv_params->biases = av_malloc(conv_params->output_num * sizeof(float));
+    if (!conv_params->kernel || !conv_params->biases)
+        goto fail;
+
+    for (int i = 0; i < kernel_size; ++i){
+        conv_params->kernel[i] = av_int2float(avio_rl32(model_file_context));
+    }
+    for (int i = 0; i < conv_params->output_num; ++i){
+        conv_params->biases[i] = av_int2float(avio_rl32(model_file_context));
+    }
+
+    layer->params = conv_params;
+
+    layer->input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
+    layer->output_operand_index = (int32_t)avio_rl32(model_file_context);
+
+    /* bounded by file_size above, so the narrowing is safe */
+    return (int)dnn_size;
+
+fail:
+    av_freep(&conv_params->kernel);
+    av_freep(&conv_params->biases);
+    av_freep(&conv_params);
+    return 0;
+}
+
+
int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_indexes,
int32_t output_operand_index, const void *parameters)
{
diff --git a/libavfilter/dnn/dnn_backend_native_layer_conv2d.h b/libavfilter/dnn/dnn_backend_native_layer_conv2d.h
index 1dd84cb..db90b2b 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_conv2d.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_conv2d.h
@@ -35,6 +35,7 @@ typedef struct ConvolutionalParams{
float *biases;
} ConvolutionalParams;
+int dnn_load_layer_conv2d(Layer *layer, AVIOContext *model_file_context, int file_size);
int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_indexes,
int32_t output_operand_index, const void *parameters);
#endif
diff --git a/libavfilter/dnn/dnn_backend_native_layer_depth2space.c b/libavfilter/dnn/dnn_backend_native_layer_depth2space.c
index 3720060..174676e 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_depth2space.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_depth2space.c
@@ -27,6 +27,24 @@
#include "libavutil/avassert.h"
#include "dnn_backend_native_layer_depth2space.h"
+/**
+ * Load the parameters of a depth-to-space layer from a native model file.
+ *
+ * Reads the block size followed by the input and output operand indexes,
+ * each as a 32-bit value.
+ *
+ * @param layer              layer to fill in; receives the newly allocated
+ *                           DepthToSpaceParams in layer->params
+ * @param model_file_context I/O context positioned at the layer parameters
+ * @param file_size          not used by this loader
+ * @return number of bytes consumed (12), or 0 if the allocation failed
+ */
+int dnn_load_layer_depth2space(Layer *layer, AVIOContext *model_file_context, int file_size)
+{
+    int bytes_read = 0;
+    DepthToSpaceParams *d2s = av_malloc(sizeof(*d2s));
+
+    if (d2s == NULL)
+        return 0;
+
+    /* the layer's own parameter */
+    d2s->block_size = (int32_t)avio_rl32(model_file_context);
+    bytes_read += 4;
+    layer->params = d2s;
+
+    /* operand wiring, one 32-bit index each */
+    layer->input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
+    bytes_read += 4;
+    layer->output_operand_index = (int32_t)avio_rl32(model_file_context);
+    bytes_read += 4;
+
+    return bytes_read;
+}
+
+
int dnn_execute_layer_depth2space(DnnOperand *operands, const int32_t *input_operand_indexes,
int32_t output_operand_index, const void *parameters)
{
diff --git a/libavfilter/dnn/dnn_backend_native_layer_depth2space.h b/libavfilter/dnn/dnn_backend_native_layer_depth2space.h
index c481bf1..e5465f1 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_depth2space.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_depth2space.h
@@ -34,6 +34,7 @@ typedef struct DepthToSpaceParams{
int block_size;
} DepthToSpaceParams;
+int dnn_load_layer_depth2space(Layer *layer, AVIOContext *model_file_context, int file_size);
int dnn_execute_layer_depth2space(DnnOperand *operands, const int32_t *input_operand_indexes,
int32_t output_operand_index, const void *parameters);
diff --git a/libavfilter/dnn/dnn_backend_native_layer_maximum.c b/libavfilter/dnn/dnn_backend_native_layer_maximum.c
index 6add170..19f0e8d 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_maximum.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_maximum.c
@@ -27,6 +27,24 @@
#include "libavutil/avassert.h"
#include "dnn_backend_native_layer_maximum.h"
+/**
+ * Load the parameters of a maximum layer from a native model file.
+ *
+ * Reads the 32-bit value stored in params->val.u32, followed by the input
+ * and output operand indexes.
+ *
+ * @param layer              layer to fill in; receives the newly allocated
+ *                           DnnLayerMaximumParams in layer->params
+ * @param model_file_context I/O context positioned at the layer parameters
+ * @param file_size          not used by this loader
+ * @return number of bytes consumed (12), or 0 if the allocation failed
+ */
+int dnn_load_layer_maximum(Layer *layer, AVIOContext *model_file_context, int file_size)
+{
+    int bytes_read = 0;
+    DnnLayerMaximumParams *max_params = av_malloc(sizeof(*max_params));
+
+    if (max_params == NULL)
+        return 0;
+
+    /* stored as raw 32 bits in the u32 member of the union */
+    max_params->val.u32 = avio_rl32(model_file_context);
+    bytes_read += 4;
+    layer->params = max_params;
+
+    /* operand wiring, one 32-bit index each */
+    layer->input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
+    bytes_read += 4;
+    layer->output_operand_index = (int32_t)avio_rl32(model_file_context);
+    bytes_read += 4;
+
+    return bytes_read;
+}
+
+
int dnn_execute_layer_maximum(DnnOperand *operands, const int32_t *input_operand_indexes,
int32_t output_operand_index, const void *parameters)
{
diff --git a/libavfilter/dnn/dnn_backend_native_layer_maximum.h b/libavfilter/dnn/dnn_backend_native_layer_maximum.h
index 87f3bf5..601158b 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_maximum.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_maximum.h
@@ -37,6 +37,7 @@ typedef struct DnnLayerMaximumParams{
}val;
} DnnLayerMaximumParams;
+int dnn_load_layer_maximum(Layer *layer, AVIOContext *model_file_context, int file_size);
int dnn_execute_layer_maximum(DnnOperand *operands, const int32_t *input_operand_indexes,
int32_t output_operand_index, const void *parameters);
diff --git a/libavfilter/dnn/dnn_backend_native_layer_pad.c b/libavfilter/dnn/dnn_backend_native_layer_pad.c
index f5c5727..8fa35de 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_pad.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_pad.c
@@ -22,6 +22,29 @@
#include "libavutil/avassert.h"
#include "dnn_backend_native_layer_pad.h"
+/**
+ * Load the parameters of a pad layer from a native model file.
+ *
+ * Reads the padding mode, four pairs of padding amounts and the input and
+ * output operand indexes, each as a 32-bit value.
+ *
+ * @param layer              layer to fill in; receives the newly allocated
+ *                           LayerPadParams in layer->params
+ * @param model_file_context I/O context positioned at the layer parameters
+ * @param file_size          not used by this loader
+ * @return number of bytes consumed (44), or 0 if the allocation failed
+ */
+int dnn_load_layer_pad(Layer *layer, AVIOContext *model_file_context, int file_size)
+{
+    LayerPadParams *params;
+    int dnn_size = 0;
+    params = av_malloc(sizeof(*params));
+    if (!params)
+        return 0;
+
+    /* Nothing read below fills constant_values, and av_malloc() does not
+     * clear memory: give the field a defined value instead of leaving it
+     * indeterminate. */
+    params->constant_values = 0.0f;
+
+    params->mode = (int32_t)avio_rl32(model_file_context);
+    dnn_size += 4;
+    /* one (first, second) pair per row of paddings[4][2] */
+    for (int i = 0; i < 4; ++i) {
+        params->paddings[i][0] = avio_rl32(model_file_context);
+        params->paddings[i][1] = avio_rl32(model_file_context);
+        dnn_size += 8;
+    }
+    layer->input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
+    layer->output_operand_index = (int32_t)avio_rl32(model_file_context);
+    dnn_size += 8;
+    layer->params = params;
+
+    return dnn_size;
+}
+
+
static int before_get_buddy(int given, int paddings, LayerPadModeParam mode)
{
if (mode == LPMP_SYMMETRIC) {
diff --git a/libavfilter/dnn/dnn_backend_native_layer_pad.h b/libavfilter/dnn/dnn_backend_native_layer_pad.h
index 036ff7b..936a9bd 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_pad.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_pad.h
@@ -36,6 +36,7 @@ typedef struct LayerPadParams{
float constant_values;
} LayerPadParams;
+int dnn_load_layer_pad(Layer *layer, AVIOContext *model_file_context, int file_size);
int dnn_execute_layer_pad(DnnOperand *operands, const int32_t *input_operand_indexes,
int32_t output_operand_index, const void *parameters);
diff --git a/libavfilter/dnn/dnn_backend_native_layers.c b/libavfilter/dnn/dnn_backend_native_layers.c
index 5f81a09..196872d 100644
--- a/libavfilter/dnn/dnn_backend_native_layers.c
+++ b/libavfilter/dnn/dnn_backend_native_layers.c
@@ -25,10 +25,10 @@
#include "dnn_backend_native_layer_depth2space.h"
#include "dnn_backend_native_layer_maximum.h"
-LAYER_EXEC_FUNC layer_funcs[DLT_COUNT] = {
- NULL,
- dnn_execute_layer_conv2d,
- dnn_execute_layer_depth2space,
- dnn_execute_layer_pad,
- dnn_execute_layer_maximum,
+/**
+ * Table of per-layer-type callbacks, indexed by the DNNLayerType value.
+ * Each entry lists the execute function first and the load function second.
+ * The first entry (index 0) holds no functions, so it must never be
+ * called through.
+ */
+LayerFunc layer_funcs[DLT_COUNT] = {
+ {NULL, NULL},
+ {dnn_execute_layer_conv2d, dnn_load_layer_conv2d},
+ {dnn_execute_layer_depth2space, dnn_load_layer_depth2space},
+ {dnn_execute_layer_pad, dnn_load_layer_pad},
+ {dnn_execute_layer_maximum, dnn_load_layer_maximum},
};
\ No newline at end of file
diff --git a/libavfilter/dnn/dnn_backend_native_layers.h b/libavfilter/dnn/dnn_backend_native_layers.h
index 3276aee..2df0ce9 100644
--- a/libavfilter/dnn/dnn_backend_native_layers.h
+++ b/libavfilter/dnn/dnn_backend_native_layers.h
@@ -26,7 +26,13 @@
typedef int (*LAYER_EXEC_FUNC)(DnnOperand *operands, const int32_t *input_operand_indexes,
int32_t output_operand_index, const void *parameters);
+/**
+ * Loader callback for one layer type: reads the layer's parameters from
+ * model_file_context into layer. The model loader treats a return value of
+ * 0 as failure and any other value as the number of bytes parsed.
+ */
+typedef int (*LAYER_LOAD_FUNC)(Layer *layer, AVIOContext *model_file_context, int file_size);
-extern LAYER_EXEC_FUNC layer_funcs[DLT_COUNT];
+/**
+ * The pair of callbacks implementing one layer type.
+ */
+typedef struct LayerFunc {
+ /* invoked for each layer when the model is executed */
+ LAYER_EXEC_FUNC pf_exec;
+ /* invoked for each layer when the model file is loaded */
+ LAYER_LOAD_FUNC pf_load;
+}LayerFunc;
+
+extern LayerFunc layer_funcs[DLT_COUNT];
#endif
--
2.7.4
More information about the ffmpeg-devel
mailing list