[FFmpeg-devel] [PATCH 2/2] libavfilter/vf_dnn_detect: Add two outputs ssd support
wenbin.chen at intel.com
wenbin.chen at intel.com
Wed Dec 27 06:16:58 EET 2023
From: Wenbin Chen <wenbin.chen at intel.com>
For this kind of model, we can directly use its output as the final result,
just like the ssd model. The difference is that it splits the output into two
tensors: [x_min, y_min, x_max, y_max, confidence] and [label_id].
For a model example, refer to: https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/intel/person-detection-0106
Signed-off-by: Wenbin Chen <wenbin.chen at intel.com>
---
libavfilter/vf_dnn_detect.c | 64 +++++++++++++++++++++++++++++--------
1 file changed, 50 insertions(+), 14 deletions(-)
diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c
index 88865c8a8e..249cbba0f7 100644
--- a/libavfilter/vf_dnn_detect.c
+++ b/libavfilter/vf_dnn_detect.c
@@ -359,24 +359,48 @@ static int dnn_detect_post_proc_yolov3(AVFrame *frame, DNNData *output,
return 0;
}
-static int dnn_detect_post_proc_ssd(AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx)
+static int dnn_detect_post_proc_ssd(AVFrame *frame, DNNData *output, int nb_outputs,
+ AVFilterContext *filter_ctx)
{
DnnDetectContext *ctx = filter_ctx->priv;
float conf_threshold = ctx->confidence;
- int proposal_count = output->height;
- int detect_size = output->width;
- float *detections = output->data;
+ int proposal_count = 0;
+ int detect_size = 0;
+ float *detections = NULL, *labels = NULL;
int nb_bboxes = 0;
AVDetectionBBoxHeader *header;
AVDetectionBBox *bbox;
-
- if (output->width != 7) {
+ int scale_w = ctx->scale_width;
+ int scale_h = ctx->scale_height;
+
+ if (nb_outputs == 1 && output->width == 7) {
+ proposal_count = output->height;
+ detect_size = output->width;
+ detections = output->data;
+ } else if (nb_outputs == 2 && output[0].width == 5) {
+ proposal_count = output[0].height;
+ detect_size = output[0].width;
+ detections = output[0].data;
+ labels = output[1].data;
+ } else if (nb_outputs == 2 && output[1].width == 5) {
+ proposal_count = output[1].height;
+ detect_size = output[1].width;
+ detections = output[1].data;
+ labels = output[0].data;
+ } else {
av_log(filter_ctx, AV_LOG_ERROR, "Model output shape doesn't match ssd requirement.\n");
return AVERROR(EINVAL);
}
+ if (proposal_count == 0)
+ return 0;
+
for (int i = 0; i < proposal_count; ++i) {
- float conf = detections[i * detect_size + 2];
+ float conf;
+ if (nb_outputs == 1)
+ conf = detections[i * detect_size + 2];
+ else
+ conf = detections[i * detect_size + 4];
if (conf < conf_threshold) {
continue;
}
@@ -398,12 +422,24 @@ static int dnn_detect_post_proc_ssd(AVFrame *frame, DNNData *output, AVFilterCon
for (int i = 0; i < proposal_count; ++i) {
int av_unused image_id = (int)detections[i * detect_size + 0];
- int label_id = (int)detections[i * detect_size + 1];
- float conf = detections[i * detect_size + 2];
- float x0 = detections[i * detect_size + 3];
- float y0 = detections[i * detect_size + 4];
- float x1 = detections[i * detect_size + 5];
- float y1 = detections[i * detect_size + 6];
+ int label_id;
+ float conf, x0, y0, x1, y1;
+
+ if (nb_outputs == 1) {
+ label_id = (int)detections[i * detect_size + 1];
+ conf = detections[i * detect_size + 2];
+ x0 = detections[i * detect_size + 3];
+ y0 = detections[i * detect_size + 4];
+ x1 = detections[i * detect_size + 5];
+ y1 = detections[i * detect_size + 6];
+ } else {
+ label_id = (int)labels[i];
+ x0 = detections[i * detect_size] / scale_w;
+ y0 = detections[i * detect_size + 1] / scale_h;
+ x1 = detections[i * detect_size + 2] / scale_w;
+ y1 = detections[i * detect_size + 3] / scale_h;
+ conf = detections[i * detect_size + 4];
+ }
if (conf < conf_threshold) {
continue;
@@ -447,7 +483,7 @@ static int dnn_detect_post_proc_ov(AVFrame *frame, DNNData *output, int nb_outpu
switch (ctx->model_type) {
case DDMT_SSD:
- ret = dnn_detect_post_proc_ssd(frame, output, filter_ctx);
+ ret = dnn_detect_post_proc_ssd(frame, output, nb_outputs, filter_ctx);
if (ret < 0)
return ret;
break;
--
2.34.1
More information about the ffmpeg-devel
mailing list