[FFmpeg-devel] [PATCH 1/3] avfilter/vf_libopencv: add opencv HaarCascade classifier simple face detection filter
lance.lmwang at gmail.com
lance.lmwang at gmail.com
Wed May 13 17:42:35 EEST 2020
From: Limin Wang <lance.lmwang at gmail.com>
Signed-off-by: Limin Wang <lance.lmwang at gmail.com>
---
Have tested with opencv 2.4.13 and 3.4.10 with static link
configure | 1 +
doc/filters.texi | 29 ++++++++
libavfilter/vf_libopencv.c | 164 ++++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 191 insertions(+), 3 deletions(-)
diff --git a/configure b/configure
index a45c0fb..99d008a 100755
--- a/configure
+++ b/configure
@@ -2123,6 +2123,7 @@ HEADERS_LIST="
machine_ioctl_meteor_h
malloc_h
opencv2_core_core_c_h
+ opencv2_objdetect_objdetect_c_h
OpenGL_gl3_h
poll_h
sys_param_h
diff --git a/doc/filters.texi b/doc/filters.texi
index d19fd34..e50dcf3 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -14176,6 +14176,35 @@ other parameters is 0.
These parameters correspond to the parameters assigned to the
libopencv function @code{cvSmooth}.
+ at subsection facedetect
+Face detection using Haar Feature-based Cascade Classifiers.
+
+The filter takes the following parameters:
+ at var{xml_model}|@var{qoffset}.
+
+ at var{xml_model} is the path of the pre-trained classifier. The C API still
+does not support the newer cascade format, so please use the old-format
+haarcascade_frontalface_alt.xml, whose type_id is opencv-haar-classifier.
+
+ at var{qoffset}
+If you want to export the detected faces as ROI side data attached to the
+frame, set this parameter. See also the @ref{addroi} filter. The range of
+qoffset is [-1.0, 1.0].
+
+By default the filter will report these metadata values if faces are
+detected:
+ at table @option
+ at item lavfi.facedetect.nb_faces
+Display the detected face number
+
+ at item lavfi.facedetect.face_id.x, lavfi.facedetect.face_id.y
+Display the x and y position of each face; face_id is the face index,
+which ranges from [0, nb_faces-1].
+
+ at item lavfi.facedetect.face_id.w, lavfi.facedetect.face_id.h
+Display the width and height of each face; face_id is the face index,
+which ranges from [0, nb_faces-1].
+ at end table
+
@section oscilloscope
2D Video Oscilloscope.
diff --git a/libavfilter/vf_libopencv.c b/libavfilter/vf_libopencv.c
index 8128030..b7a9282 100644
--- a/libavfilter/vf_libopencv.c
+++ b/libavfilter/vf_libopencv.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2010 Stefano Sabatini
+ * Copyright (c) 2020 Limin Wang
*
* This file is part of FFmpeg.
*
@@ -27,10 +28,16 @@
#if HAVE_OPENCV2_CORE_CORE_C_H
#include <opencv2/core/core_c.h>
#include <opencv2/imgproc/imgproc_c.h>
+/* configure adds opencv2_objdetect_objdetect_c_h to HEADERS_LIST, which
+ * generates HAVE_OPENCV2_OBJDETECT_OBJDETECT_C_H (not ...OBJECTDETECT...);
+ * the previous spelling could never be defined, so the new header was
+ * silently skipped. */
+#if HAVE_OPENCV2_OBJDETECT_OBJDETECT_C_H
+#include <opencv2/objdetect/objdetect_c.h>
+#else
+#include <opencv/cv.h>
+#endif
#else
#include <opencv/cv.h>
#include <opencv/cxcore.h>
#endif
+
#include "libavutil/avstring.h"
#include "libavutil/common.h"
#include "libavutil/file.h"
@@ -82,6 +89,7 @@ typedef struct OCVContext {
int (*init)(AVFilterContext *ctx, const char *args);
void (*uninit)(AVFilterContext *ctx);
void (*end_frame_filter)(AVFilterContext *ctx, IplImage *inimg, IplImage *outimg);
+ void (*update_metadata)(AVFilterContext *ctx, AVFrame *frame);
void *priv;
} OCVContext;
@@ -326,18 +334,152 @@ static void erode_end_frame_filter(AVFilterContext *ctx, IplImage *inimg, IplIma
cvErode(inimg, outimg, dilate->kernel, dilate->nb_iterations);
}
+/* Per-instance state of the facedetect libopencv sub-filter. */
+typedef struct FaceDetectContext {
+    char *xml_model;                  /* path of the Haar cascade xml model, allocated by av_get_token() */
+    CvHaarClassifierCascade* cascade; /* classifier loaded from xml_model via cvLoad() */
+    CvMemStorage* storage;            /* OpenCV storage holding the detection results */
+    int nb_faces;                     /* number of faces found in the last processed frame */
+    CvSeq *faces;                     /* sequence of CvRect hits, owned by storage */
+    int add_roi;                      /* non-zero: also export AVRegionOfInterest side data */
+    AVRational qoffset;               /* quantisation offset for exported ROIs, from [-1.0, 1.0] */
+} FaceDetectContext;
+
+/* Parse the "xml_model|qoffset" argument string, create the OpenCV work
+ * storage and load the Haar cascade model.
+ * Returns 0 on success, a negative AVERROR on failure. */
+static av_cold int facedetect_init(AVFilterContext *ctx, const char *args)
+{
+    OCVContext *s = ctx->priv;
+    FaceDetectContext *facedetect = s->priv;
+    const char *buf = args;
+    double qoffset;
+
+    if (!args) {
+        av_log(ctx, AV_LOG_ERROR, "no haarcascade xml model file was given\n");
+        return AVERROR(EINVAL);
+    }
+
+    facedetect->xml_model = av_get_token(&buf, "|");
+    if (!facedetect->xml_model) {
+        /* av_get_token() only fails on allocation; do not pass the NULL
+         * result to %s (undefined behavior) */
+        av_log(ctx, AV_LOG_ERROR, "failed to parse xml model path from '%s'\n", args);
+        return AVERROR(ENOMEM);
+    }
+
+    /* buf now points at the '|' separator (if any), hence the "|%lf" format */
+    if (buf && sscanf(buf, "|%lf", &qoffset) == 1) {
+        if (qoffset < -1.0 || qoffset > 1.0) {
+            av_log(ctx, AV_LOG_ERROR, "qoffset %f is out of the range [-1.0, 1.0]\n", qoffset);
+            return AVERROR(EINVAL);
+        }
+        facedetect->add_roi = 1;
+        facedetect->qoffset = av_d2q(qoffset, 255);
+    }
+
+    av_log(ctx, AV_LOG_VERBOSE, "xml_model: %s add_roi: %d qoffset: %d/%d\n",
+           facedetect->xml_model, facedetect->add_roi, facedetect->qoffset.num, facedetect->qoffset.den);
+
+    facedetect->storage = cvCreateMemStorage(0);
+    if (!facedetect->storage) {
+        av_log(ctx, AV_LOG_ERROR, "cvCreateMemStorage() failed\n");
+        return AVERROR(ENOMEM);
+    }
+    cvClearMemStorage(facedetect->storage);
+
+    /* resources allocated above are released by facedetect_uninit() on error */
+    facedetect->cascade = (CvHaarClassifierCascade *)cvLoad(facedetect->xml_model, NULL, NULL, NULL);
+    if (!facedetect->cascade) {
+        av_log(ctx, AV_LOG_ERROR, "failed to load classifier cascade: %s\n", facedetect->xml_model);
+        return AVERROR(EINVAL);
+    }
+
+    return 0;
+}
+
+/* Release everything facedetect_init() allocated.  xml_model comes from
+ * av_get_token() and was previously leaked. */
+static av_cold void facedetect_uninit(AVFilterContext *ctx)
+{
+    OCVContext *s = ctx->priv;
+    FaceDetectContext *facedetect = s->priv;
+
+    av_freep(&facedetect->xml_model);
+    if (facedetect->cascade)
+        cvReleaseHaarClassifierCascade(&facedetect->cascade);
+    if (facedetect->storage)
+        cvReleaseMemStorage(&facedetect->storage);
+}
+
+/* Store integer d under the metadata key "lavfi.facedetect.<idx>.<key>". */
+static void set_meta_int(AVDictionary **metadata, const char *key, int idx, int d)
+{
+    char full_key[128];
+
+    snprintf(full_key, sizeof(full_key), "lavfi.facedetect.%d.%s", idx, key);
+    av_dict_set_int(metadata, full_key, d, 0);
+}
+
+/* Run the Haar cascade over the input image and cache the resulting
+ * sequence of face rectangles; outimg is ignored because facedetect
+ * passes the input frame through unchanged (see filter_frame()). */
+static void facedetect_end_frame_filter(AVFilterContext *ctx, IplImage *inimg, IplImage *outimg)
+{
+    OCVContext *s = ctx->priv;
+    FaceDetectContext *facedetect = s->priv;
+
+    /* scale factor 1.25, 3 min neighbors; minimum object size is 1/16 of
+     * the frame per dimension, cvSize(0,0) places no maximum size limit */
+    facedetect->faces = cvHaarDetectObjects(inimg, facedetect->cascade, facedetect->storage,
+                                            1.25, 3, CV_HAAR_DO_CANNY_PRUNING,
+                                            cvSize(inimg->width/16,inimg->height/16), cvSize(0,0));
+
+    facedetect->nb_faces = facedetect->faces ? facedetect->faces->total : 0;
+}
+
+/* Export the cached detection results on the outgoing frame: optional
+ * AVRegionOfInterest side data (when add_roi is set) plus per-face
+ * metadata entries (lavfi.facedetect.*). */
+static void facedetect_update_metadata(AVFilterContext *ctx, AVFrame *out)
+{
+    OCVContext *s = ctx->priv;
+    FaceDetectContext *facedetect = s->priv;
+    AVRegionOfInterest *roi;
+    AVFrameSideData *sd;
+    int i;
+
+    if (facedetect->add_roi && facedetect->nb_faces > 0) {
+        sd = av_frame_new_side_data(out, AV_FRAME_DATA_REGIONS_OF_INTEREST,
+                                    facedetect->nb_faces * sizeof(AVRegionOfInterest));
+        if (!sd) {
+            /* void function: a value cannot be returned here (the previous
+             * "return AVERROR(ENOMEM)" did not compile); log and skip the
+             * side data, the metadata below is still exported */
+            av_log(ctx, AV_LOG_ERROR, "failed to allocate ROI side data\n");
+        } else {
+            roi = (AVRegionOfInterest*)sd->data;
+            for (i = 0; i < facedetect->nb_faces; i++) {
+                CvRect *r = (CvRect*)cvGetSeqElem(facedetect->faces, i);
+
+                roi[i] = (AVRegionOfInterest) {
+                    .self_size = sizeof(*roi),
+                    .top       = r->y,
+                    .bottom    = r->y + r->height,
+                    .left      = r->x,
+                    .right     = r->x + r->width,
+                    .qoffset   = facedetect->qoffset,
+                };
+            }
+        }
+    }
+
+    if (facedetect->nb_faces > 0)
+        av_dict_set_int(&out->metadata, "lavfi.facedetect.nb_faces", facedetect->nb_faces, 0);
+
+    for (i = 0; i < facedetect->nb_faces; i++) {
+        CvRect *r = (CvRect*)cvGetSeqElem(facedetect->faces, i);
+
+        set_meta_int(&out->metadata, "x", i, r->x);
+        set_meta_int(&out->metadata, "y", i, r->y);
+        set_meta_int(&out->metadata, "w", i, r->width);
+        set_meta_int(&out->metadata, "h", i, r->height);
+    }
+}
+
typedef struct OCVFilterEntry {
const char *name;
size_t priv_size;
int (*init)(AVFilterContext *ctx, const char *args);
void (*uninit)(AVFilterContext *ctx);
void (*end_frame_filter)(AVFilterContext *ctx, IplImage *inimg, IplImage *outimg);
+ void (*update_metadata)(AVFilterContext *ctx, AVFrame *frame);
} OCVFilterEntry;
static const OCVFilterEntry ocv_filter_entries[] = {
- { "dilate", sizeof(DilateContext), dilate_init, dilate_uninit, dilate_end_frame_filter },
- { "erode", sizeof(DilateContext), dilate_init, dilate_uninit, erode_end_frame_filter },
- { "smooth", sizeof(SmoothContext), smooth_init, NULL, smooth_end_frame_filter },
+ { "dilate", sizeof(DilateContext), dilate_init, dilate_uninit, dilate_end_frame_filter, NULL },
+ { "erode", sizeof(DilateContext), dilate_init, dilate_uninit, erode_end_frame_filter, NULL },
+ { "smooth", sizeof(SmoothContext), smooth_init, NULL, smooth_end_frame_filter, NULL },
+ { "facedetect", sizeof(FaceDetectContext), facedetect_init, facedetect_uninit, facedetect_end_frame_filter, facedetect_update_metadata },
};
static av_cold int init(AVFilterContext *ctx)
@@ -355,6 +497,7 @@ static av_cold int init(AVFilterContext *ctx)
s->init = entry->init;
s->uninit = entry->uninit;
s->end_frame_filter = entry->end_frame_filter;
+ s->update_metadata = entry->update_metadata;
if (!(s->priv = av_mallocz(entry->priv_size)))
return AVERROR(ENOMEM);
@@ -383,18 +526,33 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
AVFrame *out;
IplImage inimg, outimg;
+    /* the facedetect filter passes the input frame through unchanged */
+ if (strcmp(s->name, "facedetect")) {
out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
if (!out) {
av_frame_free(&in);
return AVERROR(ENOMEM);
}
av_frame_copy_props(out, in);
+ } else {
+ out = in;
+ }
fill_iplimage_from_frame(&inimg , in , inlink->format);
+
+ if (strcmp(s->name, "facedetect")) {
fill_iplimage_from_frame(&outimg, out, inlink->format);
s->end_frame_filter(ctx, &inimg, &outimg);
fill_frame_from_iplimage(out, &outimg, inlink->format);
+ } else {
+ s->end_frame_filter(ctx, &inimg, NULL);
+ }
+
+ if (s->update_metadata) {
+ s->update_metadata(ctx, out);
+ }
+ if (out != in)
av_frame_free(&in);
return ff_filter_frame(outlink, out);
--
2.9.4
More information about the ffmpeg-devel
mailing list