[FFmpeg-devel] [PATCH v3 2/5] avfilter/vf_libopencv: add opencv HaarCascade classifier simple face detection filter
Paul B Mahol
onemda at gmail.com
Mon May 18 14:11:12 EEST 2020
This OpenCV module is obsolete with the latest OpenCV.
Instead, there should be a C++ wrapper.
On 5/18/20, lance.lmwang at gmail.com <lance.lmwang at gmail.com> wrote:
> From: Limin Wang <lance.lmwang at gmail.com>
>
> Signed-off-by: Limin Wang <lance.lmwang at gmail.com>
> ---
> The only change is renaming update_metadata() to postprocess(). I'll add an
> opencv drawbox filter, and it needs preprocess() to get the metadata, so I
> prefer to change the function name for better readability; in the future it
> may do other processing than metadata only.
>
> configure | 1 +
> doc/filters.texi | 29 +++++++
> libavfilter/vf_libopencv.c | 164 ++++++++++++++++++++++++++++++++++++-
> 3 files changed, 191 insertions(+), 3 deletions(-)
>
> diff --git a/configure b/configure
> index 34afdaad28..281b67efc4 100755
> --- a/configure
> +++ b/configure
> @@ -2123,6 +2123,7 @@ HEADERS_LIST="
> machine_ioctl_meteor_h
> malloc_h
> opencv2_core_core_c_h
> + opencv2_objdetect_objdetect_c_h
> OpenGL_gl3_h
> poll_h
> sys_param_h
> diff --git a/doc/filters.texi b/doc/filters.texi
> index d9ba0fffa1..f938dd04de 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -14177,6 +14177,35 @@ other parameters is 0.
> These parameters correspond to the parameters assigned to the
> libopencv function @code{cvSmooth}.
>
> +@subsection facedetect
> +Face detection using Haar Feature-based Cascade Classifiers.
> +
> +The filter takes the following parameters:
> +@var{xml_model}|@var{qoffset}.
> +
> +@var{xml_model} is the path of the pre-trained classifier. The C API still
> +does not support the newer cascade format, so please use the old-format
> +haarcascade_frontalface_alt.xml, whose type_id is opencv-haar-classifier.
> +
> +@var{qoffset}
> +If you want to export the detected faces as ROI side data in the frame,
> +please set this parameter. See also the @ref{addroi} filter. The range of
> +qoffset is [-1.0, 1.0].
> +
> +By default the filter will report these metadata values if faces are
> +detected:
> +@table @option
> +@item lavfi.facedetect.nb_faces
> +The number of detected faces.
> +
> +@item lavfi.facedetect.face_id.x, lavfi.facedetect.face_id.y
> +The x and y position of each face; face_id is the face index, in the range
> +[0, nb_faces-1].
> +
> +@item lavfi.facedetect.face_id.w, lavfi.facedetect.face_id.h
> +The width and height of each face; face_id is the face index, in the range
> +[0, nb_faces-1].
> +@end table
> +
> @section oscilloscope
>
> 2D Video Oscilloscope.
> diff --git a/libavfilter/vf_libopencv.c b/libavfilter/vf_libopencv.c
> index 8128030b8c..b2d19bb241 100644
> --- a/libavfilter/vf_libopencv.c
> +++ b/libavfilter/vf_libopencv.c
> @@ -1,5 +1,6 @@
> /*
> * Copyright (c) 2010 Stefano Sabatini
> + * Copyright (c) 2020 Limin Wang
> *
> * This file is part of FFmpeg.
> *
> @@ -27,10 +28,16 @@
> #if HAVE_OPENCV2_CORE_CORE_C_H
> #include <opencv2/core/core_c.h>
> #include <opencv2/imgproc/imgproc_c.h>
> +/* configure's HEADERS_LIST entry opencv2_objdetect_objdetect_c_h yields
> + * HAVE_OPENCV2_OBJDETECT_OBJDETECT_C_H, so that exact name is tested. */
> +#if HAVE_OPENCV2_OBJDETECT_OBJDETECT_C_H
> +#include <opencv2/objdetect/objdetect_c.h>
> +#else
> +#include <opencv/cv.h>
> +#endif
> #else
> #include <opencv/cv.h>
> #include <opencv/cxcore.h>
> #endif
> +
> #include "libavutil/avstring.h"
> #include "libavutil/common.h"
> #include "libavutil/file.h"
> @@ -82,6 +89,7 @@ typedef struct OCVContext {
> int (*init)(AVFilterContext *ctx, const char *args);
> void (*uninit)(AVFilterContext *ctx);
> void (*end_frame_filter)(AVFilterContext *ctx, IplImage *inimg,
> IplImage *outimg);
> + void (*postprocess)(AVFilterContext *ctx, AVFrame *out);
> void *priv;
> } OCVContext;
>
> @@ -326,18 +334,152 @@ static void erode_end_frame_filter(AVFilterContext
> *ctx, IplImage *inimg, IplIma
> cvErode(inimg, outimg, dilate->kernel, dilate->nb_iterations);
> }
>
> +typedef struct FaceDetectContext {
> + char *xml_model; /* cascade file path: first '|'-separated token of the filter args */
> + CvHaarClassifierCascade* cascade; /* classifier returned by cvLoad(xml_model) */
> + CvMemStorage* storage; /* OpenCV storage handed to cvHaarDetectObjects() */
> + int nb_faces; /* faces->total of the last detection, 0 when faces is NULL */
> + CvSeq *faces; /* sequence returned by the last cvHaarDetectObjects() call */
> + int add_roi; /* set to 1 when a valid qoffset argument was parsed */
> + AVRational qoffset; /* av_d2q() of the qoffset argument, copied into each ROI */
> +} FaceDetectContext;
> +
> +/**
> + * Parse the "xml_model|qoffset" argument string and load the Haar cascade.
> + *
> + * @param ctx  filter context; ctx->priv is the OCVContext whose priv field
> + *             holds the FaceDetectContext to fill in
> + * @param args "xml_model" or "xml_model|qoffset"; NULL is rejected
> + * @return 0 on success, a negative AVERROR code on failure
> + */
> +static av_cold int facedetect_init(AVFilterContext *ctx, const char *args)
> +{
> +    OCVContext *s = ctx->priv;
> +    FaceDetectContext *facedetect = s->priv;
> +    const char *buf = args;
> +    double qoffset;
> +
> +    if (!args) {
> +        av_log(ctx, AV_LOG_ERROR,
> +               "failed to get haarcascade_frontalface_alt.xml model file\n");
> +        return AVERROR(EINVAL);
> +    }
> +
> +    facedetect->xml_model = av_get_token(&buf, "|");
> +    if (!facedetect->xml_model) {
> +        /* xml_model is NULL on this path, so it must not be handed to
> +         * "%s"; a NULL token means the allocation failed. */
> +        av_log(ctx, AV_LOG_ERROR, "failed to get %s\n", args);
> +        return AVERROR(ENOMEM);
> +    }
> +
> +    /* An optional second field enables ROI side data export. */
> +    if (buf && sscanf(buf, "|%lf", &qoffset) == 1) {
> +        if (qoffset < -1.0 || qoffset > 1.0) {
> +            av_log(ctx, AV_LOG_ERROR,
> +                   "failed to get valid qoffset(%f))\n", qoffset);
> +            return AVERROR(EINVAL);
> +        }
> +        facedetect->add_roi = 1;
> +        facedetect->qoffset = av_d2q(qoffset, 255);
> +    }
> +
> +    av_log(ctx, AV_LOG_VERBOSE,
> +           "xml_model: %s add_roi: %d qoffset: %d/%d\n",
> +           facedetect->xml_model, facedetect->add_roi,
> +           facedetect->qoffset.num, facedetect->qoffset.den);
> +
> +    facedetect->storage = cvCreateMemStorage(0);
> +    if (!facedetect->storage) {
> +        av_log(ctx, AV_LOG_ERROR, "cvCreateMemStorage() failed\n");
> +        return AVERROR(EINVAL);
> +    }
> +    cvClearMemStorage(facedetect->storage);
> +
> +    facedetect->cascade = (CvHaarClassifierCascade*)
> +        cvLoad(facedetect->xml_model, NULL, NULL, NULL);
> +    if (!facedetect->cascade) {
> +        av_log(ctx, AV_LOG_ERROR,
> +               "failed to load classifier cascade: %s \n",
> +               facedetect->xml_model);
> +        return AVERROR(EINVAL);
> +    }
> +
> +    return 0;
> +}
> +
> +/**
> + * Release everything facedetect_init() acquired: the classifier cascade,
> + * the OpenCV memory storage and the model path string.
> + *
> + * @param ctx filter context; ctx->priv is the OCVContext whose priv field
> + *            holds the FaceDetectContext
> + */
> +static av_cold void facedetect_uninit(AVFilterContext *ctx)
> +{
> +    OCVContext *s = ctx->priv;
> +    FaceDetectContext *facedetect = s->priv;
> +
> +    if (facedetect->cascade)
> +        cvReleaseHaarClassifierCascade(&facedetect->cascade);
> +    if (facedetect->storage)
> +        cvReleaseMemStorage(&facedetect->storage);
> +    /* xml_model was allocated by av_get_token() and is owned by us. */
> +    av_freep(&facedetect->xml_model);
> +}
> +
> +/* Store the integer d in metadata under "lavfi.facedetect.<idx>.<key>". */
> +static void set_meta_int(AVDictionary **metadata, const char *key,
> +                         int idx, int d)
> +{
> +    char full_key[128];
> +    char text[128];
> +
> +    snprintf(full_key, sizeof(full_key), "lavfi.facedetect.%d.%s", idx, key);
> +    snprintf(text, sizeof(text), "%d", d);
> +    av_dict_set(metadata, full_key, text, 0);
> +}
> +
> +/**
> + * Run the Haar cascade on one input image and remember the result.
> + *
> + * The detected rectangles are kept in facedetect->faces and their count in
> + * facedetect->nb_faces.
> + *
> + * @param ctx    filter context
> + * @param inimg  image to search for faces
> + * @param outimg not used by this function
> + */
> +static void facedetect_end_frame_filter(AVFilterContext *ctx,
> +                                        IplImage *inimg, IplImage *outimg)
> +{
> +    OCVContext *s = ctx->priv;
> +    FaceDetectContext *facedetect = s->priv;
> +
> +    /* Drop the previous frame's sequence so the storage is reused instead
> +     * of growing with every frame. */
> +    cvClearMemStorage(facedetect->storage);
> +
> +    facedetect->faces = cvHaarDetectObjects(inimg, facedetect->cascade,
> +            facedetect->storage, 1.25, 3, CV_HAAR_DO_CANNY_PRUNING,
> +            cvSize(inimg->width/16,inimg->height/16), cvSize(0,0));
> +
> +    facedetect->nb_faces = facedetect->faces ? facedetect->faces->total : 0;
> +}
> +
> +/**
> + * Export the faces stored in the FaceDetectContext on the output frame.
> + *
> + * When ROI export is enabled and at least one face was found, one
> + * AVRegionOfInterest per face is attached as side data. The face count and
> + * every face rectangle are also written to the frame metadata.
> + *
> + * @param ctx filter context
> + * @param out frame that receives the side data and metadata
> + */
> +static void facedetect_postprocess(AVFilterContext *ctx, AVFrame *out)
> +{
> +    OCVContext *s = ctx->priv;
> +    FaceDetectContext *facedetect = s->priv;
> +    AVRegionOfInterest *roi;
> +    AVFrameSideData *sd;
> +    int i;
> +
> +    if (facedetect->add_roi && facedetect->nb_faces > 0) {
> +        sd = av_frame_new_side_data(out, AV_FRAME_DATA_REGIONS_OF_INTEREST,
> +                 facedetect->nb_faces * sizeof(AVRegionOfInterest));
> +        if (!sd) {
> +            /* This callback returns void, so the failure can only be
> +             * logged; the frame goes on without face information. */
> +            av_log(ctx, AV_LOG_ERROR, "failed to allocate ROI side data\n");
> +            return;
> +        }
> +        roi = (AVRegionOfInterest*)sd->data;
> +        for (i = 0; i < facedetect->nb_faces; i++) {
> +            CvRect *r = (CvRect*) cvGetSeqElem(facedetect->faces, i);
> +
> +            roi[i] = (AVRegionOfInterest) {
> +                .self_size = sizeof(*roi),
> +                .top       = r->y,
> +                .bottom    = r->y + r->height,
> +                .left      = r->x,
> +                .right     = r->x + r->width,
> +                .qoffset   = facedetect->qoffset,
> +            };
> +        }
> +    }
> +
> +    if (facedetect->nb_faces > 0)
> +        av_dict_set_int(&out->metadata, "lavfi.facedetect.nb_faces",
> +                        facedetect->nb_faces, 0);
> +
> +    for (i = 0; i < facedetect->nb_faces; i++) {
> +        CvRect *r = (CvRect*) cvGetSeqElem(facedetect->faces, i);
> +
> +        set_meta_int(&out->metadata, "x", i, r->x);
> +        set_meta_int(&out->metadata, "y", i, r->y);
> +        set_meta_int(&out->metadata, "w", i, r->width);
> +        set_meta_int(&out->metadata, "h", i, r->height);
> +    }
> +}
> +
> typedef struct OCVFilterEntry {
> const char *name; /* filter name of this table row, e.g. "dilate" */
> size_t priv_size; /* bytes av_mallocz()'d for OCVContext.priv in init() */
> int (*init)(AVFilterContext *ctx, const char *args); /* copied to OCVContext.init by init() */
> void (*uninit)(AVFilterContext *ctx); /* copied to OCVContext.uninit by init() */
> void (*end_frame_filter)(AVFilterContext *ctx, IplImage *inimg,
IplImage *outimg); /* per-frame callback invoked from filter_frame() */
> + void (*postprocess)(AVFilterContext *ctx, AVFrame *out); /* may be NULL; otherwise run on the output frame after end_frame_filter */
> } OCVFilterEntry;
>
> static const OCVFilterEntry ocv_filter_entries[] = { /* one row per filter; the last initializer is postprocess, NULL where unused */
> - { "dilate", sizeof(DilateContext), dilate_init, dilate_uninit,
> dilate_end_frame_filter },
> - { "erode", sizeof(DilateContext), dilate_init, dilate_uninit,
> erode_end_frame_filter },
> - { "smooth", sizeof(SmoothContext), smooth_init, NULL,
> smooth_end_frame_filter },
> + { "dilate", sizeof(DilateContext), dilate_init, dilate_uninit,
> dilate_end_frame_filter, NULL },
> + { "erode", sizeof(DilateContext), dilate_init, dilate_uninit,
> erode_end_frame_filter, NULL },
> + { "smooth", sizeof(SmoothContext), smooth_init, NULL,
> smooth_end_frame_filter, NULL },
> + { "facedetect", sizeof(FaceDetectContext), facedetect_init,
> facedetect_uninit, facedetect_end_frame_filter, facedetect_postprocess },
> };
>
> static av_cold int init(AVFilterContext *ctx)
> @@ -355,6 +497,7 @@ static av_cold int init(AVFilterContext *ctx)
> s->init = entry->init;
> s->uninit = entry->uninit;
> s->end_frame_filter = entry->end_frame_filter;
> + s->postprocess = entry->postprocess;
>
> if (!(s->priv = av_mallocz(entry->priv_size)))
> return AVERROR(ENOMEM);
> @@ -383,18 +526,33 @@ static int filter_frame(AVFilterLink *inlink, AVFrame
> *in)
> AVFrame *out;
> IplImage inimg, outimg;
>
> + /* the facedetect filter passes the input frame through */
> + if (strcmp(s->name, "facedetect")) {
> out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
> if (!out) {
> av_frame_free(&in);
> return AVERROR(ENOMEM);
> }
> av_frame_copy_props(out, in);
> + } else {
> + out = in;
> + }
>
> fill_iplimage_from_frame(&inimg , in , inlink->format);
> +
> + if (strcmp(s->name, "facedetect")) {
> fill_iplimage_from_frame(&outimg, out, inlink->format);
> s->end_frame_filter(ctx, &inimg, &outimg);
> fill_frame_from_iplimage(out, &outimg, inlink->format);
> + } else {
> + s->end_frame_filter(ctx, &inimg, NULL);
> + }
> +
> + if (s->postprocess) {
> + s->postprocess(ctx, out);
> + }
>
> + if (out != in)
> av_frame_free(&in);
>
> return ff_filter_frame(outlink, out);
> --
> 2.21.0
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
More information about the ffmpeg-devel
mailing list