[FFmpeg-devel] [PATCH 3/4] avfilter/vf_dnn_processing: add format GRAY8 and GRAYF32 support
Guo, Yejun
yejun.guo at intel.com
Fri Nov 22 09:50:11 EET 2019
Signed-off-by: Guo, Yejun <yejun.guo at intel.com>
---
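For reference, the pixel format / model input pairing that the updated documentation and the new config_input() checks describe can be summarized as a small table. This is an illustrative sketch only, not part of the patch: the FormatRule struct and format_rules array are hypothetical names, while the AV_PIX_FMT_* values and the DNN_UINT8/DNN_FLOAT data types are the ones used in the tree.

#include "libavutil/pixfmt.h"

/* Hypothetical summary table (sketch only): which pixel formats the
 * filter accepts after this patch, and what each one requires from
 * the model input. */
typedef struct FormatRule {
    enum AVPixelFormat fmt;
    int required_channels;   /* model_input.channels must equal this */
    int accepts_uint8;       /* model_input.dt may be DNN_UINT8      */
    int accepts_float;       /* model_input.dt may be DNN_FLOAT      */
} FormatRule;

static const FormatRule format_rules[] = {
    { AV_PIX_FMT_RGB24,   3, 1, 1 },  /* float input: filter converts uint8 pixels to float */
    { AV_PIX_FMT_BGR24,   3, 1, 1 },
    { AV_PIX_FMT_GRAY8,   1, 1, 0 },  /* uint8 only */
    { AV_PIX_FMT_GRAYF32, 1, 0, 1 },  /* float only */
};

The config_input() changes in the diff below enforce exactly these combinations with an if/else chain.
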
doc/filters.texi | 8 ++-
libavfilter/vf_dnn_processing.c | 147 ++++++++++++++++++++++++++++++----------
2 files changed, 118 insertions(+), 37 deletions(-)
diff --git a/doc/filters.texi b/doc/filters.texi
index 1f86ae1..c3f7997 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -8992,7 +8992,13 @@ Set the input name of the dnn network.
Set the output name of the dnn network.
@item fmt
-Set the pixel format for the Frame. Allowed values are @code{AV_PIX_FMT_RGB24}, and @code{AV_PIX_FMT_BGR24}.
+Set the pixel format for the Frame; the value is determined by the input of the dnn network model.
+
+If the model handles RGB (or BGR) images and the data type of the model input is uint8, fmt must be @code{AV_PIX_FMT_RGB24} (or @code{AV_PIX_FMT_BGR24}).
+If the model handles RGB (or BGR) images and the data type of the model input is float, fmt must still be @code{AV_PIX_FMT_RGB24} (or @code{AV_PIX_FMT_BGR24}), and this filter will do the data type conversion internally.
+If the model handles grayscale images and the data type of the model input is uint8, fmt must be @code{AV_PIX_FMT_GRAY8}.
+If the model handles grayscale images and the data type of the model input is float, fmt must be @code{AV_PIX_FMT_GRAYF32}.
+
Default value is @code{AV_PIX_FMT_RGB24}.
@end table
diff --git a/libavfilter/vf_dnn_processing.c b/libavfilter/vf_dnn_processing.c
index ce976ec..963dd5e 100644
--- a/libavfilter/vf_dnn_processing.c
+++ b/libavfilter/vf_dnn_processing.c
@@ -70,10 +70,12 @@ static av_cold int init(AVFilterContext *context)
{
DnnProcessingContext *ctx = context->priv;
int supported = 0;
- // as the first step, only rgb24 and bgr24 are supported
+ // extend this list to support more formats
const enum AVPixelFormat supported_pixel_fmts[] = {
AV_PIX_FMT_RGB24,
AV_PIX_FMT_BGR24,
+ AV_PIX_FMT_GRAY8,
+ AV_PIX_FMT_GRAYF32,
};
for (int i = 0; i < sizeof(supported_pixel_fmts) / sizeof(enum AVPixelFormat); ++i) {
if (supported_pixel_fmts[i] == ctx->fmt) {
@@ -156,14 +158,38 @@ static int config_input(AVFilterLink *inlink)
return AVERROR(EIO);
}
- if (model_input.channels != 3) {
- av_log(ctx, AV_LOG_ERROR, "the model requires input channels %d\n",
- model_input.channels);
- return AVERROR(EIO);
- }
- if (model_input.dt != DNN_FLOAT && model_input.dt != DNN_UINT8) {
- av_log(ctx, AV_LOG_ERROR, "only support dnn models with input data type as float32 and uint8.\n");
- return AVERROR(EIO);
+ if (ctx->fmt == AV_PIX_FMT_RGB24 || ctx->fmt == AV_PIX_FMT_BGR24) {
+ if (model_input.channels != 3) {
+ av_log(ctx, AV_LOG_ERROR, "channel number 3 is required, but the actual channel number is %d\n",
+ model_input.channels);
+ return AVERROR(EIO);
+ }
+ if (model_input.dt != DNN_FLOAT && model_input.dt != DNN_UINT8) {
+ av_log(ctx, AV_LOG_ERROR, "only DNN models with input data type float32 or uint8 are supported.\n");
+ return AVERROR(EIO);
+ }
+ } else if (ctx->fmt == AV_PIX_FMT_GRAY8) {
+ if (model_input.channels != 1) {
+ av_log(ctx, AV_LOG_ERROR, "channel number 1 is required, but the actual channel number is %d\n",
+ model_input.channels);
+ return AVERROR(EIO);
+ }
+ if (model_input.dt != DNN_UINT8) {
+ av_log(ctx, AV_LOG_ERROR, "only DNN models with input data type uint8 are supported.\n");
+ return AVERROR(EIO);
+ }
+ } else if (ctx->fmt == AV_PIX_FMT_GRAYF32) {
+ if (model_input.channels != 1) {
+ av_log(ctx, AV_LOG_ERROR, "channel number 1 is required, but the actual channel number is %d\n",
+ model_input.channels);
+ return AVERROR(EIO);
+ }
+ if (model_input.dt != DNN_FLOAT) {
+ av_log(ctx, AV_LOG_ERROR, "only DNN models with input data type float32 are supported.\n");
+ return AVERROR(EIO);
+ }
+ } else {
+ av_assert0(!"should not reach here.");
}
ctx->input.width = inlink->w;
@@ -203,28 +229,49 @@ static int config_output(AVFilterLink *outlink)
static int copy_from_frame_to_dnn(DNNData *dnn_input, const AVFrame *frame)
{
- // extend this function to support more formats
- av_assert0(frame->format == AV_PIX_FMT_RGB24 || frame->format == AV_PIX_FMT_BGR24);
-
- if (dnn_input->dt == DNN_FLOAT) {
- float *dnn_input_data = dnn_input->data;
- for (int i = 0; i < frame->height; i++) {
- for(int j = 0; j < frame->width * 3; j++) {
- int k = i * frame->linesize[0] + j;
- int t = i * frame->width * 3 + j;
- dnn_input_data[t] = frame->data[0][k] / 255.0f;
+ if (frame->format == AV_PIX_FMT_RGB24 || frame->format == AV_PIX_FMT_BGR24) {
+ if (dnn_input->dt == DNN_FLOAT) {
+ float *dnn_input_data = dnn_input->data;
+ for (int i = 0; i < frame->height; i++) {
+ for(int j = 0; j < frame->width * 3; j++) {
+ int k = i * frame->linesize[0] + j;
+ int t = i * frame->width * 3 + j;
+ dnn_input_data[t] = frame->data[0][k] / 255.0f;
+ }
+ }
+ } else {
+ uint8_t *dnn_input_data = dnn_input->data;
+ av_assert0(dnn_input->dt == DNN_UINT8);
+ for (int i = 0; i < frame->height; i++) {
+ for(int j = 0; j < frame->width * 3; j++) {
+ int k = i * frame->linesize[0] + j;
+ int t = i * frame->width * 3 + j;
+ dnn_input_data[t] = frame->data[0][k];
+ }
}
}
- } else {
+ } else if (frame->format == AV_PIX_FMT_GRAY8) {
uint8_t *dnn_input_data = dnn_input->data;
av_assert0(dnn_input->dt == DNN_UINT8);
for (int i = 0; i < frame->height; i++) {
- for(int j = 0; j < frame->width * 3; j++) {
+ for(int j = 0; j < frame->width; j++) {
int k = i * frame->linesize[0] + j;
- int t = i * frame->width * 3 + j;
+ int t = i * frame->width + j;
dnn_input_data[t] = frame->data[0][k];
}
}
+ } else if (frame->format == AV_PIX_FMT_GRAYF32) {
+ float *dnn_input_data = dnn_input->data;
+ av_assert0(dnn_input->dt == DNN_FLOAT);
+ for (int i = 0; i < frame->height; i++) {
+ for(int j = 0; j < frame->width; j++) {
+ int k = i * frame->linesize[0] + j * sizeof(float);
+ int t = i * frame->width + j;
+ dnn_input_data[t] = *(float*)(frame->data[0] + k);
+ }
+ }
+ } else {
+ av_assert0(!"should not reach here.");
}
return 0;
@@ -232,28 +279,49 @@ static int copy_from_frame_to_dnn(DNNData *dnn_input, const AVFrame *frame)
static int copy_from_dnn_to_frame(AVFrame *frame, const DNNData *dnn_output)
{
- // extend this function to support more formats
- av_assert0(frame->format == AV_PIX_FMT_RGB24 || frame->format == AV_PIX_FMT_BGR24);
-
- if (dnn_output->dt == DNN_FLOAT) {
- float *dnn_output_data = dnn_output->data;
- for (int i = 0; i < frame->height; i++) {
- for(int j = 0; j < frame->width * 3; j++) {
- int k = i * frame->linesize[0] + j;
- int t = i * frame->width * 3 + j;
- frame->data[0][k] = av_clip_uintp2((int)(dnn_output_data[t] * 255.0f), 8);
+ if (frame->format == AV_PIX_FMT_RGB24 || frame->format == AV_PIX_FMT_BGR24) {
+ if (dnn_output->dt == DNN_FLOAT) {
+ float *dnn_output_data = dnn_output->data;
+ for (int i = 0; i < frame->height; i++) {
+ for(int j = 0; j < frame->width * 3; j++) {
+ int k = i * frame->linesize[0] + j;
+ int t = i * frame->width * 3 + j;
+ frame->data[0][k] = av_clip_uintp2((int)(dnn_output_data[t] * 255.0f), 8);
+ }
+ }
+ } else {
+ uint8_t *dnn_output_data = dnn_output->data;
+ av_assert0(dnn_output->dt == DNN_UINT8);
+ for (int i = 0; i < frame->height; i++) {
+ for(int j = 0; j < frame->width * 3; j++) {
+ int k = i * frame->linesize[0] + j;
+ int t = i * frame->width * 3 + j;
+ frame->data[0][k] = dnn_output_data[t];
+ }
}
}
- } else {
+ } else if (frame->format == AV_PIX_FMT_GRAY8) {
uint8_t *dnn_output_data = dnn_output->data;
av_assert0(dnn_output->dt == DNN_UINT8);
for (int i = 0; i < frame->height; i++) {
- for(int j = 0; j < frame->width * 3; j++) {
+ for(int j = 0; j < frame->width; j++) {
int k = i * frame->linesize[0] + j;
- int t = i * frame->width * 3 + j;
+ int t = i * frame->width + j;
frame->data[0][k] = dnn_output_data[t];
}
}
+ } else if (frame->format == AV_PIX_FMT_GRAYF32) {
+ float *dnn_output_data = dnn_output->data;
+ av_assert0(dnn_output->dt == DNN_FLOAT);
+ for (int i = 0; i < frame->height; i++) {
+ for(int j = 0; j < frame->width; j++) {
+ int k = i * frame->linesize[0] + j * sizeof(float);
+ int t = i * frame->width + j;
+ *(float*)(frame->data[0] + k) = dnn_output_data[t];
+ }
+ }
+ } else {
+ av_assert0(!"should not reach here.");
}
return 0;
@@ -275,7 +343,14 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
av_frame_free(&in);
return AVERROR(EIO);
}
- av_assert0(ctx->output.channels == 3);
+
+ if (ctx->fmt == AV_PIX_FMT_RGB24 || ctx->fmt == AV_PIX_FMT_BGR24) {
+ av_assert0(ctx->output.channels == 3);
+ } else if (ctx->fmt == AV_PIX_FMT_GRAY8 || ctx->fmt == AV_PIX_FMT_GRAYF32) {
+ av_assert0(ctx->output.channels == 1);
+ } else {
+ av_assert0(!"should not reach here");
+ }
out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
if (!out) {
--
2.7.4
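
A note on the copy helpers changed above: the buffer handed to the DNN backend is tightly packed in row-major order (exactly width elements per row), while AVFrame rows are linesize bytes apart and may carry padding. The following is a minimal standalone sketch of that layout conversion for a single GRAYF32 plane; it is not part of the patch, the helper names are hypothetical, and it uses one memcpy per row instead of the per-pixel loops in copy_from_frame_to_dnn()/copy_from_dnn_to_frame().

#include <stdint.h>
#include <string.h>

/* Sketch: strided AVFrame float plane -> tightly packed DNN input buffer. */
static void pack_grayf32_plane(float *dst, const uint8_t *src,
                               int width, int height, int linesize)
{
    for (int i = 0; i < height; i++)
        memcpy(dst + i * width, src + i * linesize, width * sizeof(float));
}

/* Sketch: tightly packed DNN output buffer -> strided AVFrame float plane. */
static void unpack_grayf32_plane(uint8_t *dst, const float *src,
                                 int width, int height, int linesize)
{
    for (int i = 0; i < height; i++)
        memcpy(dst + i * linesize, src + i * width, width * sizeof(float));
}

The RGB24/BGR24 float path in the patch additionally rescales on top of this packing: bytes are divided by 255.0f on the way into the model, and the result is converted back with av_clip_uintp2((int)(x * 255.0f), 8) on the way out.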