[FFmpeg-devel] [PATCH] avfilter: Added siti filter

Tue Feb 22 13:26:31 EET 2022

Am 15.02.22 um 09:54 schrieb Anton Khirnov:
> Quoting Thilo Borgmann (2022-02-12 11:55:39)
>> Am 31.01.22 um 12:55 schrieb James Almer:
>> +static int config_input(AVFilterLink *inlink)
>> +{
>> +    // Video input data avilable
>> +    AVFilterContext *ctx = inlink->dst;
>> +    SiTiContext *s = ctx->priv;
>> +    int max_pixsteps[4];
>> +    size_t pixel_sz;
>> +    size_t data_sz;
>> +    size_t gradient_sz;
>> +    size_t motion_sz;
>> +
>> +    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
>> +    av_image_fill_max_pixsteps(max_pixsteps, NULL, desc);
>> +
>> +    s->pixel_depth = max_pixsteps[0];
>> +    s->width = inlink->w;
>> +    s->height = inlink->h;
>> +    pixel_sz = s->pixel_depth == 1 ? sizeof(uint8_t) : sizeof(uint16_t);
>> +    data_sz = s->width * pixel_sz * s->height;
>> +
>> +    s->prev_frame = av_malloc(data_sz);
>> +
>> +    gradient_sz = (s->width - 2) * sizeof(double) * (s->height - 2);
>> +    s->gradient_matrix = (double*)av_malloc(gradient_sz);
>> +
>> +    motion_sz = s->width * sizeof(double) * s->height;
>> +    s->motion_matrix = (double*)av_malloc(motion_sz);
> 
> useless casts
> 
>> +
>> +    if (!s->prev_frame || ! s->gradient_matrix || !s->motion_matrix) {
>> +        av_freep(&s->prev_frame);
>> +        av_freep(&s->gradient_matrix);
>> +        av_freep(&s->motion_matrix);
> 
> You don't need to free them on failure, that will be done in uninit. But
> you should free them at the beginning of this function, because
> config_input can be called multiple times.

Always freep'd and fail on alloc error.


>> +// Applies sobel convolution
>> +static void convolve_sobel(SiTiContext *s, const uint8_t *src, double *dst, int linesize)
>> +{
>> +    double x_conv_sum;
>> +    double y_conv_sum;
>> +    double gradient;
>> +    int ki;
>> +    int kj;
>> +    int index;
>> +    uint16_t data;
>> +    int filter_width = 3;
>> +    int filter_size = filter_width * filter_width;
>> +    int stride = linesize / s->pixel_depth;
>> +
>> +    // Dst matrix is smaller than src since we ignore edges that can't be convolved
>> +    #define CONVOLVE(bps)                                           \
>> +    {                                                               \
>> +        uint##bps##_t *vsrc = (uint##bps##_t*)src;                  \
>> +        for (int j = 1; j < s->height - 1; j++) {                   \
>> +            for (int i = 1; i < s->width - 1; i++) {                \
>> +                x_conv_sum = 0.0;                                   \
>> +                y_conv_sum = 0.0;                                   \
>> +                for (int k = 0; k < filter_size; k++) {             \
>> +                    ki = k % filter_width - 1;                      \
>> +                    kj = floor(k / filter_width) - 1;               \
>> +                    index = (j + kj) * stride + (i + ki);           \
>> +                    data = convert_full_range(s, vsrc[index]);      \
> 
> Pass bps as a parameter to convert_full_range() instead of accessing
> s->pixel_depth, so the compiler can optimize the branch away.

I am not sure if the changes I did here suit the optimization you had in mind... pls check if v4 does this right.


> 
>> +// Calculate pixel difference between current and previous frame, and update previous
>> +static void calculate_motion(SiTiContext *s, const uint8_t *curr,
>> +                             double *motion_matrix, int linesize)
>> +{
>> +    int stride = linesize / s->pixel_depth;
>> +    double motion;
>> +    int curr_index;
>> +    int prev_index;
>> +    uint16_t curr_data;
>> +
>> +    // Previous frame is already converted to full range
>> +    #define CALCULATE(bps)                                           \
>> +    {                                                                \
>> +        uint##bps##_t *vsrc = (uint##bps##_t*)curr;                  \
>> +        for (int j = 0; j < s->height; j++) {                        \
>> +            for (int i = 0; i < s->width; i++) {                     \
>> +                motion = 0;                                          \
>> +                curr_index = j * stride + i;                         \
>> +                prev_index = j * s->width + i;                       \
>> +                curr_data = convert_full_range(s, vsrc[curr_index]); \
>> +                if (s->nb_frames > 1)                                \
>> +                    motion = curr_data - s->prev_frame[prev_index];  \
>> +                s->prev_frame[prev_index] = curr_data;               \
> 
> previous code accessed this as uint8_t or uint16_t based on bps
> 

Fixed in v4. Attached.

Thanks,
Thilo
-------------- next part --------------
From 8f2c410dbfc7e8651c6654ce57efe144dba62592 Mon Sep 17 00:00:00 2001
From: Boris Baracaldo <borbarak at fb.com>
Date: Tue, 22 Feb 2022 12:23:07 +0100
Subject: [PATCH v4] lavfilter: Add SITI filter

Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
in ITU-T P.910: Subjective video quality assessment methods for multimedia
applications.
---
 Changelog                |   1 +
 doc/filters.texi         |  23 +++
 libavfilter/Makefile     |   1 +
 libavfilter/allfilters.c |   1 +
 libavfilter/version.h    |   2 +-
 libavfilter/vf_siti.c    | 348 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 375 insertions(+), 1 deletion(-)
 create mode 100644 libavfilter/vf_siti.c

diff --git a/Changelog b/Changelog
index 3dde3326be..dcb2c368d2 100644
--- a/Changelog
+++ b/Changelog
@@ -132,6 +132,7 @@ version 4.4:
 - msad video filter
 - gophers protocol
 - RIST protocol via librist
+- siti filter
 
 
 version 4.3:
diff --git a/doc/filters.texi b/doc/filters.texi
index 05d4b1a56e..f8a8a2cb72 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -19875,6 +19875,29 @@ ffmpeg -i input1.mkv -i input2.mkv -filter_complex "[0:v][1:v] signature=nb_inpu
 
 @end itemize
 
+@anchor{siti}
+@section siti
+
+Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined
+in ITU-T P.910: Subjective video quality assessment methods for multimedia
+applications. Available PDF at @url{https://www.itu.int/rec/T-REC-P.910-199909-S/en}.
+
+It accepts the following option:
+
+@table @option
+@item print_summary
+If set to 1, summary statistics will be printed to the console. Default 0.
+@end table
+
+@subsection Examples
+@itemize
+@item
+To calculate SI/TI metrics and print summary:
+@example
+ffmpeg -i input.mp4 -vf siti=print_summary=1 -f null -
+@end example
+@end itemize
+
 @anchor{smartblur}
 @section smartblur
 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 1adbea75bd..3261d05311 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -454,6 +454,7 @@ OBJS-$(CONFIG_SMARTBLUR_FILTER)              += vf_smartblur.o
 OBJS-$(CONFIG_SOBEL_FILTER)                  += vf_convolution.o
 OBJS-$(CONFIG_SOBEL_OPENCL_FILTER)           += vf_convolution_opencl.o opencl.o \
                                                 opencl/convolution.o
+OBJS-$(CONFIG_SITI_FILTER)                   += vf_siti.o
 OBJS-$(CONFIG_SPLIT_FILTER)                  += split.o
 OBJS-$(CONFIG_SPP_FILTER)                    += vf_spp.o qp_table.o
 OBJS-$(CONFIG_SR_FILTER)                     += vf_sr.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 4325a3e557..808c172b28 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -430,6 +430,7 @@ extern const AVFilter ff_vf_shuffleplanes;
 extern const AVFilter ff_vf_sidedata;
 extern const AVFilter ff_vf_signalstats;
 extern const AVFilter ff_vf_signature;
+extern const AVFilter ff_vf_siti;
 extern const AVFilter ff_vf_smartblur;
 extern const AVFilter ff_vf_sobel;
 extern const AVFilter ff_vf_sobel_opencl;
diff --git a/libavfilter/version.h b/libavfilter/version.h
index 1a9849ef82..89714bce84 100644
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -30,7 +30,7 @@
 #include "libavutil/version.h"
 
 #define LIBAVFILTER_VERSION_MAJOR   8
-#define LIBAVFILTER_VERSION_MINOR  25
+#define LIBAVFILTER_VERSION_MINOR  26
 #define LIBAVFILTER_VERSION_MICRO 100
 
 
diff --git a/libavfilter/vf_siti.c b/libavfilter/vf_siti.c
new file mode 100644
index 0000000000..bae6ae3a24
--- /dev/null
+++ b/libavfilter/vf_siti.c
@@ -0,0 +1,348 @@
+/*
+ * Copyright (c) 2021 Boris Baracaldo
+ * Copyright (c) 2022 Thilo Borgmann 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/**
+ * @file
+ * Calculate Spatial Info (SI) and Temporal Info (TI) scores
+ */
+
+#include <math.h>
+
+#include "libavutil/imgutils.h"
+#include "libavutil/internal.h"
+#include "libavutil/opt.h"
+
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+static const int X_FILTER[9] = { // 3x3 kernel for the x term of the sobel gradient, indexed as row * 3 + column
+    1, 0, -1,
+    2, 0, -2,
+    1, 0, -1
+};
+
+static const int Y_FILTER[9] = { // 3x3 kernel for the y term of the sobel gradient, indexed as row * 3 + column
+    1, 2, 1,
+    0, 0, 0,
+    -1, -2, -1
+};
+
+typedef struct SiTiContext {
+    const AVClass *class;
+    int pixel_depth;        // max_pixsteps[0] of the input format; 2 selects the 16-bit code paths, anything else the 8-bit ones
+    int width, height;      // dimensions of the input link, set in config_input()
+    uint64_t nb_frames;     // number of frames seen so far, incremented in filter_frame()
+    uint8_t *prev_frame;    // previous frame's samples after range handling; accessed as uint8_t or uint16_t
+    float max_si;           // running maximum of the per-frame SI
+    float max_ti;           // running maximum of the per-frame TI
+    float min_si;           // running minimum of the per-frame SI, seeded by the first frame
+    float min_ti;           // running minimum of the per-frame TI, seeded by the first frame
+    float sum_si;           // sum of all SI values, used for the average in the summary
+    float sum_ti;           // sum of all TI values, used for the average in the summary
+    float *gradient_matrix; // sobel gradient magnitudes, (width - 2) x (height - 2) entries
+    float *motion_matrix;   // per-pixel difference to the previous frame, width x height entries
+    int full_range;         // result of is_full_range() for the frame being processed
+    int print_summary;      // user option: log a summary in uninit()
+} SiTiContext;
+
+static const enum AVPixelFormat pix_fmts[] = { // input formats handed to FILTER_PIXFMTS_ARRAY in the filter definition
+    AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
+    AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
+    AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10,
+    AV_PIX_FMT_NONE
+};
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    // Only user options are known here, no input data yet; start the running maxima at zero
+    SiTiContext *s = ctx->priv;
+    s->max_si = 0;
+    s->max_ti = 0;
+    return 0;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    SiTiContext *s = ctx->priv;
+    // Optionally log the accumulated SI/TI statistics; nb_frames is uint64_t, hence PRIu64
+    if (s->print_summary) {
+        float avg_si = s->nb_frames ? s->sum_si / s->nb_frames : 0; // avoid 0/0 (NaN) when no frame was filtered
+        float avg_ti = s->nb_frames ? s->sum_ti / s->nb_frames : 0;
+        av_log(ctx, AV_LOG_INFO,
+               "SITI Summary:\nTotal frames: %"PRIu64"\n\n"
+               "Spatial Information:\nAverage: %f\nMax: %f\nMin: %f\n\n"
+               "Temporal Information:\nAverage: %f\nMax: %f\nMin: %f\n",
+               s->nb_frames, avg_si, s->max_si, s->min_si, avg_ti, s->max_ti, s->min_ti
+        );
+    }
+    // Release the buffers allocated in config_input(); av_freep() also resets the pointers
+    av_freep(&s->prev_frame);
+    av_freep(&s->gradient_matrix);
+    av_freep(&s->motion_matrix);
+}
+
+static int config_input(AVFilterLink *inlink)
+{
+    // Video input data available: (re)allocate the work buffers for the link's format and size
+    AVFilterContext *ctx = inlink->dst;
+    SiTiContext *s = ctx->priv;
+    int max_pixsteps[4];
+    size_t pixel_sz;
+    size_t data_sz;
+    size_t gradient_sz;
+    size_t motion_sz;
+
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    av_image_fill_max_pixsteps(max_pixsteps, NULL, desc);
+
+    // free previous buffers in case they are allocated already
+    av_freep(&s->prev_frame);
+    av_freep(&s->gradient_matrix);
+    av_freep(&s->motion_matrix);
+
+    s->pixel_depth = max_pixsteps[0];
+    s->width = inlink->w;
+    s->height = inlink->h;
+    pixel_sz = s->pixel_depth == 1 ? sizeof(uint8_t) : sizeof(uint16_t);
+    data_sz = s->width * pixel_sz * s->height;
+
+    s->prev_frame = av_malloc(data_sz); // one sample per pixel, stored without line padding
+
+    gradient_sz = (s->width - 2) * sizeof(float) * (s->height - 2); // border pixels are not convolved
+    s->gradient_matrix = av_malloc(gradient_sz);
+
+    motion_sz = s->width * sizeof(float) * s->height;
+    s->motion_matrix = av_malloc(motion_sz);
+
+    if (!s->prev_frame || !s->gradient_matrix || !s->motion_matrix) {
+        return AVERROR(ENOMEM); // partial allocations are freed by uninit() or the next config_input() call
+    }
+
+    return 0;
+}
+
+// Return nonzero if the frame is full range. Without a usable color_range only the YUVJ pixel formats count as full range, everything else is treated as limited.
+static int is_full_range(AVFrame* frame)
+{
+    // If color range not specified, fallback to pixel format
+    if (frame->color_range == AVCOL_RANGE_UNSPECIFIED || frame->color_range == AVCOL_RANGE_NB)
+        return frame->format == AV_PIX_FMT_YUVJ420P || frame->format == AV_PIX_FMT_YUVJ422P;
+    return frame->color_range == AVCOL_RANGE_JPEG;
+}
+
+// Expand a limited range sample y to full range; factor is 1 for 8 bits and 4 for 10 bits. The callers decide whether conversion is needed at all.
+static uint16_t convert_full_range(int factor, uint16_t y)
+{
+    int shift;
+    int limit_upper;
+    int full_upper;
+    int limit_y;
+
+    // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
+    shift = 16 * factor;
+    limit_upper = 235 * factor - shift;
+    full_upper = 256 * factor - 1;
+    limit_y = fmin(fmax(y - shift, 0), limit_upper); // remove the offset and clamp into [0, limit_upper]
+    return (full_upper * limit_y / limit_upper);     // rescale to [0, full_upper] with integer division
+}
+
+// Applies sobel convolution to the plane at src and stores the gradient magnitude of every non-border pixel in dst
+static void convolve_sobel(SiTiContext *s, const uint8_t *src, float *dst, int linesize)
+{
+    float x_conv_sum;
+    float y_conv_sum;
+    float gradient;
+    int ki;
+    int kj;
+    int index;
+    uint16_t data;
+    int filter_width = 3;
+    int filter_size = filter_width * filter_width;
+    int stride = linesize / s->pixel_depth; // presumably turns the byte linesize into a stride in samples
+    // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
+    int factor = s->pixel_depth == 1 ? 1 : 4;
+
+    // Dst matrix is smaller than src since we ignore edges that can't be convolved; k walks the kernel row by row (integer division yields the row)
+    #define CONVOLVE(bps)                                           \
+    {                                                               \
+        uint##bps##_t *vsrc = (uint##bps##_t*)src;                  \
+        for (int j = 1; j < s->height - 1; j++) {                   \
+            for (int i = 1; i < s->width - 1; i++) {                \
+                x_conv_sum = 0.0;                                   \
+                y_conv_sum = 0.0;                                   \
+                for (int k = 0; k < filter_size; k++) {             \
+                    ki = k % filter_width - 1;                      \
+                    kj = k / filter_width - 1;                      \
+                    index = (j + kj) * stride + (i + ki);           \
+                    data = s->full_range ? vsrc[index] : convert_full_range(factor, vsrc[index]); \
+                    x_conv_sum += data * X_FILTER[k];               \
+                    y_conv_sum += data * Y_FILTER[k];               \
+                }                                                   \
+                gradient = sqrt(x_conv_sum * x_conv_sum + y_conv_sum * y_conv_sum); \
+                dst[(j - 1) * (s->width - 2) + (i - 1)] = gradient; \
+            }                                                       \
+        }                                                           \
+    }
+
+    if (s->pixel_depth == 2) {
+        CONVOLVE(16);
+    } else {
+        CONVOLVE(8);
+    }
+}
+
+// Calculate pixel difference between current and previous frame, and update previous. While nb_frames is 1 there is nothing to compare against, so the motion stays 0.
+static void calculate_motion(SiTiContext *s, const uint8_t *curr,
+                             float *motion_matrix, int linesize)
+{
+    int stride = linesize / s->pixel_depth; // presumably turns the byte linesize into a stride in samples
+    float motion;
+    int curr_index; // index into curr, which uses stride
+    int prev_index; // index into prev_frame and motion_matrix, which are packed with width
+    uint16_t curr_data;
+    // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
+    int factor = s->pixel_depth == 1 ? 1 : 4;
+
+    // Previous frame is already converted to full range
+    #define CALCULATE(bps)                                           \
+    {                                                                \
+        uint##bps##_t *vsrc = (uint##bps##_t*)curr;                  \
+        uint##bps##_t *vdst = (uint##bps##_t*)s->prev_frame;         \
+        for (int j = 0; j < s->height; j++) {                        \
+            for (int i = 0; i < s->width; i++) {                     \
+                motion = 0;                                          \
+                curr_index = j * stride + i;                         \
+                prev_index = j * s->width + i;                       \
+                curr_data = s->full_range ? vsrc[curr_index] : convert_full_range(factor, vsrc[curr_index]); \
+                if (s->nb_frames > 1)                                \
+                    motion = curr_data - vdst[prev_index];           \
+                vdst[prev_index] = curr_data;                        \
+                motion_matrix[j * s->width + i] = motion;            \
+            }                                                        \
+        }                                                            \
+    }
+
+    if (s->pixel_depth == 2) {
+        CALCULATE(16);
+    } else {
+        CALCULATE(8);
+    }
+}
+
+static float std_deviation(float *img_metrics, int width, int height)
+{
+    int size = height * width;
+    float mean = 0.0; // NOTE(review): float accumulators may lose precision when summing large frames - consider double
+    float sqr_diff = 0;
+    // Population standard deviation (divides by size, not size - 1) of a width x height matrix; first pass computes the mean
+    for (int j = 0; j < height; j++)
+        for (int i = 0; i < width; i++)
+            mean += img_metrics[j * width + i];
+
+    mean /= size;
+    // Second pass: accumulate the squared deviations from the mean
+    for (int j = 0; j < height; j++) {
+        for (int i = 0; i < width; i++) {
+            float mean_diff = img_metrics[j * width + i] - mean;
+            sqr_diff += (mean_diff * mean_diff);
+        }
+    }
+    sqr_diff = sqr_diff / size;
+    return sqrt(sqr_diff);
+}
+
+static void set_meta(AVDictionary **metadata, const char *key, float d)
+{
+    char value[128];
+    snprintf(value, sizeof(value), "%0.2f", d); // value is stored as text with two decimals
+    av_dict_set(metadata, key, value, 0);       // return value of av_dict_set() is not checked
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
+{
+    AVFilterContext *ctx = inlink->dst;
+    SiTiContext *s = ctx->priv;
+    float si;
+    float ti;
+
+    s->full_range = is_full_range(frame);
+    s->nb_frames++; // incremented first: calculate_motion() only takes differences once nb_frames > 1
+
+    // Calculate si and ti: standard deviation of the sobel gradient and of the difference to the previous frame
+    convolve_sobel(s, frame->data[0], s->gradient_matrix, frame->linesize[0]);
+    calculate_motion(s, frame->data[0], s->motion_matrix, frame->linesize[0]);
+    si = std_deviation(s->gradient_matrix, s->width - 2, s->height - 2);
+    ti = std_deviation(s->motion_matrix, s->width, s->height);
+
+    // Calculate statistics; the first frame seeds the minima
+    s->max_si  = fmax(si, s->max_si);
+    s->max_ti  = fmax(ti, s->max_ti);
+    s->sum_si += si;
+    s->sum_ti += ti;
+    s->min_si  = s->nb_frames == 1 ? si : fmin(si, s->min_si);
+    s->min_ti  = s->nb_frames == 1 ? ti : fmin(ti, s->min_ti);
+
+    // Set si ti information in frame metadata
+    set_meta(&frame->metadata, "lavfi.siti.si", si);
+    set_meta(&frame->metadata, "lavfi.siti.ti", ti);
+
+    return ff_filter_frame(inlink->dst->outputs[0], frame); // the same frame is passed on to the output
+}
+
+#define OFFSET(x) offsetof(SiTiContext, x) // byte offset of the SiTiContext field backing an option
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption siti_options[] = {
+    { "print_summary", "Print summary showing average values", OFFSET(print_summary), AV_OPT_TYPE_BOOL, { .i64=0 }, 0, 1, FLAGS },
+    { NULL } // terminating entry
+};
+
+AVFILTER_DEFINE_CLASS(siti);
+
+static const AVFilterPad avfilter_vf_siti_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_input, // allocates the work buffers for the input dimensions
+        .filter_frame = filter_frame, // computes SI/TI per frame and forwards the frame
+    },
+};
+
+static const AVFilterPad avfilter_vf_siti_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO // receives the input frames with the lavfi.siti.* metadata added
+    },
+};
+
+const AVFilter ff_vf_siti = { // const to match the "extern const AVFilter ff_vf_siti" declaration added to allfilters.c
+    .name          = "siti",
+    .description   = NULL_IF_CONFIG_SMALL("Calculate spatial information (SI) and temporal information (TI)."),
+    .priv_size     = sizeof(SiTiContext),
+    .priv_class    = &siti_class,
+    .init          = init,
+    .uninit        = uninit,
+    FILTER_PIXFMTS_ARRAY(pix_fmts),
+    FILTER_INPUTS(avfilter_vf_siti_inputs),
+    FILTER_OUTPUTS(avfilter_vf_siti_outputs),
+};
-- 
2.20.1 (Apple Git-117)