[FFmpeg-devel] [PATCH] essim patch

Tue Dec 10 14:27:03 EET 2024

Hi Andrea

I havnt really followed your gsoc work so in case some of my comments
below differs from your mentor(s), please point that out to me.

also maybe your mentors can do a full review (in CC) as it seems
noone else has reviewed your patch.

thx

On Thu, Dec 05, 2024 at 06:59:29PM +0100, AndreaMastroberti wrote:
[...]> diff --git a/libavfilter/version.h b/libavfilter/version.h
> index f84dec4805..0050874108 100644
> --- a/libavfilter/version.h
> +++ b/libavfilter/version.h
> @@ -31,8 +31,8 @@
>
>  #include "version_major.h"
>
> -#define LIBAVFILTER_VERSION_MINOR   6
> -#define LIBAVFILTER_VERSION_MICRO 101
> +#define LIBAVFILTER_VERSION_MINOR   7
> +#define LIBAVFILTER_VERSION_MICRO 100
>
>
>  #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
> diff --git a/libavfilter/vf_ssim.c b/libavfilter/vf_ssim.c
> index 52b22a6870..53b9af4d26 100644
> --- a/libavfilter/vf_ssim.c
> +++ b/libavfilter/vf_ssim.c
> @@ -44,6 +44,7 @@
>  #include "filters.h"
>  #include "framesync.h"
>  #include "ssim.h"
> +#include <math.h>
>
>  typedef struct SSIMContext {
>      const AVClass *class;
> @@ -63,8 +64,15 @@ typedef struct SSIMContext {
>      int **temp;
>      int is_rgb;
>      double **score;
> +    uint64_t **i1[4], **i2[4], **s1[4], **s2[4], **i12[4];
> +    int int_img;
> +    int win_size;
> +    int stride;
> +    PoolMethod pool;
>      int (*ssim_plane)(AVFilterContext *ctx, void *arg,
>                        int jobnr, int nb_jobs);
> +    int (*ssim_plane_int)(AVFilterContext *ctx, void *arg,
> +                      int jobnr, int nb_jobs);
>      SSIMDSPContext dsp;
>  } SSIMContext;
>

> @@ -74,6 +82,13 @@ typedef struct SSIMContext {
>  static const AVOption ssim_options[] = {
>      {"stats_file", "Set file where to store per-frame difference information", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
>      {"f",          "Set file where to store per-frame difference information", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
> +    { "int_img", "Compute SSIM using integral images", OFFSET(int_img), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
> +    { "window", "Window size", OFFSET(win_size), AV_OPT_TYPE_INT, { .i64 = 8 }, 8, 16, FLAGS },
> +    { "stride", "Stride length", OFFSET(stride), AV_OPT_TYPE_INT, { .i64 = 4 }, 4, 8, FLAGS },
> +    { "pool", "Pooling method", OFFSET(pool), AV_OPT_TYPE_INT, {.i64 = AVG}, 0, 2, FLAGS, "pool" },
> +    { "avg",   "Average pooling",     0, AV_OPT_TYPE_CONST, {.i64 = AVG}, 0, 0, FLAGS, "pool" },
> +    { "mink3", "Minkowski norm 3",         0, AV_OPT_TYPE_CONST, {.i64 = MINK_3},   0, 0, FLAGS, "pool" },
> +    { "mink4", "Minkowski norm 4",         0, AV_OPT_TYPE_CONST, {.i64 = MINK_4},   0, 0, FLAGS, "pool" },
>      { NULL }
>  };
>  

These can be vertically aligned like:

> +    { "avg",   "Average pooling",          0, AV_OPT_TYPE_CONST, {.i64 =    AVG},   0, 0, FLAGS, "pool" },
> +    { "mink3", "Minkowski norm 3",         0, AV_OPT_TYPE_CONST, {.i64 = MINK_3},   0, 0, FLAGS, "pool" },
> +    { "mink4", "Minkowski norm 4",         0, AV_OPT_TYPE_CONST, {.i64 = MINK_4},   0, 0, FLAGS, "pool" },

> @@ -175,6 +190,28 @@ static float ssim_end1x(int64_t s1, int64_t s2, int64_t ss, int64_t s12, int max
>           / ((float)(fs1 * fs1 + fs2 * fs2 + ssim_c1) * (float)(vars + ssim_c2));
>  }
>  

> +static float ssim_end1w(int s1, int s2, int ss, int s12, int win_size)
> +{
> +    double ws = (double)win_size * win_size;
> +    double ssim_c1 = 0.01 * 0.01 * 255 * 255 * ws;
> +    double ssim_c2 = 0.03 * 0.03 * 255 * 255 * ws * (ws - 1);
> +
> +    double fs1 = (double)s1;
> +    double fs2 = (double)s2;
> +    double fss = (double)ss;
> +    double fs12 = (double)s12;
> +
> +    double vars = fss * ws - fs1 * fs1 - fs2 * fs2;
> +    double covar = fs12 * ws - fs1 * fs2;

this can be calculated in int or int64 avoiding rounding issues with floats

> +
> +    double num = (2 * fs1 * fs2 + ssim_c1) * (2 * covar + ssim_c2);

if you multiply
0.01 * 0.01 * 255 * 255 * ws
by 50 then its an integer and you could avoid floats (assuming ws is 8 or 16)
before the num / den

not sure how usefull it is to avoid float/double here but as the input
is integers it seemed an idea that may be interresting
this way the numbers could be kept exact until num / den below i think

[...]
> +static int ssim_plane_int(AVFilterContext *ctx, void *arg,
> +                      int jobnr, int nb_jobs)
> +{
> +    SSIMContext *s = ctx->priv;
> +    ThreadData *td = arg;
> +    double *score = td->score[jobnr];
> +    void *temp = td->temp[jobnr];
> +    int stride = s->stride, win_size = s->win_size;
> +    uint64_t *i1, *i2, *s1, *s2, *i12;
> +    int offset;
> +
> +    for (int c = 0; c < td->nb_components; c++) {
> +        int width = td->planewidth[c];
> +        int height = td->planeheight[c];

> +        const int slice_start = ((height/stride) * jobnr) / nb_jobs;
> +        const int slice_end = ((height/stride) * (jobnr+1)) / nb_jobs;

how does this behave if height is not a multiple of stride ?
does this work as intended in that case ?

thx

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

The real ebay dictionary, page 2
"100% positive feedback" - "All either got their money back or didnt complain"
"Best seller ever, very honest" - "Seller refunded buyer after failed scam"
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 195 bytes
Desc: not available
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20241210/120d7784/attachment.sig>