[FFmpeg-devel] [PATCH] Added function to obtain MSE of two frames and a filter to calculate the PSNR

Fri Jun 3 17:48:52 CEST 2011

On date Friday 2011-06-03 15:44:32 +0200, Roger Pau Monné encoded:
> Modified libavutil in order to add a new function to calculate the MSE
> and created a new filter (vf_psnr) that calculates the average PSNR of
> two input video files.
> 
> libavutil: added a new function called av_images_mse to calculate the
> mean squared error (MSE) between two images.
> libavfilter: created a new filter that obtains the average peak
> signal-to-noise ratio (PSNR) of two input video files, this filter
> makes use of the new mse function added to libavutil.

please create a patch for each separate change

> From 30708f99df841dc2abc7571d08019b9f7f79eb42 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau at entel.upc.edu>
> Date: Fri, 3 Jun 2011 15:31:31 +0200
> Subject: [PATCH 4/5] libavutil: added a new function called av_images_mse to calculate the mean squared error (MSE) between two images.
>  libavfilter: created a new filter that obtains the average peak signal-to-noise ratio (PSNR) of two input video files, this filter makes use of the new mse function added to libavutil.
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
> 
> 
> Signed-off-by: Roger Pau Monné <roger.pau at entel.upc.edu>
> ---
>  libavfilter/vf_psnr.c |  175 +++++++++++++++++++++++++++++++++++++++++++++++++
>  libavutil/imgutils.c  |   28 ++++++++
>  libavutil/imgutils.h  |   14 ++++
>  3 files changed, 217 insertions(+), 0 deletions(-)
>  create mode 100644 libavfilter/vf_psnr.c
> 
> diff --git a/libavfilter/vf_psnr.c b/libavfilter/vf_psnr.c
> new file mode 100644
> index 0000000..de6ebad
> --- /dev/null
> +++ b/libavfilter/vf_psnr.c
> @@ -0,0 +1,175 @@
> +/*
> + * Copyright (c) 2011 Roger Pau Monné <roger.pau at entel.upc.edu>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * Calculate the PSNR between two input videos
> + * Based on the overlay filter
> + */
> +
> +#include "libavutil/imgutils.h"
> +#include "libavcodec/avcodec.h"
> +#include "avfilter.h"
> +

> +#define MAIN    0
> +#define OVERLAY 1

I don't think these are necessary, you can simply use 0 and 1.

> +
> +typedef struct {
> +    AVFilterBufferRef *overpicref;

overpicref -> the "over" is confusing since this is not the overlay
filter; picref should be fine (or something more descriptive)

> +    double mse, min_mse, max_mse;

> +    int num_frames;

nit: nb_frames for overall consistency

> +    int hsub, vsub;
> +} PSNRContext;
> +
> +static av_cold int init(AVFilterContext *ctx, const char *args, void *opaque)
> +{
> +    PSNRContext *psnr_context = ctx->priv;
> +
> +    psnr_context->mse = psnr_context->num_frames = 0;
> +    psnr_context->min_mse = psnr_context->max_mse = -1.0;
> +    psnr_context->overpicref = NULL;
> +
> +    return 0;
> +}
> +
> +static av_cold void uninit(AVFilterContext *ctx)
> +{
> +    PSNRContext *psnr_context = ctx->priv;
> +
> +    av_log(ctx, AV_LOG_INFO, "Average PSNR = %0.2fdB (Frame min: %0.2fdB / Frame max: %0.2fdB)\n", 10.0*log((pow(255*3, 2))/(psnr_context->mse/psnr_context->num_frames))/log(10.0), 10.0*log((pow(255*3, 2))/(psnr_context->max_mse))/log(10.0), 10.0*log((pow(255*3, 2))/(psnr_context->min_mse))/log(10.0));

Please split this line. Also, maybe we can devise a more
grep-friendly way to print the info, e.g.:

average:%0.2fdB min:%0.2fdB max:%0.2fdB

> +
> +    if(psnr_context->overpicref)
> +        avfilter_unref_buffer(psnr_context->overpicref);

psnr_context->overpicref = NULL may save some headache

> +}
> +
> +static int config_input_overlay(AVFilterLink *inlink)
                          ^^^^^^^^

misleading

> +{
> +    AVFilterContext *ctx  = inlink->dst;
> +    PSNRContext *psnr_context = ctx->priv;
> +

> +    avcodec_get_chroma_sub_sample(inlink->format, &psnr_context->hsub, &psnr_context->vsub);

Please access av_pix_fmt_descriptors directly; calling
avcodec_get_chroma_sub_sample() adds a dependency on libavcodec.

> +
> +    if(ctx->inputs[MAIN]->w != ctx->inputs[OVERLAY]->w || ctx->inputs[MAIN]->h != ctx->inputs[OVERLAY]->h) {
> +        av_log(ctx, AV_LOG_ERROR, "Width and/or heigth of input videos are different, could not calculate PSNR");
> +        return AVERROR(EINVAL);
> +    }
> +    return 0;
> +}
> +
> +static int config_output(AVFilterLink *outlink)
> +{
> +    AVFilterContext *ctx = outlink->src;
> +
> +    outlink->time_base = outlink->src->inputs[0]->time_base;
> +    outlink->w = ctx->inputs[MAIN]->w;
> +    outlink->h = ctx->inputs[MAIN]->h;
> +    return 0;
> +}
> +
> +static void start_frame(AVFilterLink *inlink, AVFilterBufferRef *inpicref)
> +{
> +    AVFilterBufferRef *outpicref = avfilter_ref_buffer(inpicref, ~0);
> +    AVFilterContext *ctx = inlink->dst;
> +    PSNRContext *psnr_context = ctx->priv;
> +
> +    inlink->dst->outputs[0]->out_buf = outpicref;
> +    outpicref->pts = av_rescale_q(inpicref->pts, ctx->inputs[MAIN]->time_base, ctx->outputs[0]->time_base);
> +    
> +    if(psnr_context->overpicref)
> +    {
> +        avfilter_unref_buffer(psnr_context->overpicref);
> +        psnr_context->overpicref = NULL;
> +    }
> +    avfilter_request_frame(ctx->inputs[OVERLAY]);
> +
> +    avfilter_start_frame(inlink->dst->outputs[0], outpicref);
> +}
> +

> +static void start_frame_overlay(AVFilterLink *inlink, AVFilterBufferRef *inpicref)
                           ^^^^^^^

ditto

> +{
> +    AVFilterContext *ctx = inlink->dst;
> +    PSNRContext *psnr_context = ctx->priv;
> +
> +    psnr_context->overpicref = inpicref;
> +    psnr_context->overpicref->pts = av_rescale_q(inpicref->pts, ctx->inputs[OVERLAY]->time_base, ctx->outputs[0]->time_base);
> +}
> +static void end_frame(AVFilterLink *inlink)
> +{
> +    AVFilterContext *ctx = inlink->dst;
> +    PSNRContext *psnr_context = ctx->priv;
> +    AVFilterLink *outlink = ctx->outputs[0];

> +    AVFilterBufferRef *outpic = outlink->out_buf;
> +    AVFilterBufferRef *overlay = psnr_context->overpicref;

> +    double mse;
> +

> +    if(psnr_context->overpicref)
> +    {

style nits: if_(...)_{

> +        mse = av_images_mse(outpic->data, overlay->data, outpic->linesize, outpic->video->w, outpic->video->h, psnr_context->hsub, psnr_context->vsub);
> +        if(psnr_context->min_mse == -1) {
> +            psnr_context->min_mse = mse;
> +            psnr_context->max_mse = mse;
> +        }
> +        if(psnr_context->min_mse > mse)
> +            psnr_context->min_mse = mse;
> +        if(psnr_context->max_mse < mse)
> +            psnr_context->max_mse = mse;
> +
> +        psnr_context->mse += mse;
> +        psnr_context->num_frames++;
> +    }
> +

> +    avfilter_end_frame(inlink->dst->outputs[0]);
                          ^^^^^^^^^^^^^^^^^^^^^^^

Nit: you can use outlink here.

> +    avfilter_unref_buffer(inlink->cur_buf);
> +}
> +
> +static void null_draw_slice(AVFilterLink *inlink, int y, int h, int slice_dir) { }
> +
> +static void null_end_frame(AVFilterLink *inlink) { }
> +
> +AVFilter avfilter_vf_psnr = {
> +    .name      = "psnr",
> +    .description = NULL_IF_CONFIG_SMALL("Calculates the PSNR given two input files."),
> +
> +    .init      = init,
> +    .uninit    = uninit,
> +
> +    .priv_size = sizeof(PSNRContext),
> +
> +    .inputs    = (AVFilterPad[]) {{ .name            = "main",
> +                                    .type            = AVMEDIA_TYPE_VIDEO,
> +                                    .start_frame     = start_frame,
> +                                    .draw_slice      = null_draw_slice,
> +                                    .end_frame       = end_frame,
> +                                    .min_perms       = AV_PERM_READ,

> +                                    .rej_perms       = AV_PERM_REUSE2|AV_PERM_PRESERVE, },

I believe this can be removed, since the input is never modified.

> +                                  { .name            = "overlay",
> +                                    .type            = AVMEDIA_TYPE_VIDEO,
> +                                    .start_frame     = start_frame_overlay,
> +                                    .config_props    = config_input_overlay,
> +                                    .draw_slice      = null_draw_slice,
> +                                    .end_frame       = null_end_frame,
> +                                    .min_perms       = AV_PERM_READ,

> +                                    .rej_perms       = AV_PERM_REUSE2, },

Same here

> +                                  { .name = NULL}},
> +    .outputs   = (AVFilterPad[]) {{ .name            = "default",
> +                                    .type            = AVMEDIA_TYPE_VIDEO,
> +                                    .config_props    = config_output, },
> +                                  { .name = NULL}},
> +};
> diff --git a/libavutil/imgutils.c b/libavutil/imgutils.c
> index 8eefa4d..4d1a732 100644
> --- a/libavutil/imgutils.c
> +++ b/libavutil/imgutils.c
> @@ -266,3 +266,31 @@ void av_image_copy(uint8_t *dst_data[4], int dst_linesizes[4],
>          }
>      }
>  }
> +

> +double av_images_mse(uint8_t *src_data[3], uint8_t *coded_data[3], const int linesizes[3], int width, int height, int hsub, int vsub)
> +{
> +    uint8_t *src_y    = src_data[0];
> +    uint8_t *coded_y  = coded_data[0];
> +    uint8_t *src_u    = src_data[1];
> +    uint8_t *coded_u  = coded_data[1];
> +    uint8_t *src_v    = src_data[2];
> +    uint8_t *coded_v  = coded_data[2];
> +    double mse_r = 0.0;
> +
> +    for(int i = 0; i < height; i++) {
> +        for(int j = 0; j < width; j++) {
> +            if(j >= (width >> hsub) || i >= (height >> vsub))
> +                mse_r += pow(src_y[j] - coded_y[j], 2);
> +            else
> +                mse_r += pow(src_y[j] - coded_y[j] + src_u[j] - coded_u[j] + src_v[j] - coded_v[j], 2);
> +        }
> +        src_y   += linesizes[0];
> +        coded_y += linesizes[0];
> +        src_u   += linesizes[1];
> +        coded_u += linesizes[1];
> +        src_v   += linesizes[2];
> +        coded_v += linesizes[2];
> +    }
> +
> +    return mse_r/((double) width*height);

This only works with YUV formats; at the very least, it can easily be
extended to planar, non-bitstream, non-paletted formats.

For the signature I suggest:

double av_compute_images_mse(const uint8_t *ref_data[4], const uint8_t *data[4], const int linesizes[4],
                             int width, int height, enum PixelFormat pix_fmt);

(av_get_images_mse() is fine as well)
-- 
FFmpeg = Fancy Fantastic Merciful Powerful Erotic God