[FFmpeg-devel] [PATCH 4/4] vf_ssim: x86 simd for ssim_4x4xN and ssim_endN.
Paul B Mahol
onemda at gmail.com
Mon Jul 13 00:09:58 CEST 2015
On 7/11/15, Ronald S. Bultje <rsbultje at gmail.com> wrote:
> Both are 2-2.5x faster than their C counterpart.
> ---
> libavfilter/ssim.h | 36 ++++++++
> libavfilter/vf_ssim.c | 26 ++++--
> libavfilter/x86/Makefile | 2 +
> libavfilter/x86/vf_ssim.asm | 190
> +++++++++++++++++++++++++++++++++++++++++
> libavfilter/x86/vf_ssim_init.c | 38 +++++++++
> 5 files changed, 283 insertions(+), 9 deletions(-)
> create mode 100644 libavfilter/ssim.h
> create mode 100644 libavfilter/x86/vf_ssim.asm
> create mode 100644 libavfilter/x86/vf_ssim_init.c
>
> diff --git a/libavfilter/ssim.h b/libavfilter/ssim.h
> new file mode 100644
> index 0000000..cd3a6ee
> --- /dev/null
> +++ b/libavfilter/ssim.h
> @@ -0,0 +1,36 @@
> +/*
> + * Copyright (c) 2015 Ronald S. Bultje <rsbultje at gmail.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
> USA
> + */
> +
> +#ifndef LIBAVFILTER_SSIM_H
> +#define LIBAVFILTER_SSIM_H
> +
> +#include <stddef.h>
> +#include <stdint.h>
> +
> +typedef struct SSIMDSPContext {
> + void (*ssim_4x4_line)(const uint8_t *buf, ptrdiff_t buf_stride,
> + const uint8_t *ref, ptrdiff_t ref_stride,
> + int (*sums)[4], int w);
> + float (*ssim_end_line)(const int (*sum0)[4], const int (*sum1)[4], int
> w);
> +} SSIMDSPContext;
> +
> +void ff_ssim_init_x86(SSIMDSPContext *dsp);
> +
> +#endif /* LIBAVFILTER_SSIM_H */
> diff --git a/libavfilter/vf_ssim.c b/libavfilter/vf_ssim.c
> index f7a259e..b5a61ee 100644
> --- a/libavfilter/vf_ssim.c
> +++ b/libavfilter/vf_ssim.c
> @@ -42,6 +42,7 @@
> #include "drawutils.h"
> #include "formats.h"
> #include "internal.h"
> +#include "ssim.h"
> #include "video.h"
>
> typedef struct SSIMContext {
> @@ -59,6 +60,7 @@ typedef struct SSIMContext {
> int planeheight[4];
> int *temp;
> int is_rgb;
> + SSIMDSPContext dsp;
> } SSIMContext;
>
> #define OFFSET(x) offsetof(SSIMContext, x)
> @@ -85,8 +87,8 @@ static void set_meta(AVDictionary **metadata, const char
> *key, char comp, float
> }
> }
>
> -static void ssim_4x4xn(const uint8_t *main, int main_stride,
> - const uint8_t *ref, int ref_stride,
> +static void ssim_4x4xn(const uint8_t *main, ptrdiff_t main_stride,
> + const uint8_t *ref, ptrdiff_t ref_stride,
> int (*sums)[4], int width)
> {
> int x, y, z;
> @@ -132,7 +134,7 @@ static float ssim_end1(int s1, int s2, int ss, int s12)
> / ((float)(fs1 * fs1 + fs2 * fs2 + ssim_c1) * (float)(vars +
> ssim_c2));
> }
>
> -static float ssim_endn(int (*sum0)[4], int (*sum1)[4], int width)
> +static float ssim_endn(const int (*sum0)[4], const int (*sum1)[4], int
> width)
> {
> float ssim = 0.0;
> int i;
> @@ -145,7 +147,8 @@ static float ssim_endn(int (*sum0)[4], int (*sum1)[4],
> int width)
> return ssim;
> }
>
> -static float ssim_plane(uint8_t *main, int main_stride,
> +static float ssim_plane(SSIMDSPContext *dsp,
> + uint8_t *main, int main_stride,
> uint8_t *ref, int ref_stride,
> int width, int height, void *temp)
> {
> @@ -160,12 +163,12 @@ static float ssim_plane(uint8_t *main, int
> main_stride,
> for (y = 1; y < height; y++) {
> for (; z <= y; z++) {
> FFSWAP(void*, sum0, sum1);
> - ssim_4x4xn(&main[4 * z * main_stride], main_stride,
> - &ref[4 * z * ref_stride], ref_stride,
> - sum0, width);
> + dsp->ssim_4x4_line(&main[4 * z * main_stride], main_stride,
> + &ref[4 * z * ref_stride], ref_stride,
> + sum0, width);
> }
>
> - ssim += ssim_endn(sum0, sum1, width - 1);
> + ssim += dsp->ssim_end_line(sum0, sum1, width - 1);
> }
>
> return ssim / ((height - 1) * (width - 1));
> @@ -187,7 +190,7 @@ static AVFrame *do_ssim(AVFilterContext *ctx, AVFrame
> *main,
> s->nb_frames++;
>
> for (i = 0; i < s->nb_components; i++) {
> - c[i] = ssim_plane(main->data[i], main->linesize[i],
> + c[i] = ssim_plane(&s->dsp, main->data[i], main->linesize[i],
> ref->data[i], ref->linesize[i],
> s->planewidth[i], s->planeheight[i], s->temp);
> ssimv += s->coefs[i] * c[i];
> @@ -294,6 +297,11 @@ static int config_input_ref(AVFilterLink *inlink)
> if (!s->temp)
> return AVERROR(ENOMEM);
>
> + s->dsp.ssim_4x4_line = ssim_4x4xn;
> + s->dsp.ssim_end_line = ssim_endn;
> + if (ARCH_X86)
> + ff_ssim_init_x86(&s->dsp);
> +
> return 0;
> }
>
> diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile
> index 89d3ca1..230e879 100644
> --- a/libavfilter/x86/Makefile
> +++ b/libavfilter/x86/Makefile
> @@ -9,6 +9,7 @@ OBJS-$(CONFIG_PP7_FILTER) +=
> x86/vf_pp7_init.o
> OBJS-$(CONFIG_PSNR_FILTER) += x86/vf_psnr_init.o
> OBJS-$(CONFIG_PULLUP_FILTER) += x86/vf_pullup_init.o
> OBJS-$(CONFIG_SPP_FILTER) += x86/vf_spp.o
> +OBJS-$(CONFIG_SSIM_FILTER) += x86/vf_ssim_init.o
> OBJS-$(CONFIG_TINTERLACE_FILTER) += x86/vf_tinterlace_init.o
> OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume_init.o
> OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif_init.o
> @@ -21,6 +22,7 @@ YASM-OBJS-$(CONFIG_INTERLACE_FILTER) +=
> x86/vf_interlace.o
> YASM-OBJS-$(CONFIG_PP7_FILTER) += x86/vf_pp7.o
> YASM-OBJS-$(CONFIG_PSNR_FILTER) += x86/vf_psnr.o
> YASM-OBJS-$(CONFIG_PULLUP_FILTER) += x86/vf_pullup.o
> +YASM-OBJS-$(CONFIG_SSIM_FILTER) += x86/vf_ssim.o
> YASM-OBJS-$(CONFIG_TINTERLACE_FILTER) += x86/vf_interlace.o
> YASM-OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume.o
> YASM-OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif.o
> x86/yadif-16.o x86/yadif-10.o
> diff --git a/libavfilter/x86/vf_ssim.asm b/libavfilter/x86/vf_ssim.asm
> new file mode 100644
> index 0000000..55bb645
> --- /dev/null
> +++ b/libavfilter/x86/vf_ssim.asm
> @@ -0,0 +1,190 @@
> +;*****************************************************************************
> +;* x86-optimized functions for interlace filter
Besides the line above, the patch LGTM, unless someone has comments on the asm part.
More information about the ffmpeg-devel
mailing list