[FFmpeg-cvslog] avfilter/vf_framerate: factorize SAD functions which compute SAD for a whole frame
Marton Balint
git at videolan.org
Sun Nov 11 21:35:42 EET 2018
ffmpeg | branch: master | Marton Balint <cus at passwd.hu> | Thu Apr 5 01:37:25 2018 +0200| [6c2a7a8e9a3698f37913d3f24723fbb8fa895798] | committer: Marton Balint
avfilter/vf_framerate: factorize SAD functions which compute SAD for a whole frame
Also add SIMD which works on lines because it is faster than calculating it on
8x8 blocks using pixelutils.
Signed-off-by: Marton Balint <cus at passwd.hu>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6c2a7a8e9a3698f37913d3f24723fbb8fa895798
---
configure | 3 +-
libavfilter/Makefile | 1 +
libavfilter/framerate.h | 4 +--
libavfilter/scene_sad.c | 72 ++++++++++++++++++++++++++++++++++++++
libavfilter/scene_sad.h | 44 ++++++++++++++++++++++++
libavfilter/vf_framerate.c | 61 ++++-----------------------------
libavfilter/x86/Makefile | 4 +++
libavfilter/x86/scene_sad.asm | 74 ++++++++++++++++++++++++++++++++++++++++
libavfilter/x86/scene_sad_init.c | 52 ++++++++++++++++++++++++++++
9 files changed, 257 insertions(+), 58 deletions(-)
diff --git a/configure b/configure
index 00b5d9795e..52a9bd63d7 100755
--- a/configure
+++ b/configure
@@ -2337,6 +2337,7 @@ CONFIG_EXTRA="
rtpdec
rtpenc_chain
rv34dsp
+ scene_sad
sinewin
snappy
srtp
@@ -3400,7 +3401,7 @@ find_rect_filter_deps="avcodec avformat gpl"
firequalizer_filter_deps="avcodec"
firequalizer_filter_select="rdft"
flite_filter_deps="libflite"
-framerate_filter_select="pixelutils"
+framerate_filter_select="scene_sad"
frei0r_filter_deps="frei0r libdl"
frei0r_src_filter_deps="frei0r libdl"
fspp_filter_deps="gpl"
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 79a89a1ab1..7c6fc836e5 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -28,6 +28,7 @@ OBJS-$(HAVE_THREADS) += pthread.o
OBJS-$(CONFIG_QSVVPP) += qsvvpp.o
DNN-OBJS-$(CONFIG_LIBTENSORFLOW) += dnn_backend_tf.o
OBJS-$(CONFIG_DNN) += dnn_interface.o dnn_backend_native.o $(DNN-OBJS-yes)
+OBJS-$(CONFIG_SCENE_SAD) += scene_sad.o
# audio filters
OBJS-$(CONFIG_ABENCH_FILTER) += f_bench.o
diff --git a/libavfilter/framerate.h b/libavfilter/framerate.h
index a42d5af68a..8048dfa36a 100644
--- a/libavfilter/framerate.h
+++ b/libavfilter/framerate.h
@@ -19,7 +19,7 @@
#ifndef AVFILTER_FRAMERATE_H
#define AVFILTER_FRAMERATE_H
-#include "libavutil/pixelutils.h"
+#include "scene_sad.h"
#include "avfilter.h"
#define BLEND_FUNC_PARAMS const uint8_t *src1, ptrdiff_t src1_linesize, \
@@ -48,7 +48,7 @@ typedef struct FrameRateContext {
AVRational srce_time_base; ///< timebase of source
AVRational dest_time_base; ///< timebase of destination
- av_pixelutils_sad_fn sad; ///< Sum of the absolute difference function (scene detect only)
+ ff_scene_sad_fn sad; ///< Sum of the absolute difference function (scene detect only)
double prev_mafd; ///< previous MAFD (scene detect only)
int blend_factor_max;
diff --git a/libavfilter/scene_sad.c b/libavfilter/scene_sad.c
new file mode 100644
index 0000000000..fa57a25961
--- /dev/null
+++ b/libavfilter/scene_sad.c
@@ -0,0 +1,72 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Scene SAD functions
+ */
+
+#include "scene_sad.h"
+
+void ff_scene_sad16_c(SCENE_SAD_PARAMS)
+{
+ uint64_t sad = 0;
+ const uint16_t *src1w = (const uint16_t *)src1;
+ const uint16_t *src2w = (const uint16_t *)src2;
+ int x, y;
+
+ stride1 /= 2;
+ stride2 /= 2;
+
+ for (y = 0; y < height; y++) {
+ for (x = 0; x < width; x++)
+ sad += FFABS(src1w[x] - src2w[x]);
+ src1w += stride1;
+ src2w += stride2;
+ }
+ *sum = sad;
+}
+
+void ff_scene_sad_c(SCENE_SAD_PARAMS)
+{
+ uint64_t sad = 0;
+ int x, y;
+
+ for (y = 0; y < height; y++) {
+ for (x = 0; x < width; x++)
+ sad += FFABS(src1[x] - src2[x]);
+ src1 += stride1;
+ src2 += stride2;
+ }
+ *sum = sad;
+}
+
+ff_scene_sad_fn ff_scene_sad_get_fn(int depth)
+{
+ ff_scene_sad_fn sad = NULL;
+ if (ARCH_X86)
+ sad = ff_scene_sad_get_fn_x86(depth);
+ if (!sad) {
+ if (depth == 8)
+ sad = ff_scene_sad_c;
+ if (depth == 16)
+ sad = ff_scene_sad16_c;
+ }
+ return sad;
+}
+
diff --git a/libavfilter/scene_sad.h b/libavfilter/scene_sad.h
new file mode 100644
index 0000000000..433c69473d
--- /dev/null
+++ b/libavfilter/scene_sad.h
@@ -0,0 +1,44 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Scene SAD functions
+ */
+
+#ifndef AVFILTER_SCENE_SAD_H
+#define AVFILTER_SCENE_SAD_H
+
+#include "avfilter.h"
+
+#define SCENE_SAD_PARAMS const uint8_t *src1, ptrdiff_t stride1, \
+ const uint8_t *src2, ptrdiff_t stride2, \
+ ptrdiff_t width, ptrdiff_t height, \
+ uint64_t *sum
+
+typedef void (*ff_scene_sad_fn)(SCENE_SAD_PARAMS);
+
+void ff_scene_sad_c(SCENE_SAD_PARAMS);
+
+void ff_scene_sad16_c(SCENE_SAD_PARAMS);
+
+ff_scene_sad_fn ff_scene_sad_get_fn_x86(int depth);
+
+ff_scene_sad_fn ff_scene_sad_get_fn(int depth);
+
+#endif /* AVFILTER_SCENE_SAD_H */
diff --git a/libavfilter/vf_framerate.c b/libavfilter/vf_framerate.c
index fb65381923..06e463e4d7 100644
--- a/libavfilter/vf_framerate.c
+++ b/libavfilter/vf_framerate.c
@@ -33,13 +33,13 @@
#include "libavutil/internal.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
-#include "libavutil/pixelutils.h"
#include "avfilter.h"
#include "internal.h"
#include "video.h"
#include "filters.h"
#include "framerate.h"
+#include "scene_sad.h"
#define OFFSET(x) offsetof(FrameRateContext, x)
#define V AV_OPT_FLAG_VIDEO_PARAM
@@ -62,52 +62,6 @@ static const AVOption framerate_options[] = {
AVFILTER_DEFINE_CLASS(framerate);
-static av_always_inline int64_t sad_8x8_16(const uint16_t *src1, ptrdiff_t stride1,
- const uint16_t *src2, ptrdiff_t stride2)
-{
- int sum = 0;
- int x, y;
-
- for (y = 0; y < 8; y++) {
- for (x = 0; x < 8; x++)
- sum += FFABS(src1[x] - src2[x]);
- src1 += stride1;
- src2 += stride2;
- }
- return sum;
-}
-
-static int64_t scene_sad16(FrameRateContext *s, const uint16_t *p1, int p1_linesize, const uint16_t* p2, int p2_linesize, const int width, const int height)
-{
- int64_t sad;
- int x, y;
- for (sad = y = 0; y < height - 7; y += 8) {
- for (x = 0; x < width - 7; x += 8) {
- sad += sad_8x8_16(p1 + y * p1_linesize + x,
- p1_linesize,
- p2 + y * p2_linesize + x,
- p2_linesize);
- }
- }
- return sad;
-}
-
-static int64_t scene_sad8(FrameRateContext *s, uint8_t *p1, int p1_linesize, uint8_t* p2, int p2_linesize, const int width, const int height)
-{
- int64_t sad;
- int x, y;
- for (sad = y = 0; y < height - 7; y += 8) {
- for (x = 0; x < width - 7; x += 8) {
- sad += s->sad(p1 + y * p1_linesize + x,
- p1_linesize,
- p2 + y * p2_linesize + x,
- p2_linesize);
- }
- }
- emms_c();
- return sad;
-}
-
static double get_scene_score(AVFilterContext *ctx, AVFrame *crnt, AVFrame *next)
{
FrameRateContext *s = ctx->priv;
@@ -117,16 +71,13 @@ static double get_scene_score(AVFilterContext *ctx, AVFrame *crnt, AVFrame *next
if (crnt->height == next->height &&
crnt->width == next->width) {
- int64_t sad;
+ uint64_t sad;
double mafd, diff;
ff_dlog(ctx, "get_scene_score() process\n");
- if (s->bitdepth == 8)
- sad = scene_sad8(s, crnt->data[0], crnt->linesize[0], next->data[0], next->linesize[0], crnt->width, crnt->height);
- else
- sad = scene_sad16(s, (const uint16_t*)crnt->data[0], crnt->linesize[0] / 2, (const uint16_t*)next->data[0], next->linesize[0] / 2, crnt->width, crnt->height);
-
- mafd = (double)sad * 100.0 / FFMAX(1, (crnt->height & ~7) * (crnt->width & ~7)) / (1 << s->bitdepth);
+ s->sad(crnt->data[0], crnt->linesize[0], next->data[0], next->linesize[0], crnt->width, crnt->height, &sad);
+ emms_c();
+ mafd = (double)sad * 100.0 / (crnt->width * crnt->height) / (1 << s->bitdepth);
diff = fabs(mafd - s->prev_mafd);
ret = av_clipf(FFMIN(mafd, diff), 0, 100.0);
s->prev_mafd = mafd;
@@ -350,7 +301,7 @@ static int config_input(AVFilterLink *inlink)
s->bitdepth = pix_desc->comp[0].depth;
s->vsub = pix_desc->log2_chroma_h;
- s->sad = av_pixelutils_get_sad_fn(3, 3, 2, s); // 8x8 both sources aligned
+ s->sad = ff_scene_sad_get_fn(s->bitdepth == 8 ? 8 : 16);
if (!s->sad)
return AVERROR(EINVAL);
diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile
index b484c8bd1c..6eecb94359 100644
--- a/libavfilter/x86/Makefile
+++ b/libavfilter/x86/Makefile
@@ -1,3 +1,5 @@
+OBJS-$(CONFIG_SCENE_SAD) += x86/scene_sad_init.o
+
OBJS-$(CONFIG_AFIR_FILTER) += x86/af_afir_init.o
OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend_init.o
OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif_init.o
@@ -29,6 +31,8 @@ OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume_init.o
OBJS-$(CONFIG_W3FDIF_FILTER) += x86/vf_w3fdif_init.o
OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif_init.o
+X86ASM-OBJS-$(CONFIG_SCENE_SAD) += x86/scene_sad.o
+
X86ASM-OBJS-$(CONFIG_AFIR_FILTER) += x86/af_afir.o
X86ASM-OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend.o
X86ASM-OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif.o
diff --git a/libavfilter/x86/scene_sad.asm b/libavfilter/x86/scene_sad.asm
new file mode 100644
index 0000000000..d38d71ccca
--- /dev/null
+++ b/libavfilter/x86/scene_sad.asm
@@ -0,0 +1,74 @@
+;*****************************************************************************
+;* x86-optimized functions for scene SAD
+;*
+;* Copyright (C) 2018 Marton Balint
+;*
+;* Based on vf_blend.asm, Copyright (C) 2015 Paul B Mahol
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION .text
+
+
+%macro SAD_INIT 0
+cglobal scene_sad, 6, 7, 2, src1, stride1, src2, stride2, width, end, x
+ add src1q, widthq
+ add src2q, widthq
+ neg widthq
+ pxor m1, m1
+%endmacro
+
+
+%macro SAD_LOOP 0
+.nextrow:
+ mov xq, widthq
+
+ .loop:
+ movu m0, [src1q + xq]
+ psadbw m0, [src2q + xq]
+ paddq m1, m0
+ add xq, mmsize
+ jl .loop
+ add src1q, stride1q
+ add src2q, stride2q
+ sub endd, 1
+ jg .nextrow
+
+ mov r0q, r6mp
+ movu [r0q], m1 ; sum
+REP_RET
+%endmacro
+
+
+%macro SAD_FRAMES 0
+ SAD_INIT
+ SAD_LOOP
+%endmacro
+
+
+INIT_XMM sse2
+SAD_FRAMES
+
+%if HAVE_AVX2_EXTERNAL
+
+INIT_YMM avx2
+SAD_FRAMES
+
+%endif
diff --git a/libavfilter/x86/scene_sad_init.c b/libavfilter/x86/scene_sad_init.c
new file mode 100644
index 0000000000..461fa406d9
--- /dev/null
+++ b/libavfilter/x86/scene_sad_init.c
@@ -0,0 +1,52 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavfilter/scene_sad.h"
+
+#define SCENE_SAD_FUNC(FUNC_NAME, ASM_FUNC_NAME, MMSIZE) \
+void ASM_FUNC_NAME(SCENE_SAD_PARAMS); \
+ \
+static void FUNC_NAME(SCENE_SAD_PARAMS) { \
+ uint64_t sad[MMSIZE / 8] = {0}; \
+ ptrdiff_t awidth = width & ~(MMSIZE - 1); \
+ *sum = 0; \
+ ASM_FUNC_NAME(src1, stride1, src2, stride2, awidth, height, sad); \
+ for (int i = 0; i < MMSIZE / 8; i++) \
+ *sum += sad[i]; \
+ ff_scene_sad_c(src1 + awidth, stride1, \
+ src2 + awidth, stride2, \
+ width - awidth, height, sad); \
+ *sum += sad[0]; \
+}
+
+SCENE_SAD_FUNC(scene_sad_sse2, ff_scene_sad_sse2, 16);
+SCENE_SAD_FUNC(scene_sad_avx2, ff_scene_sad_avx2, 32);
+
+ff_scene_sad_fn ff_scene_sad_get_fn_x86(int depth)
+{
+ int cpu_flags = av_get_cpu_flags();
+ if (depth == 8) {
+ if (EXTERNAL_AVX2_FAST(cpu_flags))
+ return scene_sad_avx2;
+ else if (EXTERNAL_SSE2(cpu_flags))
+ return scene_sad_sse2;
+ }
+ return NULL;
+}
More information about the ffmpeg-cvslog
mailing list