[FFmpeg-devel] [PATCHv2] vf_interlace: Add SIMD for lowpass filter

Kieran Kunhya kierank at obe.tv
Tue Nov 11 19:43:42 CET 2014


---
 libavfilter/interlace.h             | 59 +++++++++++++++++++++++++++++
 libavfilter/vf_interlace.c          | 55 +++++++++++++--------------
 libavfilter/x86/Makefile            |  2 +
 libavfilter/x86/vf_interlace.asm    | 75 +++++++++++++++++++++++++++++++++++++
 libavfilter/x86/vf_interlace_init.c | 42 +++++++++++++++++++++
 5 files changed, 204 insertions(+), 29 deletions(-)
 create mode 100644 libavfilter/interlace.h
 create mode 100644 libavfilter/x86/vf_interlace.asm
 create mode 100644 libavfilter/x86/vf_interlace_init.c

diff --git a/libavfilter/interlace.h b/libavfilter/interlace.h
new file mode 100644
index 0000000..a05251f
--- /dev/null
+++ b/libavfilter/interlace.h
@@ -0,0 +1,59 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Libav; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/**
+ * @file
+ * progressive to interlaced content filter, inspired by heavy debugging of tinterlace filter
+ */
+
+#ifndef AVFILTER_INTERLACE_H
+#define AVFILTER_INTERLACE_H
+
+#include "libavutil/common.h"
+#include "libavutil/opt.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/avassert.h"
+
+#include "formats.h"
+#include "avfilter.h"
+#include "internal.h"
+#include "video.h"
+
+enum ScanMode {
+    MODE_TFF = 0,
+    MODE_BFF = 1,
+};
+
+enum FieldType {
+    FIELD_UPPER = 0,
+    FIELD_LOWER = 1,
+};
+
+typedef struct InterlaceContext {
+    const AVClass *class;
+    enum ScanMode scan;    // top or bottom field first scanning
+    int lowpass;           // enable or disable low pass filtering
+    AVFrame *cur, *next;   // the two frames from which the new one is obtained
+    int got_output;        // signal an output frame is ready to request_frame()
+    void (*lowpass_line)(uint8_t *dstp, ptrdiff_t linesize, const uint8_t *srcp,
+                         const uint8_t *srcp_above, const uint8_t *srcp_below);
+} InterlaceContext;
+
+void ff_interlace_init_x86(InterlaceContext *interlace);
+
+#endif /* AVFILTER_INTERLACE_H */
diff --git a/libavfilter/vf_interlace.c b/libavfilter/vf_interlace.c
index c534b0b..368be10 100644
--- a/libavfilter/vf_interlace.c
+++ b/libavfilter/vf_interlace.c
@@ -30,24 +30,7 @@
 #include "avfilter.h"
 #include "internal.h"
 #include "video.h"
-
-enum ScanMode {
-    MODE_TFF = 0,
-    MODE_BFF = 1,
-};
-
-enum FieldType {
-    FIELD_UPPER = 0,
-    FIELD_LOWER = 1,
-};
-
-typedef struct InterlaceContext {
-    const AVClass *class;
-    enum ScanMode scan;    // top or bottom field first scanning
-    int lowpass;           // enable or disable low pass filterning
-    AVFrame *cur, *next;   // the two frames from which the new one is obtained
-    int got_output;        // signal an output frame is reday to request_frame()
-} InterlaceContext;
+#include "interlace.h"
 
 #define OFFSET(x) offsetof(InterlaceContext, x)
 #define V AV_OPT_FLAG_VIDEO_PARAM
@@ -70,6 +53,17 @@ static const AVClass class = {
     .version    = LIBAVUTIL_VERSION_INT,
 };
 
+static void lowpass_line_c(uint8_t *dstp, ptrdiff_t linesize, const uint8_t *srcp,
+                           const uint8_t *srcp_above, const uint8_t *srcp_below)
+{
+    int i;
+    for (i = 0; i < linesize; i++) {
+        // this calculation is an integer representation of
+        // '0.5 * current + 0.25 * above + 0.25 * below'
+        // '1 +' is for rounding.
+        dstp[i] = (1 + srcp[i] + srcp[i] + srcp_above[i] + srcp_below[i]) >> 2;
+    }
+}
 
 static const enum AVPixelFormat formats_supported[] = {
     AV_PIX_FMT_YUV420P,  AV_PIX_FMT_YUV422P,  AV_PIX_FMT_YUV444P,
@@ -116,23 +110,31 @@ static int config_out_props(AVFilterLink *outlink)
     // half framerate
     outlink->time_base.num *= 2;
 
+
+    if (s->lowpass) {
+        s->lowpass_line = lowpass_line_c;
+        if (ARCH_X86)
+            ff_interlace_init_x86(s);
+    }
+
     av_log(ctx, AV_LOG_VERBOSE, "%s interlacing %s lowpass filter\n",
            s->scan == MODE_TFF ? "tff" : "bff", (s->lowpass) ? "with" : "without");
 
     return 0;
 }
 
-static void copy_picture_field(AVFrame *src_frame, AVFrame *dst_frame,
+static void copy_picture_field(InterlaceContext *s,
+                               AVFrame *src_frame, AVFrame *dst_frame,
                                AVFilterLink *inlink, enum FieldType field_type,
                                int lowpass)
 {
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
     int vsub = desc->log2_chroma_h;
-    int plane, i, j;
+    int plane, j;
 
     for (plane = 0; plane < desc->nb_components; plane++) {
         int lines = (plane == 1 || plane == 2) ? -(-inlink->h) >> vsub : inlink->h;
-        int linesize = av_image_get_linesize(inlink->format, inlink->w, plane);
+        ptrdiff_t linesize = av_image_get_linesize(inlink->format, inlink->w, plane);
         uint8_t *dstp = dst_frame->data[plane];
         const uint8_t *srcp = src_frame->data[plane];
 
@@ -153,12 +155,7 @@ static void copy_picture_field(AVFrame *src_frame, AVFrame *dst_frame,
                     srcp_above = srcp; // there is no line above
                 if (j == 1)
                     srcp_below = srcp; // there is no line below
-                for (i = 0; i < linesize; i++) {
-                    // this calculation is an integer representation of
-                    // '0.5 * current + 0.25 * above + 0.25 * below'
-                    // '1 +' is for rounding.
-                    dstp[i] = (1 + srcp[i] + srcp[i] + srcp_above[i] + srcp_below[i]) >> 2;
-                }
+                s->lowpass_line(dstp, linesize, srcp, srcp_above, srcp_below);
                 dstp += dstp_linesize;
                 srcp += srcp_linesize;
             }
@@ -209,11 +206,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *buf)
     out->pts             /= 2;  // adjust pts to new framerate
 
     /* copy upper/lower field from cur */
-    copy_picture_field(s->cur, out, inlink, tff ? FIELD_UPPER : FIELD_LOWER, s->lowpass);
+    copy_picture_field(s, s->cur, out, inlink, tff ? FIELD_UPPER : FIELD_LOWER, s->lowpass);
     av_frame_free(&s->cur);
 
     /* copy lower/upper field from next */
-    copy_picture_field(s->next, out, inlink, tff ? FIELD_LOWER : FIELD_UPPER, s->lowpass);
+    copy_picture_field(s, s->next, out, inlink, tff ? FIELD_LOWER : FIELD_UPPER, s->lowpass);
     av_frame_free(&s->next);
 
     ret = ff_filter_frame(outlink, out);
diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile
index 16b1307..13b5d31 100644
--- a/libavfilter/x86/Makefile
+++ b/libavfilter/x86/Makefile
@@ -1,9 +1,11 @@
 OBJS-$(CONFIG_GRADFUN_FILTER)                += x86/vf_gradfun_init.o
 OBJS-$(CONFIG_HQDN3D_FILTER)                 += x86/vf_hqdn3d_init.o
+OBJS-$(CONFIG_INTERLACE_FILTER)              += x86/vf_interlace_init.o
 OBJS-$(CONFIG_VOLUME_FILTER)                 += x86/af_volume_init.o
 OBJS-$(CONFIG_YADIF_FILTER)                  += x86/vf_yadif_init.o
 
 YASM-OBJS-$(CONFIG_GRADFUN_FILTER)           += x86/vf_gradfun.o
 YASM-OBJS-$(CONFIG_HQDN3D_FILTER)            += x86/vf_hqdn3d.o
+YASM-OBJS-$(CONFIG_INTERLACE_FILTER)         += x86/vf_interlace.o
 YASM-OBJS-$(CONFIG_VOLUME_FILTER)            += x86/af_volume.o
 YASM-OBJS-$(CONFIG_YADIF_FILTER)             += x86/vf_yadif.o
diff --git a/libavfilter/x86/vf_interlace.asm b/libavfilter/x86/vf_interlace.asm
new file mode 100644
index 0000000..8c2e9b0
--- /dev/null
+++ b/libavfilter/x86/vf_interlace.asm
@@ -0,0 +1,75 @@
+;*****************************************************************************
+;* x86-optimized functions for interlace filter
+;*
+;* Copyright (C) 2014 Kieran Kunhya <kierank at obe.tv>
+;*
+;* This file is part of Libav.
+;*
+;* Libav is free software; you can redistribute it and/or modify
+;* it under the terms of the GNU General Public License as published by
+;* the Free Software Foundation; either version 2 of the License, or
+;* (at your option) any later version.
+;*
+;* Libav is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;* GNU General Public License for more details.
+;*
+;* You should have received a copy of the GNU General Public License along
+;* with Libav; if not, write to the Free Software Foundation, Inc.,
+;* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+SECTION .text
+
+%macro LOWPASS_LINE 0
+cglobal lowpass_line, 5, 5, 7
+    add r0, r1
+    add r2, r1
+    add r3, r1
+    add r4, r1
+    neg r1
+
+    pxor m6, m6
+
+.loop
+    mova m0, [r2+r1]
+    punpcklbw m1, m0, m6
+    punpckhbw m0, m6
+    paddw m0, m0
+    paddw m1, m1
+
+    mova m2, [r3+r1]
+    punpcklbw m3, m2, m6
+    punpckhbw m2, m6
+
+    mova m4, [r4+r1]
+    punpcklbw m5, m4, m6
+    punpckhbw m4, m6
+
+    paddw m1, m3
+    pavgw m1, m5
+
+    paddw m0, m2
+    pavgw m0, m4
+
+    psrlw m0, 1
+    psrlw m1, 1
+
+    packuswb m1, m0
+    mova [r0+r1], m1
+
+    add r1, mmsize
+    jl .loop
+REP_RET
+%endmacro
+
+INIT_XMM sse2
+LOWPASS_LINE
+
+INIT_XMM avx
+LOWPASS_LINE
diff --git a/libavfilter/x86/vf_interlace_init.c b/libavfilter/x86/vf_interlace_init.c
new file mode 100644
index 0000000..54ed43f
--- /dev/null
+++ b/libavfilter/x86/vf_interlace_init.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2014 Kieran Kunhya <kierank at obe.tv>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Libav; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/internal.h"
+#include "libavutil/mem.h"
+#include "libavutil/x86/asm.h"
+#include "libavutil/x86/cpu.h"
+#include "libavfilter/interlace.h"
+
+void ff_lowpass_line_sse2(uint8_t *dstp, ptrdiff_t linesize, const uint8_t *srcp,
+                          const uint8_t *srcp_above, const uint8_t *srcp_below);
+void ff_lowpass_line_avx (uint8_t *dstp, ptrdiff_t linesize, const uint8_t *srcp,
+                          const uint8_t *srcp_above, const uint8_t *srcp_below);
+
+av_cold void ff_interlace_init_x86(InterlaceContext *s)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (EXTERNAL_SSE2(cpu_flags))
+        s->lowpass_line = ff_lowpass_line_sse2;
+    if (EXTERNAL_AVX(cpu_flags))
+        s->lowpass_line = ff_lowpass_line_avx;
+}
-- 
1.9.1



More information about the ffmpeg-devel mailing list