[FFmpeg-devel] [PATCH 2/2] avfilter/transpose: x86 asm
    Paul B Mahol 
    onemda at gmail.com
       
    Thu Sep 12 18:52:55 CEST 2013
    
    
  
Signed-off-by: Paul B Mahol <onemda at gmail.com>
---
I'm not happy with it. At best it gives a 30% speedup, but
I expected at least 3x.
---
 libavfilter/vf_transpose.c          | 31 ++++-------------
 libavfilter/vf_transpose.h          | 57 +++++++++++++++++++++++++++++++
 libavfilter/x86/Makefile            |  2 ++
 libavfilter/x86/vf_transpose.asm    | 67 +++++++++++++++++++++++++++++++++++++
 libavfilter/x86/vf_transpose_init.c | 39 +++++++++++++++++++++
 5 files changed, 171 insertions(+), 25 deletions(-)
 create mode 100644 libavfilter/vf_transpose.h
 create mode 100644 libavfilter/x86/vf_transpose.asm
 create mode 100644 libavfilter/x86/vf_transpose_init.c
diff --git a/libavfilter/vf_transpose.c b/libavfilter/vf_transpose.c
index d19198c..8a14d79 100644
--- a/libavfilter/vf_transpose.c
+++ b/libavfilter/vf_transpose.c
@@ -37,31 +37,7 @@
 #include "formats.h"
 #include "internal.h"
 #include "video.h"
-
-typedef enum {
-    TRANSPOSE_PT_TYPE_NONE,
-    TRANSPOSE_PT_TYPE_LANDSCAPE,
-    TRANSPOSE_PT_TYPE_PORTRAIT,
-} PassthroughType;
-
-enum TransposeDir {
-    TRANSPOSE_CCLOCK_FLIP,
-    TRANSPOSE_CLOCK,
-    TRANSPOSE_CCLOCK,
-    TRANSPOSE_CLOCK_FLIP,
-};
-
-typedef struct {
-    const AVClass *class;
-    int hsub, vsub;
-    int pixsteps[4];
-
-    PassthroughType passthrough; ///< landscape passthrough mode enabled
-    enum TransposeDir dir;
-
-    void (*transpose_block)(uint8_t *src, int src_linesize,
-                            uint8_t *dst, int dst_linesize);
-} TransContext;
+#include "vf_transpose.h"
 
 static int query_formats(AVFilterContext *ctx)
 {
@@ -190,6 +166,9 @@ static int config_props_output(AVFilterLink *outlink)
     case 8: trans->transpose_block = transpose_64_c; break;
     }
 
+    if (ARCH_X86)
+        ff_transpose_init_x86(trans);
+
     av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d dir:%d -> w:%d h:%d rotation:%s vflip:%d\n",
            inlink->w, inlink->h, trans->dir, outlink->w, outlink->h,
            trans->dir == 1 || trans->dir == 3 ? "clockwise" : "counterclockwise",
@@ -287,6 +266,8 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 
     td.in = in, td.out = out;
     ctx->internal->execute(ctx, filter_slice, &td, NULL, FFMIN(outlink->h, ctx->graph->nb_threads));
+    emms_c();
+
     av_frame_free(&in);
     return ff_filter_frame(outlink, out);
 }
diff --git a/libavfilter/vf_transpose.h b/libavfilter/vf_transpose.h
new file mode 100644
index 0000000..a7b9099
--- /dev/null
+++ b/libavfilter/vf_transpose.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2010 Stefano Sabatini
+ * Copyright (c) 2008 Vitor Sessak
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_VF_TRANSPOSE_H
+#define AVFILTER_VF_TRANSPOSE_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "libavutil/opt.h"
+
+typedef enum {
+    TRANSPOSE_PT_TYPE_NONE,
+    TRANSPOSE_PT_TYPE_LANDSCAPE,
+    TRANSPOSE_PT_TYPE_PORTRAIT,
+} PassthroughType;
+
+enum TransposeDir {
+    TRANSPOSE_CCLOCK_FLIP, ///< value 0; logged as "counterclockwise" by config_props_output()
+    TRANSPOSE_CLOCK, ///< value 1; logged as "clockwise" by config_props_output()
+    TRANSPOSE_CCLOCK, ///< value 2; logged as "counterclockwise" by config_props_output()
+    TRANSPOSE_CLOCK_FLIP, ///< value 3; logged as "clockwise" by config_props_output()
+};
+
+typedef struct TransContext { /* filter state shared by vf_transpose.c and the x86 init code */
+    const AVClass *class;
+    int hsub, vsub;
+    int pixsteps[4]; /* element [0] selects the x86 kernel in ff_transpose_init_x86() */
+
+    PassthroughType passthrough; ///< landscape passthrough mode enabled
+    enum TransposeDir dir;
+
+    void (*transpose_block)(uint8_t *src, int src_linesize, /* block kernel: set in config_props_output(), */
+                            uint8_t *dst, int dst_linesize); /* may then be replaced by ff_transpose_init_x86() */
+} TransContext;
+
+void ff_transpose_init_x86(TransContext *trans); /* may replace trans->transpose_block with an x86 asm kernel */
+
+#endif /* AVFILTER_VF_TRANSPOSE_H */
diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile
index 0b19c4b..ee89313 100644
--- a/libavfilter/x86/Makefile
+++ b/libavfilter/x86/Makefile
@@ -1,9 +1,11 @@
 OBJS-$(CONFIG_GRADFUN_FILTER)                += x86/vf_gradfun.o
 OBJS-$(CONFIG_HQDN3D_FILTER)                 += x86/vf_hqdn3d_init.o
 OBJS-$(CONFIG_SPP_FILTER)                    += x86/vf_spp.o
+OBJS-$(CONFIG_TRANSPOSE_FILTER)              += x86/vf_transpose_init.o
 OBJS-$(CONFIG_VOLUME_FILTER)                 += x86/af_volume_init.o
 OBJS-$(CONFIG_YADIF_FILTER)                  += x86/vf_yadif_init.o
 
 YASM-OBJS-$(CONFIG_HQDN3D_FILTER)            += x86/vf_hqdn3d.o
+YASM-OBJS-$(CONFIG_TRANSPOSE_FILTER)         += x86/vf_transpose.o
 YASM-OBJS-$(CONFIG_VOLUME_FILTER)            += x86/af_volume.o
 YASM-OBJS-$(CONFIG_YADIF_FILTER)             += x86/vf_yadif.o x86/yadif-16.o x86/yadif-10.o
diff --git a/libavfilter/x86/vf_transpose.asm b/libavfilter/x86/vf_transpose.asm
new file mode 100644
index 0000000..5c4ced7
--- /dev/null
+++ b/libavfilter/x86/vf_transpose.asm
@@ -0,0 +1,67 @@
+;******************************************************************************
+;* x86-optimized functions for transpose filter
+;*
+;* Copyright (c) 2012 Fredrik Mellbin
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION .text
+
+INIT_XMM sse2 ; movq/punpck* on XMM registers are SSE2, not SSE
+cglobal transpose_filter_byte, 4, 5, 8, src, srcstride, dst, dststride, tmp ; 8x8 byte transpose; m0-m7 all used, so declare 8 XMM regs
+    movsxdifnidn srcstrideq, srcstrided ; strides are C ints and may be negative:
+    movsxdifnidn dststrideq, dststrided ; sign-extend before using them in 64-bit addressing
+    lea tmpq, [srcstrideq*3]            ; tmp = 3 * srcstride
+    movq m0, [srcq]                     ; source row 0
+    movq m1, [srcq + srcstrideq]        ; source row 1
+    movq m2, [srcq + 2*srcstrideq]      ; source row 2
+    movq m3, [srcq + tmpq]              ; source row 3
+    lea srcq, [srcq + 4*srcstrideq]     ; advance to source row 4
+    punpcklbw m0, m1                    ; rows 0/1 interleaved bytewise
+    punpcklbw m2, m3                    ; rows 2/3 interleaved bytewise
+    movq m4, [srcq]                     ; source row 4
+    movq m5, [srcq + srcstrideq]        ; source row 5
+    movq m6, [srcq + 2*srcstrideq]      ; source row 6
+    movq m7, [srcq + tmpq]              ; source row 7
+    mova m1, m0
+    punpcklwd m0, m2                    ; rows 0-3, columns 0-3
+    punpckhwd m1, m2                    ; rows 0-3, columns 4-7
+    punpcklbw m4, m5                    ; rows 4/5 interleaved bytewise
+    punpcklbw m6, m7                    ; rows 6/7 interleaved bytewise
+    mova m5, m4
+    punpcklwd m4, m6                    ; rows 4-7, columns 0-3
+    punpckhwd m5, m6                    ; rows 4-7, columns 4-7
+    mova m2, m0
+    punpckldq m0, m4                    ; columns 0 (low half) and 1 (high half)
+    punpckhdq m2, m4                    ; columns 2 and 3
+    mova m3, m1
+    lea tmpq, [dststrideq*3]            ; tmp = 3 * dststride
+    punpckldq m1, m5                    ; columns 4 and 5
+    punpckhdq m3, m5                    ; columns 6 and 7
+    movq [dstq], m0                     ; dst row 0 = source column 0
+    movhps [dstq + dststrideq], m0      ; dst row 1 = source column 1
+    movq [dstq + 2*dststrideq], m2      ; dst row 2 = source column 2
+    movhps [dstq + tmpq], m2            ; dst row 3 = source column 3
+    lea dstq, [dstq + 4*dststrideq]     ; advance to dst row 4
+    movq [dstq], m1                     ; dst row 4 = source column 4
+    movhps [dstq + dststrideq], m1      ; dst row 5 = source column 5
+    movq [dstq + 2*dststrideq], m3      ; dst row 6 = source column 6
+    movhps [dstq + tmpq], m3            ; dst row 7 = source column 7
+    RET
diff --git a/libavfilter/x86/vf_transpose_init.c b/libavfilter/x86/vf_transpose_init.c
new file mode 100644
index 0000000..619b883
--- /dev/null
+++ b/libavfilter/x86/vf_transpose_init.c
@@ -0,0 +1,39 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/mem.h"
+#include "libavutil/x86/asm.h"
+#include "libavutil/x86/cpu.h"
+#include "libavfilter/vf_transpose.h"
+
+void ff_transpose_filter_byte_sse2(uint8_t *src, int src_linesize, /* 8x8 byte transpose, vf_transpose.asm */
+                                   uint8_t *dst, int dst_linesize);
+
+av_cold void ff_transpose_init_x86(TransContext *s) /* install x86 kernels over the C defaults */
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    switch (s->pixsteps[0]) {
+    case 1: /* the only pixel step with an asm kernel so far */
+        if (EXTERNAL_SSE2(cpu_flags)) /* kernel uses SSE2 integer ops on XMM registers */
+            s->transpose_block = ff_transpose_filter_byte_sse2;
+        break;
+    }
+}
-- 
1.7.11.2
    
    
More information about the ffmpeg-devel
mailing list