[FFmpeg-devel] [PATCH 2/2] avfilter/transpose: x86 asm
Paul B Mahol
onemda at gmail.com
Thu Sep 12 18:52:55 CEST 2013
Signed-off-by: Paul B Mahol <onemda at gmail.com>
---
I'm not happy with it. At best it gives a 30% speedup, but
I expected at least 3x.
---
libavfilter/vf_transpose.c | 31 ++++-------------
libavfilter/vf_transpose.h | 57 +++++++++++++++++++++++++++++++
libavfilter/x86/Makefile | 2 ++
libavfilter/x86/vf_transpose.asm | 67 +++++++++++++++++++++++++++++++++++++
libavfilter/x86/vf_transpose_init.c | 39 +++++++++++++++++++++
5 files changed, 171 insertions(+), 25 deletions(-)
create mode 100644 libavfilter/vf_transpose.h
create mode 100644 libavfilter/x86/vf_transpose.asm
create mode 100644 libavfilter/x86/vf_transpose_init.c
diff --git a/libavfilter/vf_transpose.c b/libavfilter/vf_transpose.c
index d19198c..8a14d79 100644
--- a/libavfilter/vf_transpose.c
+++ b/libavfilter/vf_transpose.c
@@ -37,31 +37,7 @@
#include "formats.h"
#include "internal.h"
#include "video.h"
-
-typedef enum {
- TRANSPOSE_PT_TYPE_NONE,
- TRANSPOSE_PT_TYPE_LANDSCAPE,
- TRANSPOSE_PT_TYPE_PORTRAIT,
-} PassthroughType;
-
-enum TransposeDir {
- TRANSPOSE_CCLOCK_FLIP,
- TRANSPOSE_CLOCK,
- TRANSPOSE_CCLOCK,
- TRANSPOSE_CLOCK_FLIP,
-};
-
-typedef struct {
- const AVClass *class;
- int hsub, vsub;
- int pixsteps[4];
-
- PassthroughType passthrough; ///< landscape passthrough mode enabled
- enum TransposeDir dir;
-
- void (*transpose_block)(uint8_t *src, int src_linesize,
- uint8_t *dst, int dst_linesize);
-} TransContext;
+#include "vf_transpose.h"
static int query_formats(AVFilterContext *ctx)
{
@@ -190,6 +166,9 @@ static int config_props_output(AVFilterLink *outlink)
case 8: trans->transpose_block = transpose_64_c; break;
}
+ if (ARCH_X86)
+ ff_transpose_init_x86(trans);
+
av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d dir:%d -> w:%d h:%d rotation:%s vflip:%d\n",
inlink->w, inlink->h, trans->dir, outlink->w, outlink->h,
trans->dir == 1 || trans->dir == 3 ? "clockwise" : "counterclockwise",
@@ -287,6 +266,8 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
td.in = in, td.out = out;
ctx->internal->execute(ctx, filter_slice, &td, NULL, FFMIN(outlink->h, ctx->graph->nb_threads));
+ emms_c();
+
av_frame_free(&in);
return ff_filter_frame(outlink, out);
}
diff --git a/libavfilter/vf_transpose.h b/libavfilter/vf_transpose.h
new file mode 100644
index 0000000..a7b9099
--- /dev/null
+++ b/libavfilter/vf_transpose.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2010 Stefano Sabatini
+ * Copyright (c) 2008 Vitor Sessak
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_VF_TRANSPOSE_H
+#define AVFILTER_VF_TRANSPOSE_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "libavutil/opt.h"
+
+typedef enum { /* Passthrough setting; stored in TransContext.passthrough. */
+ TRANSPOSE_PT_TYPE_NONE, ///< value 0; presumably "never pass frames through untransposed" -- confirm in vf_transpose.c
+ TRANSPOSE_PT_TYPE_LANDSCAPE, ///< value 1; presumably applies to landscape-shaped input -- confirm in vf_transpose.c
+ TRANSPOSE_PT_TYPE_PORTRAIT, ///< value 2; presumably applies to portrait-shaped input -- confirm in vf_transpose.c
+} PassthroughType;
+
+enum TransposeDir { /* Transposition direction; config_props_output() logs values 1 and 3 as "clockwise", the others as "counterclockwise". */
+ TRANSPOSE_CCLOCK_FLIP, ///< value 0, logged as counterclockwise
+ TRANSPOSE_CLOCK, ///< value 1, logged as clockwise
+ TRANSPOSE_CCLOCK, ///< value 2, logged as counterclockwise
+ TRANSPOSE_CLOCK_FLIP, ///< value 3, logged as clockwise
+};
+
+typedef struct TransContext { /* Private context of the transpose filter, shared with the x86 init code. */
+ const AVClass *class; ///< NOTE(review): libavutil logging/options normally expect this as the first member -- confirm
+ int hsub, vsub; ///< presumably horizontal/vertical chroma subsampling shifts -- confirm in vf_transpose.c
+ int pixsteps[4]; ///< per-component pixel step (presumably in bytes); pixsteps[0] selects the transpose_block implementation
+
+ PassthroughType passthrough; ///< selected passthrough mode, see PassthroughType
+ enum TransposeDir dir; ///< requested transposition direction, see enum TransposeDir
+
+ void (*transpose_block)(uint8_t *src, int src_linesize,
+ uint8_t *dst, int dst_linesize); ///< block transpose routine; set in config_props_output() and possibly replaced by ff_transpose_init_x86()
+} TransContext;
+
+void ff_transpose_init_x86(TransContext *trans); /* May replace trans->transpose_block with an x86 SIMD routine, depending on trans->pixsteps[0] and the CPU flags. */
+
+#endif /* AVFILTER_VF_TRANSPOSE_H */
diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile
index 0b19c4b..ee89313 100644
--- a/libavfilter/x86/Makefile
+++ b/libavfilter/x86/Makefile
@@ -1,9 +1,11 @@
OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun.o
OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d_init.o
OBJS-$(CONFIG_SPP_FILTER) += x86/vf_spp.o
+OBJS-$(CONFIG_TRANSPOSE_FILTER) += x86/vf_transpose_init.o
OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume_init.o
OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif_init.o
YASM-OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d.o
+YASM-OBJS-$(CONFIG_TRANSPOSE_FILTER) += x86/vf_transpose.o
YASM-OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume.o
YASM-OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif.o x86/yadif-16.o x86/yadif-10.o
diff --git a/libavfilter/x86/vf_transpose.asm b/libavfilter/x86/vf_transpose.asm
new file mode 100644
index 0000000..5c4ced7
--- /dev/null
+++ b/libavfilter/x86/vf_transpose.asm
@@ -0,0 +1,67 @@
+;******************************************************************************
+;* x86-optimized functions for transpose filter
+;*
+;* Copyright (c) 2012 Fredrik Mellbin
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION .text
+
+INIT_XMM sse
+; 8x8 byte transpose: byte x of source row y becomes byte y of destination
+; row x, for x, y in 0..7.
+;
+; void ff_transpose_filter_byte_sse(uint8_t *src, int src_linesize,
+;                                   uint8_t *dst, int dst_linesize);
+;
+; NOTE: the XMM forms of movq/punpck* used below are SSE2 instructions even
+; though the function carries the "sse" suffix, so it must only be selected
+; on CPUs that report SSE2.
+;
+; All eight XMM registers are used. Declaring them in cglobal lets x86inc
+; save and restore the callee-saved xmm6/xmm7 on Win64.
+cglobal transpose_filter_byte, 4, 5, 8, src, srcstride, dst, dststride, tmp
+%if ARCH_X86_64
+    ; The strides are passed as C ints, so the upper halves of the 64-bit
+    ; registers are undefined: sign-extend before using them in addressing.
+    movsxd      srcstrideq, srcstrided
+    movsxd      dststrideq, dststrided
+%endif
+    ; load source rows 0-3 (8 bytes each); tmp runs one row ahead of src
+    lea         tmpq, [srcq + srcstrideq]
+    movq        m0, [srcq]
+    movq        m1, [tmpq]
+    movq        m2, [srcq + 2*srcstrideq]
+    movq        m3, [tmpq + 2*srcstrideq]
+    lea         srcq, [srcq + 4*srcstrideq]
+    lea         tmpq, [tmpq + 4*srcstrideq]
+    ; interleave bytes of rows 0/1 and rows 2/3
+    punpcklbw   m0, m1
+    punpcklbw   m2, m3
+    ; load source rows 4-7
+    movq        m4, [srcq]
+    movq        m5, [tmpq]
+    movq        m6, [srcq + 2*srcstrideq]
+    movq        m7, [tmpq + 2*srcstrideq]
+    ; rows 0-3: m0 = columns 0-3, m1 = columns 4-7 (4 bytes per column)
+    mova        m1, m0
+    punpcklwd   m0, m2
+    punpckhwd   m1, m2
+    ; rows 4-7: m4 = columns 0-3, m5 = columns 4-7
+    punpcklbw   m4, m5
+    punpcklbw   m6, m7
+    mova        m5, m4
+    punpcklwd   m4, m6
+    punpckhwd   m5, m6
+    ; join both halves: m0 = columns 0|1, m2 = columns 2|3,
+    ;                   m1 = columns 4|5, m3 = columns 6|7
+    mova        m2, m0
+    punpckldq   m0, m4
+    punpckhdq   m2, m4
+    mova        m3, m1
+    lea         tmpq, [dstq + dststrideq]
+    punpckldq   m1, m5
+    punpckhdq   m3, m5
+    ; store destination rows 0-3 (low qword = even row, high qword = odd row)
+    movq        [dstq], m0
+    movhps      [tmpq], m0
+    movq        [dstq + 2*dststrideq], m2
+    movhps      [tmpq + 2*dststrideq], m2
+    lea         dstq, [dstq + 4*dststrideq]
+    lea         tmpq, [tmpq + 4*dststrideq]
+    ; store destination rows 4-7
+    movq        [dstq], m1
+    movhps      [tmpq], m1
+    movq        [dstq + 2*dststrideq], m3
+    movhps      [tmpq + 2*dststrideq], m3
+    RET
diff --git a/libavfilter/x86/vf_transpose_init.c b/libavfilter/x86/vf_transpose_init.c
new file mode 100644
index 0000000..619b883
--- /dev/null
+++ b/libavfilter/x86/vf_transpose_init.c
@@ -0,0 +1,39 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/mem.h"
+#include "libavutil/x86/asm.h"
+#include "libavutil/x86/cpu.h"
+#include "libavfilter/vf_transpose.h"
+
+void ff_transpose_filter_byte_sse(uint8_t *src, int src_linesize,
+ uint8_t *dst, int dst_linesize);
+
+/**
+ * Install x86 SIMD implementations of the block transpose routine.
+ *
+ * s->transpose_block is left untouched unless s->pixsteps[0] is 1 and the
+ * running CPU reports SSE2 support.
+ *
+ * @param s transpose filter context; reads pixsteps[0], may write
+ *          transpose_block
+ */
+av_cold void ff_transpose_init_x86(TransContext *s)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    switch (s->pixsteps[0]) {
+    case 1:
+        /* The routine is exported with an _sse suffix, but it executes SSE2
+         * integer instructions on XMM registers (punpck*, 64-bit movq), so
+         * selecting it on an SSE-only CPU would raise an illegal-instruction
+         * fault. Gate it on SSE2. */
+        if (EXTERNAL_SSE2(cpu_flags))
+            s->transpose_block = ff_transpose_filter_byte_sse;
+        break;
+    }
+}
--
1.7.11.2
More information about the ffmpeg-devel
mailing list