[FFmpeg-cvslog] SBR DSP x86: implement SSE sbr_sum_square_sse
Christophe GISQUET
git at videolan.org
Sat Feb 25 04:28:15 CET 2012
ffmpeg | branch: master | Christophe GISQUET <christophe.gisquet at gmail.com> | Thu Feb 23 19:48:58 2012 +0100| [34454c761f01275d4adaf40df6d70a59011c4a6c] | committer: Ronald S. Bultje
SBR DSP x86: implement SSE sbr_sum_square_sse
The 32bits targets have been compiled with -mfpmath=sse for proper reference.
sbr_sum_square C /32bits: 82c (unrolled)/102c
C /64bits: 69c (unrolled)/82c
SSE/32bits: 42c
SSE/64bits: 31c
Use of SSE4.1 dpps to perform the final sum is slower.
Not unrolling the loop to perform 8 operations per iteration costs 10 more cycles.
Signed-off-by: Ronald S. Bultje <rsbultje at gmail.com>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=34454c761f01275d4adaf40df6d70a59011c4a6c
---
libavcodec/sbrdsp.c | 2 +
libavcodec/sbrdsp.h | 1 +
libavcodec/x86/Makefile | 2 +
libavcodec/x86/sbrdsp.asm | 74 ++++++++++++++++++++++++++++++++++++++++++
libavcodec/x86/sbrdsp_init.c | 37 +++++++++++++++++++++
5 files changed, 116 insertions(+), 0 deletions(-)
diff --git a/libavcodec/sbrdsp.c b/libavcodec/sbrdsp.c
index aef894a..f942759 100644
--- a/libavcodec/sbrdsp.c
+++ b/libavcodec/sbrdsp.c
@@ -238,4 +238,6 @@ av_cold void ff_sbrdsp_init(SBRDSPContext *s)
if (ARCH_ARM)
ff_sbrdsp_init_arm(s);
+ if (HAVE_MMX)
+ ff_sbrdsp_init_x86(s);
}
diff --git a/libavcodec/sbrdsp.h b/libavcodec/sbrdsp.h
index ee5d5a0..fe91957 100644
--- a/libavcodec/sbrdsp.h
+++ b/libavcodec/sbrdsp.h
@@ -46,5 +46,6 @@ extern const float ff_sbr_noise_table[][2];
void ff_sbrdsp_init(SBRDSPContext *s);
void ff_sbrdsp_init_arm(SBRDSPContext *s);
+void ff_sbrdsp_init_x86(SBRDSPContext *s);
#endif
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index fc88433..e64697a 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -47,6 +47,8 @@ YASM-OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp.o
MMX-OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp-init.o
YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o
MMX-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp-init.o
+MMX-OBJS-$(CONFIG_AAC_DECODER) += x86/sbrdsp_init.o
+YASM-OBJS-$(CONFIG_AAC_DECODER) += x86/sbrdsp.o
MMX-OBJS-$(CONFIG_DWT) += x86/snowdsp_mmx.o
MMX-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_mmx.o
YASM-OBJS-$(CONFIG_VP3_DECODER) += x86/vp3dsp.o
diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm
new file mode 100644
index 0000000..71471bd
--- /dev/null
+++ b/libavcodec/x86/sbrdsp.asm
@@ -0,0 +1,74 @@
+;******************************************************************************
+;* AAC Spectral Band Replication decoding functions
+;* Copyright (C) 2012 Christophe Gisquet <christophe.gisquet at gmail.com>
+;*
+;* This file is part of Libav.
+;*
+;* Libav is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* Libav is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with Libav; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "x86inc.asm"
+%include "x86util.asm"
+
+;SECTION_RODATA
+SECTION .text
+
+INIT_XMM sse
+cglobal sbr_sum_square, 2, 3, 6 ; r0 = x (pairs of floats), r1 = n (int): returns the sum of the squares of the floats at x
+    mov        r2d, r1d          ; n is a 32-bit int: the upper half of r1 is undefined on x86-64, so copy only the low dword
+    xorps       m0, m0           ; m0 and m1 are two independent 4-lane accumulators
+    xorps       m1, m1
+    sar        r2d, 3            ; r2 = n / 8 = number of unrolled iterations
+    jz          .prepare
+.loop:                           ; each pass squares and accumulates 16 floats (64 bytes)
+    movu        m2, [r0 +  0]
+    movu        m3, [r0 + 16]
+    movu        m4, [r0 + 32]
+    movu        m5, [r0 + 48]
+    mulps       m2, m2
+    mulps       m3, m3
+    mulps       m4, m4
+    mulps       m5, m5
+    addps       m0, m2
+    addps       m1, m3
+    addps       m0, m4
+    addps       m1, m5
+    add         r0, 64
+    dec        r2d
+    jnz         .loop
+.prepare:
+    and         r1, 7            ; remainder of n modulo 8; the mask also discards any stale upper bits of r1
+    sar         r1, 1            ; the tail loop consumes 2 pairs (4 floats) per pass
+    jz          .end
+; n is expected to be a multiple of 2, so each remaining pass has a full 4 floats to load
+.endloop:
+    movu        m2, [r0]
+    add         r0, 16
+    mulps       m2, m2
+    dec         r1               ; sets ZF for the jnz below; addps does not touch the flags
+    addps       m0, m2
+    jnz         .endloop
+.end:
+    addps       m0, m1           ; merge the two accumulators: 4 partial sums remain
+    movhlps     m2, m0           ; bring lanes 2-3 down to lanes 0-1 of m2
+    addps       m0, m2           ; lanes 0-1 now hold the two remaining partial sums
+    movss       m1, m0           ; keep lane 0
+    shufps      m0, m0, 1        ; move lane 1 into lane 0
+    addss       m0, m1           ; lane 0 of m0 = total
+%if ARCH_X86_64 == 0
+    movd        r0m, m0          ; 32-bit builds return floats in x87 st0: spill to the first argument's stack slot
+    fld         dword r0m        ; and reload it onto the x87 stack
+%endif
+    RET
diff --git a/libavcodec/x86/sbrdsp_init.c b/libavcodec/x86/sbrdsp_init.c
new file mode 100644
index 0000000..313f492
--- /dev/null
+++ b/libavcodec/x86/sbrdsp_init.c
@@ -0,0 +1,37 @@
+/*
+ * AAC Spectral Band Replication decoding functions
+ * Copyright (c) 2012 Christophe Gisquet <christophe.gisquet at gmail.com>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/cpu.h"
+#include "libavcodec/sbrdsp.h"
+
+float ff_sbr_sum_square_sse(float (*x)[2], int n);
+
+void ff_sbrdsp_init_x86(SBRDSPContext *s)
+{
+    /* The assembly symbol is only referenced under the HAVE_YASM guard. */
+    if (HAVE_YASM) {
+        const int cpu_flags = av_get_cpu_flags();
+        /* Install the SSE routine only when the CPU flags report SSE. */
+        if (cpu_flags & AV_CPU_FLAG_SSE)
+            s->sum_square = ff_sbr_sum_square_sse;
+    }
+}
More information about the ffmpeg-cvslog
mailing list