[FFmpeg-cvslog] SBR DSP x86: implement SSE sbr_sum_square_sse

Christophe GISQUET git at videolan.org
Sat Feb 25 04:28:15 CET 2012


ffmpeg | branch: master | Christophe GISQUET <christophe.gisquet at gmail.com> | Thu Feb 23 19:48:58 2012 +0100| [34454c761f01275d4adaf40df6d70a59011c4a6c] | committer: Ronald S. Bultje

SBR DSP x86: implement SSE sbr_sum_square_sse

The 32-bit targets were compiled with -mfpmath=sse to provide a fair reference.
sbr_sum_square C  /32bits: 82c (unrolled)/102c
               C  /64bits: 69c (unrolled)/82c
               SSE/32bits: 42c
               SSE/64bits: 31c

Use of SSE4.1 dpps to perform the final sum is slower.
Not unrolling to perform 8 operations in a loop yields 10 more cycles.

Signed-off-by: Ronald S. Bultje <rsbultje at gmail.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=34454c761f01275d4adaf40df6d70a59011c4a6c
---

 libavcodec/sbrdsp.c          |    2 +
 libavcodec/sbrdsp.h          |    1 +
 libavcodec/x86/Makefile      |    2 +
 libavcodec/x86/sbrdsp.asm    |   74 ++++++++++++++++++++++++++++++++++++++++++
 libavcodec/x86/sbrdsp_init.c |   37 +++++++++++++++++++++
 5 files changed, 116 insertions(+), 0 deletions(-)

diff --git a/libavcodec/sbrdsp.c b/libavcodec/sbrdsp.c
index aef894a..f942759 100644
--- a/libavcodec/sbrdsp.c
+++ b/libavcodec/sbrdsp.c
@@ -238,4 +238,6 @@ av_cold void ff_sbrdsp_init(SBRDSPContext *s)
 
     if (ARCH_ARM)
         ff_sbrdsp_init_arm(s);
+    if (HAVE_MMX)
+        ff_sbrdsp_init_x86(s);
 }
diff --git a/libavcodec/sbrdsp.h b/libavcodec/sbrdsp.h
index ee5d5a0..fe91957 100644
--- a/libavcodec/sbrdsp.h
+++ b/libavcodec/sbrdsp.h
@@ -46,5 +46,6 @@ extern const float ff_sbr_noise_table[][2];
 
 void ff_sbrdsp_init(SBRDSPContext *s);
 void ff_sbrdsp_init_arm(SBRDSPContext *s);
+void ff_sbrdsp_init_x86(SBRDSPContext *s);
 
 #endif
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index fc88433..e64697a 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -47,6 +47,8 @@ YASM-OBJS-$(CONFIG_PNG_DECODER)        += x86/pngdsp.o
 MMX-OBJS-$(CONFIG_PNG_DECODER)         += x86/pngdsp-init.o
 YASM-OBJS-$(CONFIG_PRORES_DECODER)     += x86/proresdsp.o
 MMX-OBJS-$(CONFIG_PRORES_DECODER)      += x86/proresdsp-init.o
+MMX-OBJS-$(CONFIG_AAC_DECODER)         += x86/sbrdsp_init.o
+YASM-OBJS-$(CONFIG_AAC_DECODER)        += x86/sbrdsp.o
 MMX-OBJS-$(CONFIG_DWT)                 += x86/snowdsp_mmx.o
 MMX-OBJS-$(CONFIG_VC1_DECODER)         += x86/vc1dsp_mmx.o
 YASM-OBJS-$(CONFIG_VP3_DECODER)        += x86/vp3dsp.o
diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm
new file mode 100644
index 0000000..71471bd
--- /dev/null
+++ b/libavcodec/x86/sbrdsp.asm
@@ -0,0 +1,74 @@
+;******************************************************************************
+;* AAC Spectral Band Replication decoding functions
+;* Copyright (C) 2012 Christophe Gisquet <christophe.gisquet at gmail.com>
+;*
+;* This file is part of Libav.
+;*
+;* Libav is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* Libav is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with Libav; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "x86inc.asm"
+%include "x86util.asm"
+
+;SECTION_RODATA
+SECTION .text
+
+INIT_XMM sse                        ; SSE1 only: no SSE2 instruction may be used below
+cglobal sbr_sum_square, 2, 3, 6     ; float f(float (*x)[2], int n): r0 = x, r1 = n, r2 = scratch
+    mov        r2d, r1d              ; n is a 32-bit int: do not trust the upper half of r1 on x86-64
+    xorps       m0, m0               ; m0 = first vector accumulator
+    xorps       m1, m1               ; m1 = second accumulator, keeps two independent add chains
+    sar         r2, 3                ; r2 = n / 8 = number of unrolled iterations
+    jz          .prepare             ; fewer than 8 pairs: go straight to the tail
+.loop:                               ; main loop: 16 floats (8 re/im pairs) per iteration
+    movu        m2, [r0 +  0]
+    movu        m3, [r0 + 16]
+    movu        m4, [r0 + 32]
+    movu        m5, [r0 + 48]
+    mulps       m2, m2               ; square every loaded float
+    mulps       m3, m3
+    mulps       m4, m4
+    mulps       m5, m5
+    addps       m0, m2               ; alternate between the two accumulators
+    addps       m1, m3
+    addps       m0, m4
+    addps       m1, m5
+    add         r0, 64               ; advance x by 16 floats
+    dec         r2
+    jnz         .loop
+.prepare:
+    and         r1, 7                ; pairs left over after the unrolled loop
+    sar         r1, 1                ; tail iterations, 2 pairs each
+    jz          .end
+; n is expected to be even: the tail handles 2 pairs (4 floats) per pass, an odd last pair is skipped
+.endloop:
+    movu        m2, [r0]
+    add         r0, 16
+    mulps       m2, m2
+    dec         r1                   ; sets ZF for the jnz below; addps leaves flags untouched
+    addps       m0, m2
+    jnz         .endloop
+.end:                                ; horizontal reduction of the 4 lanes
+    addps       m0, m1               ; merge both accumulators
+    movhlps     m2, m0               ; m2[0..1] = m0[2..3]
+    addps       m0, m2               ; lanes 0 and 1 now hold the two partial sums
+    movss       m1, m0               ; m1[0] = lane 0
+    shufps      m0, m0, 1            ; move lane 1 down to lane 0
+    addss       m0, m1               ; m0[0] = total sum of squares
+%if ARCH_X86_64 == 0
+    movss       r0m,  m0             ; movss, not movd: the xmm form of movd is SSE2
+    fld         dword r0m            ; x86-32 returns float in st0, bounced via the arg slot
+%endif
+    RET
diff --git a/libavcodec/x86/sbrdsp_init.c b/libavcodec/x86/sbrdsp_init.c
new file mode 100644
index 0000000..313f492
--- /dev/null
+++ b/libavcodec/x86/sbrdsp_init.c
@@ -0,0 +1,37 @@
+/*
+ * AAC Spectral Band Replication decoding functions
+ * Copyright (c) 2012 Christophe Gisquet <christophe.gisquet at gmail.com>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/cpu.h"
+#include "libavcodec/sbrdsp.h"
+
+float ff_sbr_sum_square_sse(float (*x)[2], int n);
+
+void ff_sbrdsp_init_x86(SBRDSPContext *s)   /* install x86 SIMD overrides into *s */
+{
+    int cpu_flags;
+
+    if (!HAVE_YASM)   /* nothing to install unless the yasm code is enabled */
+        return;
+    cpu_flags = av_get_cpu_flags();
+    if (cpu_flags & AV_CPU_FLAG_SSE)
+        s->sum_square = ff_sbr_sum_square_sse;
+}



More information about the ffmpeg-cvslog mailing list