[FFmpeg-devel] [PATCH 01/11] dcadsp: add int8x8_fmul_int32 to dsp context

Thu Feb 6 01:41:52 CET 2014

It is currently declared as a macro that resolves to one of several
inlinable functions, one of which is a NEON implementation.

Add a DSP context parameter to the macro, so that the implementation can
be either an inline function or a call through the function pointer stored
in the context; the latter is the default.

On an Arrandale CPU, the gain from inlining an SSE2 function rather than calling it:
- Win32: 29 to 26 cycles
- Win64: 25 to 23 cycles
---
 libavcodec/arm/dca.h |  5 +++--
 libavcodec/dcadec.c  | 16 +++-------------
 libavcodec/dcadsp.c  |  9 +++++++++
 libavcodec/dcadsp.h  |  4 ++++
 4 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/libavcodec/arm/dca.h b/libavcodec/arm/dca.h
index 35971a8..94d008e 100644
--- a/libavcodec/arm/dca.h
+++ b/libavcodec/arm/dca.h
@@ -80,8 +80,8 @@ static inline int decode_blockcodes(int code1, int code2, int levels,
 
 #if HAVE_NEON_INLINE && HAVE_ASM_MOD_Y
 
-#define int8x8_fmul_int32 int8x8_fmul_int32
-static inline void int8x8_fmul_int32(float *dst, const int8_t *src, int scale)
+#undef  int8x8_fmul_int32
+static void int8x8_fmul_int32(float *dst, const int8_t *src, int scale)
 {
     __asm__ ("vcvt.f32.s32 %2,  %2,  #4         \n"
              "vld1.8       {d0},     [%1,:64]   \n"
@@ -97,6 +97,7 @@ static inline void int8x8_fmul_int32(float *dst, const int8_t *src, int scale)
              : "r"(src), "x"(scale)
              : "d0", "d1", "d2", "d3");
 }
+#define int8x8_fmul_int32(dsp) int8x8_fmul_int32
 
 #endif
 
diff --git a/libavcodec/dcadec.c b/libavcodec/dcadec.c
index 9f7ee54..2e1f941 100644
--- a/libavcodec/dcadec.c
+++ b/libavcodec/dcadec.c
@@ -1246,16 +1246,6 @@ static int decode_blockcodes(int code1, int code2, int levels, int32_t *values)
 static const uint8_t abits_sizes[7]  = { 7, 10, 12, 13, 15, 17, 19 };
 static const uint8_t abits_levels[7] = { 3,  5,  7,  9, 13, 17, 25 };
 
-#ifndef int8x8_fmul_int32
-static inline void int8x8_fmul_int32(float *dst, const int8_t *src, int scale)
-{
-    float fscale = scale / 16.0;
-    int i;
-    for (i = 0; i < 8; i++)
-        dst[i] = src[i] * fscale;
-}
-#endif
-
 static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
 {
     int k, l;
@@ -1380,9 +1370,9 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
                 s->debug_flag |= 0x01;
             }
 
-            int8x8_fmul_int32(subband_samples[k][l],
-                              &high_freq_vq[hfvq][subsubframe * 8],
-                              s->scale_factor[k][l][0]);
+            int8x8_fmul_int32(s->dcadsp)(subband_samples[k][l],
+                                         &high_freq_vq[hfvq][subsubframe * 8],
+                                         s->scale_factor[k][l][0]);
         }
     }
 
diff --git a/libavcodec/dcadsp.c b/libavcodec/dcadsp.c
index abeba24..a82548f 100644
--- a/libavcodec/dcadsp.c
+++ b/libavcodec/dcadsp.c
@@ -24,6 +24,14 @@
 #include "libavutil/intreadwrite.h"
 #include "dcadsp.h"
 
+static void int8x8_fmul_int32_c(float *dst, const int8_t *src, int scale)
+{
+    float fscale = scale / 16.0;
+    int i;
+    for (i = 0; i < 8; i++)
+        dst[i] = src[i] * fscale;
+}
+
 static void dca_lfe_fir_c(float *out, const float *in, const float *coefs,
                           int decifactor, float scale)
 {
@@ -78,5 +86,6 @@ av_cold void ff_dcadsp_init(DCADSPContext *s)
 {
     s->lfe_fir = dca_lfe_fir_c;
     s->qmf_32_subbands = dca_qmf_32_subbands;
+    s->int8x8_fmul_int32 = int8x8_fmul_int32_c;
     if (ARCH_ARM) ff_dcadsp_init_arm(s);
 }
diff --git a/libavcodec/dcadsp.h b/libavcodec/dcadsp.h
index d86c1f3..aca7334 100644
--- a/libavcodec/dcadsp.h
+++ b/libavcodec/dcadsp.h
@@ -31,8 +31,12 @@ typedef struct DCADSPContext {
                             int *synth_buf_offset, float synth_buf2[32],
                             const float window[512], float *samples_out,
                             float raXin[32], float scale);
+    void (*int8x8_fmul_int32)(float *dst, const int8_t *src, int scale);
 } DCADSPContext;
 
+/** Default define to allow switching from inlinable function to dsp */
+#define int8x8_fmul_int32(dsp) dsp.int8x8_fmul_int32
+
 void ff_dcadsp_init(DCADSPContext *s);
 void ff_dcadsp_init_arm(DCADSPContext *s);
 
-- 
1.8.0.msysgit.0