[FFmpeg-devel] [PATCH/RFC] Add some dsputil functions useful for AAC decoder

Sat Sep 19 00:11:55 CEST 2009

This patch adds a few dsputil functions that can be used in the AAC
decoder.

With trivial NEON versions of these functions, the AAC decoder gets
~1.6x faster on Cortex-A8, and better NEON code will push that even
further.

I will readily admit that some of the names in this patch are rubbish,
so please suggest something better.  Other enhancements are obviously
welcome too.

---
 libavcodec/dsputil.c |   83 ++++++++++++++++++++++++++++++++++++++++++++++++++
 libavcodec/dsputil.h |    8 +++++
 2 files changed, 91 insertions(+), 0 deletions(-)

diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index e1f2eda..f889487 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -4087,6 +4087,79 @@ void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, c
     }
 }
 
+static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
+                                 int len)
+{ /* Scale a float vector by a constant: dst[i] = src[i] * mul for 0 <= i < len. */
+    int i;
+    for (i = 0; i < len; i++) /* performs no iterations when len <= 0 */
+        dst[i] = src[i] * mul;
+}
+
+static void vector_fmul_scalar_vp_2_c(float *dst, const float *src,
+                                      const float **vp, float mul, int len)
+{ /* dst = src * factor * mul; each vp entry points at the 2 factors applied to one pair of elements. */
+    int i; /* NOTE(review): elements are written in whole pairs, so len is presumably a multiple of 2 -- confirm with callers */
+    for (i = 0; i < len; i += 2, vp++) { /* one vp pointer is consumed per pair */
+        dst[i  ] = src[i  ] * vp[0][0] * mul; /* evaluated as (src * factor) * mul */
+        dst[i+1] = src[i+1] * vp[0][1] * mul;
+    }
+}
+
+static void vector_fmul_scalar_vp_4_c(float *dst, const float *src,
+                                      const float **vp, float mul, int len)
+{ /* dst = src * factor * mul; each vp entry points at the 4 factors applied to one group of 4 elements. */
+    int i; /* NOTE(review): elements are written in whole groups, so len is presumably a multiple of 4 -- confirm with callers */
+    for (i = 0; i < len; i += 4, vp++) { /* one vp pointer is consumed per group of 4 */
+        dst[i  ] = src[i  ] * vp[0][0] * mul; /* evaluated as (src * factor) * mul */
+        dst[i+1] = src[i+1] * vp[0][1] * mul;
+        dst[i+2] = src[i+2] * vp[0][2] * mul;
+        dst[i+3] = src[i+3] * vp[0][3] * mul;
+    }
+}
+
+static void vp_fmul_scalar_2_c(float *dst, const float **vp, float mul,
+                               int len)
+{ /* Each vp entry points at 2 floats; they are multiplied by mul and stored consecutively in dst. */
+    int i; /* NOTE(review): elements are written in whole pairs, so len is presumably a multiple of 2 -- confirm with callers */
+    for (i = 0; i < len; i += 2, vp++) { /* one vp pointer is consumed per pair */
+        dst[i  ] = vp[0][0] * mul;
+        dst[i+1] = vp[0][1] * mul;
+    }
+}
+
+static void vp_fmul_scalar_4_c(float *dst, const float **vp, float mul,
+                               int len)
+{ /* Each vp entry points at 4 floats; they are multiplied by mul and stored consecutively in dst. */
+    int i; /* NOTE(review): elements are written in whole groups, so len is presumably a multiple of 4 -- confirm with callers */
+    for (i = 0; i < len; i += 4, vp++) { /* one vp pointer is consumed per group of 4 */
+        dst[i  ] = vp[0][0] * mul;
+        dst[i+1] = vp[0][1] * mul;
+        dst[i+2] = vp[0][2] * mul;
+        dst[i+3] = vp[0][3] * mul;
+    }
+}
+
+static void butterflies_float_c(float *v1, float *v2, int len)
+{ /* In-place butterfly: v1[i] becomes v1[i] + v2[i], v2[i] becomes the old v1[i] - v2[i]. */
+    int i;
+    for (i = 0; i < len; i++) {
+        float t = v1[i] - v2[i]; /* take the difference before v1[i] is overwritten */
+        v1[i] += v2[i];
+        v2[i] = t;
+    }
+}
+
+static float scalarproduct_float_c(const float *v1, const float *v2, int len)
+{ /* Returns the sum of v1[i] * v2[i] over 0 <= i < len. */
+    float p = 0.0; /* accumulator declared as float; this value is returned unchanged when len <= 0 */
+    int i;
+
+    for (i = 0; i < len; i++)
+        p += v1[i] * v2[i];
+
+    return p;
+}
+
 static void int32_to_float_fmul_scalar_c(float *dst, const int *src, float mul, int len){
     int i;
     for(i=0; i<len; i++)
@@ -4720,6 +4793,16 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
     c->add_int16 = add_int16_c;
     c->sub_int16 = sub_int16_c;
     c->scalarproduct_int16 = scalarproduct_int16_c;
+    c->scalarproduct_float = scalarproduct_float_c;
+    c->butterflies_float = butterflies_float_c;
+
+    c->vector_fmul_scalar = vector_fmul_scalar_c;
+
+    c->vector_fmul_scalar_vp[0] = vector_fmul_scalar_vp_2_c;
+    c->vector_fmul_scalar_vp[1] = vector_fmul_scalar_vp_4_c;
+
+    c->vp_fmul_scalar[0] = vp_fmul_scalar_2_c;
+    c->vp_fmul_scalar[1] = vp_fmul_scalar_4_c;
 
     c->shrink[0]= ff_img_copy_plane;
     c->shrink[1]= ff_shrink22;
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index d9d7d16..61252f5 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -397,6 +397,14 @@ typedef struct DSPContext {
     /* assume len is a multiple of 8, and arrays are 16-byte aligned */
     void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len);
     void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */);
+    void (*vector_fmul_scalar)(float *dst, const float *src, float mul,
+                               int len);
+    void (*vector_fmul_scalar_vp[2])(float *dst, const float *src,
+                                     const float **vp, float mul, int len);
+    void (*vp_fmul_scalar[2])(float *dst, const float **vp,
+                              float mul, int len);
+    float (*scalarproduct_float)(const float *v1, const float *v2, int len);
+    void (*butterflies_float)(float *v1, float *v2, int len);
 
     /* C version: convert floats from the range [384.0,386.0] to ints in [-32768,32767]
      * simd versions: convert floats from [-32768.0,32767.0] without rescaling and arrays are 16byte aligned */
-- 
1.6.4.3