[FFmpeg-devel] [PATCH 2/5] ra144: use scalarproduct_int16

Wed May 8 16:51:48 CEST 2013

---
 libavcodec/ra144.c    | 11 ++++-------
 libavcodec/ra144.h    |  4 +++-
 libavcodec/ra144dec.c |  1 +
 libavcodec/ra144enc.c |  5 +++--
 4 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/libavcodec/ra144.c b/libavcodec/ra144.c
index fe9a5bc..cde2e47 100644
--- a/libavcodec/ra144.c
+++ b/libavcodec/ra144.c
@@ -1681,12 +1681,9 @@ unsigned int ff_rescale_rms(unsigned int rms, unsigned int energy)
 }
 
 /** inverse root mean square */
-int ff_irms(const int16_t *data)
+int ff_irms(DSPContext *dsp, const int16_t *data)
 {
-    unsigned int i, sum = 0;
-
-    for (i=0; i < BLOCKSIZE; i++)
-        sum += data[i] * data[i];
+    unsigned int sum = dsp->scalarproduct_int16(data, data, BLOCKSIZE);
 
     if (sum == 0)
         return 0; /* OOPS - division by zero */
@@ -1698,14 +1695,14 @@ void ff_subblock_synthesis(RA144Context *ractx, const int16_t *lpc_coefs,
                            int cba_idx, int cb1_idx, int cb2_idx,
                            int gval, int gain)
 {
-    int16_t buffer_a[BLOCKSIZE];
+    LOCAL_ALIGNED(16, int16_t, buffer_a, [BLOCKSIZE]);
     int16_t *block;
     int m[3];
 
     if (cba_idx) {
         cba_idx += BLOCKSIZE/2 - 1;
         ff_copy_and_dup(buffer_a, ractx->adapt_cb, cba_idx);
-        m[0] = (ff_irms(buffer_a) * gval) >> 12;
+        m[0] = (ff_irms(&ractx->dsp, buffer_a) * gval) >> 12;
     } else {
         m[0] = 0;
     }
diff --git a/libavcodec/ra144.h b/libavcodec/ra144.h
index 426fea3..73152e5 100644
--- a/libavcodec/ra144.h
+++ b/libavcodec/ra144.h
@@ -25,6 +25,7 @@
 #include <stdint.h>
 #include "lpc.h"
 #include "audio_frame_queue.h"
+#include "dsputil.h"
 
 #define NBLOCKS         4       ///< number of subblocks within a block
 #define BLOCKSIZE       40      ///< subblock size in 16-bit words
@@ -35,6 +36,7 @@
 
 typedef struct RA144Context {
     AVCodecContext *avctx;
+    DSPContext dsp;
     LPCContext lpc_ctx;
     AudioFrameQueue afq;
     int last_frame;
@@ -68,7 +70,7 @@ unsigned int ff_rms(const int *data);
 int ff_interp(RA144Context *ractx, int16_t *out, int a, int copyold,
               int energy);
 unsigned int ff_rescale_rms(unsigned int rms, unsigned int energy);
-int ff_irms(const int16_t *data);
+int ff_irms(DSPContext *dsp, const int16_t *data/*align 16*/);
 void ff_subblock_synthesis(RA144Context *ractx, const int16_t *lpc_coefs,
                            int cba_idx, int cb1_idx, int cb2_idx,
                            int gval, int gain);
diff --git a/libavcodec/ra144dec.c b/libavcodec/ra144dec.c
index 63f77e0..d41c54f 100644
--- a/libavcodec/ra144dec.c
+++ b/libavcodec/ra144dec.c
@@ -34,6 +34,7 @@ static av_cold int ra144_decode_init(AVCodecContext * avctx)
     RA144Context *ractx = avctx->priv_data;
 
     ractx->avctx = avctx;
+    ff_dsputil_init(&ractx->dsp, avctx);
 
     ractx->lpc_coef[0] = ractx->lpc_tables[0];
     ractx->lpc_coef[1] = ractx->lpc_tables[1];
diff --git a/libavcodec/ra144enc.c b/libavcodec/ra144enc.c
index 2eac343..3fd7340 100644
--- a/libavcodec/ra144enc.c
+++ b/libavcodec/ra144enc.c
@@ -61,6 +61,7 @@ static av_cold int ra144_encode_init(AVCodecContext * avctx)
     ractx->lpc_coef[0] = ractx->lpc_tables[0];
     ractx->lpc_coef[1] = ractx->lpc_tables[1];
     ractx->avctx = avctx;
+    ff_dsputil_init(&ractx->dsp, avctx);
     ret = ff_lpc_init(&ractx->lpc_ctx, avctx->frame_size, LPC_ORDER,
                       FF_LPC_TYPE_LEVINSON);
     if (ret < 0)
@@ -334,7 +335,7 @@ static void ra144_encode_subblock(RA144Context *ractx,
     float data[BLOCKSIZE] = { 0 }, work[LPC_ORDER + BLOCKSIZE];
     float coefs[LPC_ORDER];
     float zero[BLOCKSIZE], cba[BLOCKSIZE], cb1[BLOCKSIZE], cb2[BLOCKSIZE];
-    int16_t cba_vect[BLOCKSIZE];
+    LOCAL_ALIGNED(16, int16_t, cba_vect, [BLOCKSIZE]);
     int cba_idx, cb1_idx, cb2_idx, gain;
     int i, n;
     unsigned m[3];
@@ -374,7 +375,7 @@ static void ra144_encode_subblock(RA144Context *ractx,
         memcpy(cba, work + LPC_ORDER, sizeof(cba));
 
         ff_copy_and_dup(cba_vect, ractx->adapt_cb, cba_idx + BLOCKSIZE / 2 - 1);
-        m[0] = (ff_irms(cba_vect) * rms) >> 12;
+        m[0] = (ff_irms(&ractx->dsp, cba_vect) * rms) >> 12;
     }
     fixed_cb_search(work + LPC_ORDER, coefs, data, cba_idx, &cb1_idx, &cb2_idx);
     for (i = 0; i < BLOCKSIZE; i++) {
-- 
1.8.0.msysgit.0