[FFmpeg-devel] [PATCH 1/5] ra144: use scalarproduct_int16
Christophe Gisquet
christophe.gisquet at gmail.com
Fri May 10 11:32:54 CEST 2013
The buffer holding the coefficients must be zero-padded so that DSP
functions that may overread can be used. The SSE2/3 versions are currently
an example of this, as they process batches of 16 bytes.
---
libavcodec/ra144.c | 14 +++++---------
libavcodec/ra144.h | 6 +++++-
libavcodec/ra144dec.c | 3 +++
libavcodec/ra144enc.c | 6 ++++--
4 files changed, 17 insertions(+), 12 deletions(-)
diff --git a/libavcodec/ra144.c b/libavcodec/ra144.c
index fe9a5bc..9929721 100644
--- a/libavcodec/ra144.c
+++ b/libavcodec/ra144.c
@@ -1681,12 +1681,9 @@ unsigned int ff_rescale_rms(unsigned int rms, unsigned int energy)
}
/** inverse root mean square */
-int ff_irms(const int16_t *data)
+int ff_irms(DSPContext *dsp, const int16_t *data)
{
- unsigned int i, sum = 0;
-
- for (i=0; i < BLOCKSIZE; i++)
- sum += data[i] * data[i];
+ unsigned int sum = dsp->scalarproduct_int16(data, data, BLOCKSIZE);
if (sum == 0)
return 0; /* OOPS - division by zero */
@@ -1698,14 +1695,13 @@ void ff_subblock_synthesis(RA144Context *ractx, const int16_t *lpc_coefs,
int cba_idx, int cb1_idx, int cb2_idx,
int gval, int gain)
{
- int16_t buffer_a[BLOCKSIZE];
int16_t *block;
int m[3];
if (cba_idx) {
cba_idx += BLOCKSIZE/2 - 1;
- ff_copy_and_dup(buffer_a, ractx->adapt_cb, cba_idx);
- m[0] = (ff_irms(buffer_a) * gval) >> 12;
+ ff_copy_and_dup(ractx->buffer_a, ractx->adapt_cb, cba_idx);
+ m[0] = (ff_irms(&ractx->dsp, ractx->buffer_a) * gval) >> 12;
} else {
m[0] = 0;
}
@@ -1716,7 +1712,7 @@ void ff_subblock_synthesis(RA144Context *ractx, const int16_t *lpc_coefs,
block = ractx->adapt_cb + BUFFERSIZE - BLOCKSIZE;
- add_wav(block, gain, cba_idx, m, cba_idx? buffer_a: NULL,
+ add_wav(block, gain, cba_idx, m, cba_idx? ractx->buffer_a: NULL,
ff_cb1_vects[cb1_idx], ff_cb2_vects[cb2_idx]);
memcpy(ractx->curr_sblock, ractx->curr_sblock + BLOCKSIZE,
diff --git a/libavcodec/ra144.h b/libavcodec/ra144.h
index 426fea3..2dd7ec2 100644
--- a/libavcodec/ra144.h
+++ b/libavcodec/ra144.h
@@ -25,6 +25,7 @@
#include <stdint.h>
#include "lpc.h"
#include "audio_frame_queue.h"
+#include "dsputil.h"
#define NBLOCKS 4 ///< number of subblocks within a block
#define BLOCKSIZE 40 ///< subblock size in 16-bit words
@@ -35,6 +36,7 @@
typedef struct RA144Context {
AVCodecContext *avctx;
+ DSPContext dsp;
LPCContext lpc_ctx;
AudioFrameQueue afq;
int last_frame;
@@ -57,6 +59,8 @@ typedef struct RA144Context {
/** Adaptive codebook, its size is two units bigger to avoid a
* buffer overflow. */
int16_t adapt_cb[146+2];
+
+ DECLARE_ALIGNED(16, int16_t, buffer_a)[FFALIGN(BLOCKSIZE,16)];
} RA144Context;
void ff_copy_and_dup(int16_t *target, const int16_t *source, int offset);
@@ -68,7 +72,7 @@ unsigned int ff_rms(const int *data);
int ff_interp(RA144Context *ractx, int16_t *out, int a, int copyold,
int energy);
unsigned int ff_rescale_rms(unsigned int rms, unsigned int energy);
-int ff_irms(const int16_t *data);
+int ff_irms(DSPContext *dsp, const int16_t *data/*align 16*/);
void ff_subblock_synthesis(RA144Context *ractx, const int16_t *lpc_coefs,
int cba_idx, int cb1_idx, int cb2_idx,
int gval, int gain);
diff --git a/libavcodec/ra144dec.c b/libavcodec/ra144dec.c
index 63f77e0..748eba1 100644
--- a/libavcodec/ra144dec.c
+++ b/libavcodec/ra144dec.c
@@ -34,9 +34,12 @@ static av_cold int ra144_decode_init(AVCodecContext * avctx)
RA144Context *ractx = avctx->priv_data;
ractx->avctx = avctx;
+ ff_dsputil_init(&ractx->dsp, avctx);
ractx->lpc_coef[0] = ractx->lpc_tables[0];
ractx->lpc_coef[1] = ractx->lpc_tables[1];
+
+ AV_ZERO128(ractx->buffer_a+BLOCKSIZE);
avctx->channels = 1;
avctx->channel_layout = AV_CH_LAYOUT_MONO;
diff --git a/libavcodec/ra144enc.c b/libavcodec/ra144enc.c
index 2eac343..cbbde02 100644
--- a/libavcodec/ra144enc.c
+++ b/libavcodec/ra144enc.c
@@ -60,7 +60,9 @@ static av_cold int ra144_encode_init(AVCodecContext * avctx)
ractx = avctx->priv_data;
ractx->lpc_coef[0] = ractx->lpc_tables[0];
ractx->lpc_coef[1] = ractx->lpc_tables[1];
+ AV_ZERO128(ractx->buffer_a+BLOCKSIZE);
ractx->avctx = avctx;
+ ff_dsputil_init(&ractx->dsp, avctx);
ret = ff_lpc_init(&ractx->lpc_ctx, avctx->frame_size, LPC_ORDER,
FF_LPC_TYPE_LEVINSON);
if (ret < 0)
@@ -334,7 +336,7 @@ static void ra144_encode_subblock(RA144Context *ractx,
float data[BLOCKSIZE] = { 0 }, work[LPC_ORDER + BLOCKSIZE];
float coefs[LPC_ORDER];
float zero[BLOCKSIZE], cba[BLOCKSIZE], cb1[BLOCKSIZE], cb2[BLOCKSIZE];
- int16_t cba_vect[BLOCKSIZE];
+ LOCAL_ALIGNED(16, int16_t, cba_vect, [BLOCKSIZE]);
int cba_idx, cb1_idx, cb2_idx, gain;
int i, n;
unsigned m[3];
@@ -374,7 +376,7 @@ static void ra144_encode_subblock(RA144Context *ractx,
memcpy(cba, work + LPC_ORDER, sizeof(cba));
ff_copy_and_dup(cba_vect, ractx->adapt_cb, cba_idx + BLOCKSIZE / 2 - 1);
- m[0] = (ff_irms(cba_vect) * rms) >> 12;
+ m[0] = (ff_irms(&ractx->dsp, cba_vect) * rms) >> 12;
}
fixed_cb_search(work + LPC_ORDER, coefs, data, cba_idx, &cb1_idx, &cb2_idx);
for (i = 0; i < BLOCKSIZE; i++) {
--
1.8.0.msysgit.0
More information about the ffmpeg-devel mailing list