From c3390fd56cf55259ea7665ecea6c8aeddf56e2fc Mon Sep 17 00:00:00 2001 From: Christophe Gisquet Date: Sun, 4 Mar 2012 13:28:16 +0100 Subject: [PATCH] ra144: use scalarproduct_int16 The buffer holding the coefficients must be padded with 0 so as to use DSP functions that may overread. Currently, the SSE2/3 versions are an example, as they process batches of 16 bytes. Signed-off-by: Michael Niedermayer --- libavcodec/ra144.c | 14 +++++--------- libavcodec/ra144.h | 6 +++++- libavcodec/ra144dec.c | 3 +++ libavcodec/ra144enc.c | 7 ++++--- 4 files changed, 17 insertions(+), 13 deletions(-) diff --git a/libavcodec/ra144.c b/libavcodec/ra144.c index fe9a5bcb3c..992972182f 100644 --- a/libavcodec/ra144.c +++ b/libavcodec/ra144.c @@ -1681,12 +1681,9 @@ unsigned int ff_rescale_rms(unsigned int rms, unsigned int energy) } /** inverse root mean square */ -int ff_irms(const int16_t *data) +int ff_irms(DSPContext *dsp, const int16_t *data) { - unsigned int i, sum = 0; - - for (i=0; i < BLOCKSIZE; i++) - sum += data[i] * data[i]; + unsigned int sum = dsp->scalarproduct_int16(data, data, BLOCKSIZE); if (sum == 0) return 0; /* OOPS - division by zero */ @@ -1698,14 +1695,13 @@ void ff_subblock_synthesis(RA144Context *ractx, const int16_t *lpc_coefs, int cba_idx, int cb1_idx, int cb2_idx, int gval, int gain) { - int16_t buffer_a[BLOCKSIZE]; int16_t *block; int m[3]; if (cba_idx) { cba_idx += BLOCKSIZE/2 - 1; - ff_copy_and_dup(buffer_a, ractx->adapt_cb, cba_idx); - m[0] = (ff_irms(buffer_a) * gval) >> 12; + ff_copy_and_dup(ractx->buffer_a, ractx->adapt_cb, cba_idx); + m[0] = (ff_irms(&ractx->dsp, ractx->buffer_a) * gval) >> 12; } else { m[0] = 0; } @@ -1716,7 +1712,7 @@ void ff_subblock_synthesis(RA144Context *ractx, const int16_t *lpc_coefs, block = ractx->adapt_cb + BUFFERSIZE - BLOCKSIZE; - add_wav(block, gain, cba_idx, m, cba_idx? buffer_a: NULL, + add_wav(block, gain, cba_idx, m, cba_idx? 
ractx->buffer_a: NULL, ff_cb1_vects[cb1_idx], ff_cb2_vects[cb2_idx]); memcpy(ractx->curr_sblock, ractx->curr_sblock + BLOCKSIZE, diff --git a/libavcodec/ra144.h b/libavcodec/ra144.h index 763495dce7..c2ee59b2dc 100644 --- a/libavcodec/ra144.h +++ b/libavcodec/ra144.h @@ -25,6 +25,7 @@ #include #include "lpc.h" #include "audio_frame_queue.h" +#include "dsputil.h" #define NBLOCKS 4 ///< number of subblocks within a block #define BLOCKSIZE 40 ///< subblock size in 16-bit words @@ -35,6 +36,7 @@ typedef struct RA144Context { AVCodecContext *avctx; + DSPContext dsp; LPCContext lpc_ctx; AudioFrameQueue afq; int last_frame; @@ -57,6 +59,8 @@ typedef struct RA144Context { /** Adaptive codebook, its size is two units bigger to avoid a * buffer overflow. */ int16_t adapt_cb[146+2]; + + DECLARE_ALIGNED(16, int16_t, buffer_a)[FFALIGN(BLOCKSIZE,16)]; } RA144Context; void ff_copy_and_dup(int16_t *target, const int16_t *source, int offset); @@ -68,7 +72,7 @@ unsigned int ff_rms(const int *data); int ff_interp(RA144Context *ractx, int16_t *out, int a, int copyold, int energy); unsigned int ff_rescale_rms(unsigned int rms, unsigned int energy); -int ff_irms(const int16_t *data); +int ff_irms(DSPContext *dsp, const int16_t *data/*align 16*/); void ff_subblock_synthesis(RA144Context *ractx, const int16_t *lpc_coefs, int cba_idx, int cb1_idx, int cb2_idx, int gval, int gain); diff --git a/libavcodec/ra144dec.c b/libavcodec/ra144dec.c index b7add7f365..03ab9f58c4 100644 --- a/libavcodec/ra144dec.c +++ b/libavcodec/ra144dec.c @@ -34,10 +34,13 @@ static av_cold int ra144_decode_init(AVCodecContext * avctx) RA144Context *ractx = avctx->priv_data; ractx->avctx = avctx; + ff_dsputil_init(&ractx->dsp, avctx); ractx->lpc_coef[0] = ractx->lpc_tables[0]; ractx->lpc_coef[1] = ractx->lpc_tables[1]; + AV_ZERO128(ractx->buffer_a+BLOCKSIZE); + avctx->channels = 1; avctx->channel_layout = AV_CH_LAYOUT_MONO; avctx->sample_fmt = AV_SAMPLE_FMT_S16; diff --git a/libavcodec/ra144enc.c 
b/libavcodec/ra144enc.c index 3558254e56..71f206fda3 100644 --- a/libavcodec/ra144enc.c +++ b/libavcodec/ra144enc.c @@ -60,7 +60,9 @@ static av_cold int ra144_encode_init(AVCodecContext * avctx) ractx = avctx->priv_data; ractx->lpc_coef[0] = ractx->lpc_tables[0]; ractx->lpc_coef[1] = ractx->lpc_tables[1]; + AV_ZERO128(ractx->buffer_a+BLOCKSIZE); ractx->avctx = avctx; + ff_dsputil_init(&ractx->dsp, avctx); ret = ff_lpc_init(&ractx->lpc_ctx, avctx->frame_size, LPC_ORDER, FF_LPC_TYPE_LEVINSON); if (ret < 0) @@ -334,7 +336,6 @@ static void ra144_encode_subblock(RA144Context *ractx, float data[BLOCKSIZE] = { 0 }, work[LPC_ORDER + BLOCKSIZE]; float coefs[LPC_ORDER]; float zero[BLOCKSIZE], cba[BLOCKSIZE], cb1[BLOCKSIZE], cb2[BLOCKSIZE]; - int16_t cba_vect[BLOCKSIZE]; int cba_idx, cb1_idx, cb2_idx, gain; int i, n; unsigned m[3]; @@ -373,8 +374,8 @@ static void ra144_encode_subblock(RA144Context *ractx, */ memcpy(cba, work + LPC_ORDER, sizeof(cba)); - ff_copy_and_dup(cba_vect, ractx->adapt_cb, cba_idx + BLOCKSIZE / 2 - 1); - m[0] = (ff_irms(cba_vect) * rms) >> 12; + ff_copy_and_dup(ractx->buffer_a, ractx->adapt_cb, cba_idx + BLOCKSIZE / 2 - 1); + m[0] = (ff_irms(&ractx->dsp, ractx->buffer_a) * rms) >> 12; } fixed_cb_search(work + LPC_ORDER, coefs, data, cba_idx, &cb1_idx, &cb2_idx); for (i = 0; i < BLOCKSIZE; i++) {