From eb1c687b1b3ab7963ba6c527d275221ed55553ed Mon Sep 17 00:00:00 2001
From: Fabrice Bellard <fabrice@bellard.org>
Date: Sat, 3 Nov 2007 14:34:25 +0000
Subject: [PATCH] Use the ffmpeg mdct function, patch by Fabrice Bellard.
 Thread: [FFmpeg-devel] NellyMoser transform bug, 10/25/2007 12:24 PM

Originally committed as revision 10909 to svn://svn.ffmpeg.org/ffmpeg/trunk
---
 libavcodec/nellymoserdec.c | 119 ++++++++-----------------------------
 1 file changed, 26 insertions(+), 93 deletions(-)

diff --git a/libavcodec/nellymoserdec.c b/libavcodec/nellymoserdec.c
index eb7b0fd632..0450c5d885 100644
--- a/libavcodec/nellymoserdec.c
+++ b/libavcodec/nellymoserdec.c
@@ -46,7 +46,7 @@
 #define NELLY_BIT_CAP     6
 #define NELLY_BASE_OFF    4228
 #define NELLY_BASE_SHIFT  19
-#define NELLY_SAMPLES     256
+#define NELLY_SAMPLES     (2 * NELLY_BUF_LEN)
 
 static const float dequantization_table[127] = {
 0.0000000000,-0.8472560048, 0.7224709988, -1.5247479677, -0.4531480074, 0.3753609955, 1.4717899561,
@@ -98,14 +98,12 @@ typedef struct NellyMoserDecodeContext {
     int             add_bias;
     int             scale_bias;
     DSPContext      dsp;
-    FFTContext      fftc;
+    MDCTContext     imdct_ctx;
+    DECLARE_ALIGNED_16(float,imdct_tmp[NELLY_BUF_LEN]);
+    DECLARE_ALIGNED_16(float,imdct_out[NELLY_BUF_LEN * 2]);
 } NellyMoserDecodeContext;
 
-
 DECLARE_ALIGNED_16(float,sine_window[128]);
-DECLARE_ALIGNED_16(float,tcos[64]);
-DECLARE_ALIGNED_16(float,tsin[64]);
-DECLARE_ALIGNED_16(float,cos_tab[64]);
 
 static inline int signed_shift(int i, int shift) {
     if (shift > 0)
@@ -113,69 +111,6 @@ static inline int signed_shift(int i, int shift) {
     return i >> -shift;
 }
 
-static void antialias(float *buf, float *audio)
-{
-    int i, end, mid_hi, mid_lo;
-
-    end = NELLY_BUF_LEN-1;
-    mid_hi = NELLY_BUF_LEN/2;
-    mid_lo = mid_hi-1;
-
-    for (i = 0; i < NELLY_BUF_LEN/4; i++) {
-        audio[2*i]      =   buf[2*i      ]*tcos[i       ] - buf[end-2*i]*tsin[i];
-        audio[2*i+1]    = -(buf[end-2*i  ]*tcos[i       ] + buf[2*i    ]*tsin[i]);
-        audio[end-2*i-1]=   buf[end-2*i-1]*tcos[mid_lo-i] - buf[2*i+1  ]*tsin[mid_lo-i];
-        audio[end-2*i  ]= -(buf[2*i+1    ]*tcos[mid_lo-i] + buf[end-2*i]*tsin[mid_lo-i]);
-    }
-}
-
-static void complex2signal(float *audio)
-{
-    int i, end, mid_hi, mid_lo;
-    float *aptr, *sigptr, a, b, c, d, e, f, g;
-
-    end = NELLY_BUF_LEN-1;
-    mid_hi = NELLY_BUF_LEN/2;
-    mid_lo = mid_hi-1;
-
-    a = -audio[end];
-    b = audio[end-1];
-    c = -audio[1];
-    d = cos_tab[0];
-    e = audio[0];
-    f = cos_tab[mid_lo];
-    g = cos_tab[1];
-
-    audio[0] = d*e;
-    audio[1] = b*g-a*f;
-    audio[end-1] = a*g+b*f;
-    audio[end] = c*(-d);
-
-    aptr = audio+end-2;
-    sigptr = cos_tab+mid_hi-1;
-
-    for (i = 3; i < NELLY_BUF_LEN/2; i += 2) {
-        a = audio[i-1];
-        b = -audio[i];
-        c = cos_tab[i/2];
-        d = *sigptr;
-        e = *(aptr-1);
-        f = -(*aptr);
-
-        audio[i-1] = a*c+b*d;
-        *aptr = a*d-b*c;
-
-        a = cos_tab[(i/2)+1];
-        b = *(sigptr-1);
-
-        *(aptr-1) = b*e+a*f;
-        audio[i] = a*e-b*f;
-
-        sigptr--;
-        aptr -= 2;
-    }
-}
-
 static void overlap_and_window(NellyMoserDecodeContext *s, float *state, float *audio)
 {
     int bot, mid_up, mid_down, top;
@@ -229,7 +164,7 @@ static int headroom(int *la)
 }
 
 
-static void get_sample_bits(float *buf, int *bits)
+static void get_sample_bits(const float *buf, int *bits)
 {
     int i, j;
     short sbuf[128];
@@ -342,10 +277,11 @@ static void get_sample_bits(float *buf, int *bits)
 void nelly_decode_block(NellyMoserDecodeContext *s, unsigned char block[NELLY_BLOCK_LEN], float audio[NELLY_SAMPLES])
 {
     int i,j;
-    float buf[NELLY_BUF_LEN], pows[NELLY_BUF_LEN];
+    float buf[NELLY_FILL_LEN], pows[NELLY_FILL_LEN];
     float *aptr, *bptr, *pptr, val, pval;
     int bits[NELLY_BUF_LEN];
     unsigned char v;
+    float a;
 
     init_get_bits(&s->gb, block, NELLY_BLOCK_LEN * 8);
 
@@ -363,31 +299,36 @@ void nelly_decode_block(NellyMoserDecodeContext *s, unsigned char block[NELLY_BL
 
     }
 
-    memset(&buf[NELLY_FILL_LEN],0,4*sizeof(float));
-    memset(&pows[NELLY_FILL_LEN],0,4*sizeof(float));
-
     get_sample_bits(buf, bits);
 
     for (i = 0; i < 2; i++) {
-        aptr = audio+i*128;
+        aptr = audio + i * NELLY_BUF_LEN;
+
         init_get_bits(&s->gb, block, NELLY_BLOCK_LEN * 8);
         skip_bits(&s->gb, NELLY_HEADER_BITS + i*NELLY_DETAIL_BITS);
 
         for (j = 0; j < NELLY_FILL_LEN; j++) {
             if (bits[j] <= 0) {
-                buf[j] = M_SQRT1_2*pows[j];
+                aptr[j] = M_SQRT1_2*pows[j];
                 if (av_random(&s->random_state) & 1)
-                    buf[j] *= -1.0;
+                    aptr[j] *= -1.0;
             } else {
                 v = get_bits(&s->gb, bits[j]);
-                buf[j] = dequantization_table[(1<<bits[j])-1+v]*pows[j];
+                aptr[j] = dequantization_table[(1<<bits[j])-1+v]*pows[j];
             }
         }
+        memset(&aptr[NELLY_FILL_LEN], 0,
+               (NELLY_BUF_LEN - NELLY_FILL_LEN) * sizeof(float));
 
-        antialias(buf, aptr);
-        ff_fft_permute(&s->fftc, (FFTComplex*)aptr);
-        ff_fft_calc(&s->fftc, (FFTComplex*)aptr);
-        complex2signal(aptr);
+        s->imdct_ctx.fft.imdct_calc(&s->imdct_ctx, s->imdct_out,
+                                    aptr, s->imdct_tmp);
+        /* XXX: overlapping and windowing should be part of a more
+           generic imdct function */
+        a = 1.0 / 8.0;
+        for(j = 0; j < NELLY_BUF_LEN / 2; j++) {
+            aptr[j] = s->imdct_out[j + NELLY_BUF_LEN + NELLY_BUF_LEN / 2] * a;
+            aptr[j + NELLY_BUF_LEN / 2] = -s->imdct_out[j] * a;
+        }
         overlap_and_window(s, s->state, aptr);
     }
 }
@@ -399,7 +340,8 @@ static int decode_init(AVCodecContext * avctx) {
 
     s->avctx = avctx;
     av_init_random(0, &s->random_state);
-    ff_fft_init(&s->fftc, 6, 1);
+    ff_mdct_init(&s->imdct_ctx, 8, 1);
+
     dsputil_init(&s->dsp, avctx);
 
     if(s->dsp.float_to_int16 == ff_float_to_int16_c) {
@@ -416,15 +358,6 @@ static int decode_init(AVCodecContext * avctx) {
             sine_window[i] = sin((i + 0.5) / 256.0 * M_PI);
         }
 
-    /* Generate tables */
-    if (!tcos[0])
-        for(i=0;i<64;i++) {
-            alpha = 2*M_PI * (i + 1.0 / 4.0) / 256;
-            tcos[i] = cos(alpha);
-            tsin[i] = -sin(alpha);
-            cos_tab[i] = cos(i/128.0*M_PI)/8.0;
-        }
-
     return 0;
 }
 
@@ -464,7 +397,7 @@ static int decode_tag(AVCodecContext * avctx,
 static int decode_end(AVCodecContext * avctx) {
     NellyMoserDecodeContext *s = avctx->priv_data;
 
-    ff_fft_end(&s->fftc);
+    ff_mdct_end(&s->imdct_ctx);
     return 0;
 }