DCA: simplify lfe_interpolation_fir()

This reorders the lfe_fir tables and drops the mirrored half, so that the loops in lfe_interpolation_fir() can be simplified. The new loop structure should be easier to implement with SIMD. Static data size is reduced by 2 kB. 3% faster on Cortex-A8. Originally committed as revision 22849 to svn://svn.ffmpeg.org/ffmpeg/trunk
2025-01-13 21:28:01 +02:00 · 2010-04-12 11:14:55 +00:00 · 2010-04-12 11:14:55 +00:00 · 766fefe8e2
commit 766fefe8e2
parent b92d483bac
2 changed files with 211 additions and 1037 deletions
--- a/libavcodec/dca.c
+++ b/libavcodec/dca.c
@@ -802,28 +802,37 @@ static void lfe_interpolation_fir(int decimation_select,
    int decifactor, k, j;
    const float *prCoeff;
    int interp_index = 0;       /* Index to the interpolated samples */
    int deciindex;
    /* Select decimation filter */
    if (decimation_select == 1) {
-        decifactor = 128;
+        decifactor = 64;
        prCoeff = lfe_fir_128;
    } else {
-        decifactor = 64;
+        decifactor = 32;
        prCoeff = lfe_fir_64;
    }
    /* Interpolation */
    for (deciindex = 0; deciindex < num_deci_sample; deciindex++) {
-        /* One decimated sample generates decifactor interpolated ones */
+        float *samples_out2 = samples_out + decifactor;
        const float *cf0 = prCoeff;
        const float *cf1 = prCoeff + 256;
        /* One decimated sample generates 2*decifactor interpolated ones */
        for (k = 0; k < decifactor; k++) {
-            float rTmp = 0.0;
+            float v0 = 0.0;
-            //FIXME the coeffs are symetric, fix that
+            float v1 = 0.0;
-            for (j = 0; j < 512 / decifactor; j++)
+            for (j = 0; j < 256 / decifactor; j++) {
-                rTmp += samples_in[deciindex - j] * prCoeff[k + j * decifactor];
+                float s = samples_in[-j];
-            samples_out[interp_index++] = (rTmp * scale) + bias;
+                v0 += s * *cf0++;
                v1 += s * *--cf1;
            }
            *samples_out++  = (v0 * scale) + bias;
            *samples_out2++ = (v1 * scale) + bias;
        }
        samples_in++;
        samples_out += decifactor;
    }
 }
--- a/libavcodec/dcadata.h
+++ b/libavcodec/dcadata.h