FFmpeg/libavcodec/flacdsp_lpc_template.c

/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <stdint.h>
#include "libavutil/avutil.h"
#include "mathops.h"

/* Drop any earlier definitions of the helper macros so they can be defined
 * afresh below for the SAMPLE_SIZE currently in effect.
 * NOTE(review): presumably this file is included once per SAMPLE_SIZE by its
 * user - confirm. FSUF is not defined anywhere in the visible part of this
 * file; the #undef looks like a leftover - confirm before removing. */
#undef FUNC
#undef sum_type
#undef MUL
#undef CLIP
#undef FSUF

/* Build the per-sample-size function name from n, "_" and SAMPLE_SIZE
 * (presumably AV_JOIN token-pastes its arguments - it is defined elsewhere). */
#define FUNC(n) AV_JOIN(n ## _, SAMPLE_SIZE)

#if SAMPLE_SIZE == 32
/* 32-bit variant: products are formed with MUL64() and accumulated in an
 * int64_t; the shifted sum goes through av_clipl_int32() before use. */
#   define sum_type  int64_t
#   define MUL(a, b) MUL64(a, b)
#   define CLIP(x) av_clipl_int32(x)
#else
/* Any other SAMPLE_SIZE: plain int multiply, int32_t accumulator, and CLIP()
 * is the identity. */
#   define sum_type  int32_t
#   define MUL(a, b) ((a) * (b))
#   define CLIP(x) (x)
#endif

/* One predictor tap, applied to two adjacent output samples at once.
 *
 * Uses these names from the expansion site: coefs, smp, i, s, p0 and p1.
 * s is expected to hold smp[i-(x)] on entry. Coefficient coefs[(x)-1] is
 * multiplied with it and added to p0; s is then advanced to smp[i-(x)+1] and
 * the same coefficient is added to p1. On exit s is therefore the value the
 * next lower tap, LPC1((x)-1), expects.
 *
 * The expansion is a bare { } block that needs no trailing semicolon, so
 * invocations can be stacked after case labels and fall through. */
#define LPC1(x) {           \
    int c = coefs[(x)-1];   \
    p0   += MUL(c, s);      \
    s     = smp[i-(x)+1];   \
    p1   += MUL(c, s);      \
}

/**
 * Compute LPC residuals for samples [order, len) with an unrolled predictor,
 * producing two output samples per loop iteration.
 *
 * For each index i the prediction is the sum over j = 0..order-1 of
 * coefs[j] * smp[i-1-j], shifted right by shift and passed through CLIP();
 * the residual stored in res[i] is smp[i] minus that prediction.
 *
 * @param res   output residuals; only entries [order, len) are written
 * @param smp   input samples; only entries [0, len) are read
 * @param len   number of samples in smp / res
 * @param order predictor order: 1..8 when big is 0, 9..32 when big is
 *              non-zero. Any other value matches no case label, so the
 *              prediction is 0 and res[i] equals smp[i].
 * @param coefs predictor coefficients, coefs[0..order-1]
 * @param shift right shift applied to the accumulated prediction
 * @param big   non-zero selects the switch covering orders 9..32,
 *              zero selects the one covering orders 1..8
 */
static av_always_inline void FUNC(lpc_encode_unrolled)(int32_t *res,
                                  const int32_t *smp, int len, int order,
                                  const int32_t *coefs, int shift, int big)
{
    int i;
    for (i = order; i < len; i += 2) {
        /* Oldest sample used by the prediction of sample i. */
        int s  = smp[i-order];
        /* p0 accumulates the prediction for sample i, p1 for sample i+1. */
        sum_type p0 = 0, p1 = 0;
        /* In both switches every case deliberately falls through to all
         * lower orders, so entering at "case order" runs exactly order taps. */
        if (big) {
            switch (order) {
            case 32: LPC1(32)
            case 31: LPC1(31)
            case 30: LPC1(30)
            case 29: LPC1(29)
            case 28: LPC1(28)
            case 27: LPC1(27)
            case 26: LPC1(26)
            case 25: LPC1(25)
            case 24: LPC1(24)
            case 23: LPC1(23)
            case 22: LPC1(22)
            case 21: LPC1(21)
            case 20: LPC1(20)
            case 19: LPC1(19)
            case 18: LPC1(18)
            case 17: LPC1(17)
            case 16: LPC1(16)
            case 15: LPC1(15)
            case 14: LPC1(14)
            case 13: LPC1(13)
            case 12: LPC1(12)
            case 11: LPC1(11)
            case 10: LPC1(10)
            case  9: LPC1( 9)
                     LPC1( 8)
                     LPC1( 7)
                     LPC1( 6)
                     LPC1( 5)
                     LPC1( 4)
                     LPC1( 3)
                     LPC1( 2)
                     LPC1( 1)
            }
        } else {
            switch (order) {
            case  8: LPC1( 8)
            case  7: LPC1( 7)
            case  6: LPC1( 6)
            case  5: LPC1( 5)
            case  4: LPC1( 4)
            case  3: LPC1( 3)
            case  2: LPC1( 2)
            case  1: LPC1( 1)
            }
        }
        res[i  ] = smp[i  ] - CLIP(p0 >> shift);
        /* When (len - order) is odd the last pair holds only one valid
         * sample: skip the second store so that index len, which lies
         * outside [0, len), is neither read from smp nor written to res. */
        if (i + 1 < len)
            res[i+1] = smp[i+1] - CLIP(p1 >> shift);
    }
}

/**
 * C implementation of LPC residual computation.
 *
 * The first order entries of smp are copied to res unchanged, since their
 * prediction would need samples before index 0. From index order onwards
 * res[i] is smp[i] minus the shifted (and CLIP()ed) sum over j = 0..order-1
 * of coefs[j] * smp[i-1-j].
 *
 * @param res   output buffer for the copied samples and the residuals
 * @param smp   input samples
 * @param len   number of samples
 * @param order predictor order, i.e. number of coefficients used
 * @param coefs predictor coefficients, coefs[0..order-1]
 * @param shift right shift applied to the accumulated prediction
 */
static void FUNC(flac_lpc_encode_c)(int32_t *res, const int32_t *smp, int len,
                                    int order, const int32_t *coefs, int shift)
{
    int i;
    for (i = 0; i < order; i++)
        res[i] = smp[i];
#if CONFIG_SMALL
    /* Compact variant: a generic inner loop over the coefficients that still
     * produces two output samples (i and i+1) per outer iteration. */
    for (i = order; i < len; i += 2) {
        int j;
        /* s walks backwards from smp[i]; each coefficient is first paired
         * with the current s for sample i+1 (p1), then with the next older
         * sample for sample i (p0). */
        int s  = smp[i];
        sum_type p0 = 0, p1 = 0;
        for (j = 0; j < order; j++) {
            int c = coefs[j];
            p1   += MUL(c, s);
            s     = smp[i-j-1];
            p0   += MUL(c, s);
        }
        res[i  ] = smp[i  ] - CLIP(p0 >> shift);
        /* When (len - order) is odd the last pair holds only one valid
         * sample: skip the second store so that this loop neither reads
         * smp[len] nor writes res[len]. */
        if (i + 1 < len)
            res[i+1] = smp[i+1] - CLIP(p1 >> shift);
    }
#else
    /* Pass the order as a literal for 1..8 so the always-inlined helper can
     * be specialised per order; all other orders share one instance that
     * dispatches at run time (big = 1). */
    switch (order) {
    case  1: FUNC(lpc_encode_unrolled)(res, smp, len,     1, coefs, shift, 0); break;
    case  2: FUNC(lpc_encode_unrolled)(res, smp, len,     2, coefs, shift, 0); break;
    case  3: FUNC(lpc_encode_unrolled)(res, smp, len,     3, coefs, shift, 0); break;
    case  4: FUNC(lpc_encode_unrolled)(res, smp, len,     4, coefs, shift, 0); break;
    case  5: FUNC(lpc_encode_unrolled)(res, smp, len,     5, coefs, shift, 0); break;
    case  6: FUNC(lpc_encode_unrolled)(res, smp, len,     6, coefs, shift, 0); break;
    case  7: FUNC(lpc_encode_unrolled)(res, smp, len,     7, coefs, shift, 0); break;
    case  8: FUNC(lpc_encode_unrolled)(res, smp, len,     8, coefs, shift, 0); break;
    default: FUNC(lpc_encode_unrolled)(res, smp, len, order, coefs, shift, 1); break;
    }
#endif
}

/* Comment for clarity/de-obfuscation.
 *
 * for (int i = order; i < len; i++) {
 *     int32_t p = 0;
 *     for (int j = 0; j < order; j++) {
 *         int c = coefs[j];
 *         int s = smp[(i-1)-j];
 *         p    += c*s;
 *     }
 *     res[i] = smp[i] - (p >> shift);
 * }
 *
 * The CONFIG_SMALL code above simplifies to this, in the case of SAMPLE_SIZE
 * not being equal to 32 (at the present time that means for 16-bit audio). The
 * code above does 2 samples per iteration.  Commit bfdd5bc (made all the way
 * back in 2007) says that way is faster.
 */
flacdsp: move lpc encoding from FLAC encoder to FLACDSPContext Also, templatize the functions for 16-bit and 32-bit sample range. This will be used for 24-bit FLAC encoding. 2012-10-27 07:25:04 +03:00			`/*`
Merge remote-tracking branch 'qatar/master' * qatar/master: FATE: add a 24-bit FLAC encoding test FATE: rename FLAC tests from flac-* to flac-16-* flacenc: use RICE2 entropy coding mode for 24-bit flacenc: add 24-bit encoding flacdsp: move lpc encoding from FLAC encoder to FLACDSPContext Merged-by: Michael Niedermayer <michaelni@gmx.at> 2012-11-06 01:00:23 +03:00			`* This file is part of FFmpeg.`
flacdsp: move lpc encoding from FLAC encoder to FLACDSPContext Also, templatize the functions for 16-bit and 32-bit sample range. This will be used for 24-bit FLAC encoding. 2012-10-27 07:25:04 +03:00			`*`
Merge remote-tracking branch 'qatar/master' * qatar/master: FATE: add a 24-bit FLAC encoding test FATE: rename FLAC tests from flac-* to flac-16-* flacenc: use RICE2 entropy coding mode for 24-bit flacenc: add 24-bit encoding flacdsp: move lpc encoding from FLAC encoder to FLACDSPContext Merged-by: Michael Niedermayer <michaelni@gmx.at> 2012-11-06 01:00:23 +03:00			`* FFmpeg is free software; you can redistribute it and/or`
flacdsp: move lpc encoding from FLAC encoder to FLACDSPContext Also, templatize the functions for 16-bit and 32-bit sample range. This will be used for 24-bit FLAC encoding. 2012-10-27 07:25:04 +03:00			`* modify it under the terms of the GNU Lesser General Public`
			`* License as published by the Free Software Foundation; either`
			`* version 2.1 of the License, or (at your option) any later version.`
			`*`
Merge remote-tracking branch 'qatar/master' * qatar/master: FATE: add a 24-bit FLAC encoding test FATE: rename FLAC tests from flac-* to flac-16-* flacenc: use RICE2 entropy coding mode for 24-bit flacenc: add 24-bit encoding flacdsp: move lpc encoding from FLAC encoder to FLACDSPContext Merged-by: Michael Niedermayer <michaelni@gmx.at> 2012-11-06 01:00:23 +03:00			`* FFmpeg is distributed in the hope that it will be useful,`
flacdsp: move lpc encoding from FLAC encoder to FLACDSPContext Also, templatize the functions for 16-bit and 32-bit sample range. This will be used for 24-bit FLAC encoding. 2012-10-27 07:25:04 +03:00			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`* Lesser General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU Lesser General Public`
Merge remote-tracking branch 'qatar/master' * qatar/master: FATE: add a 24-bit FLAC encoding test FATE: rename FLAC tests from flac-* to flac-16-* flacenc: use RICE2 entropy coding mode for 24-bit flacenc: add 24-bit encoding flacdsp: move lpc encoding from FLAC encoder to FLACDSPContext Merged-by: Michael Niedermayer <michaelni@gmx.at> 2012-11-06 01:00:23 +03:00			`* License along with FFmpeg; if not, write to the Free Software`
flacdsp: move lpc encoding from FLAC encoder to FLACDSPContext Also, templatize the functions for 16-bit and 32-bit sample range. This will be used for 24-bit FLAC encoding. 2012-10-27 07:25:04 +03:00			`* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA`
			`*/`

			`#include <stdint.h>`
			`#include "libavutil/avutil.h"`
			`#include "mathops.h"`

			`#undef FUNC`
			`#undef sum_type`
			`#undef MUL`
			`#undef CLIP`
			`#undef FSUF`

			`#define FUNC(n) AV_JOIN(n ## _, SAMPLE_SIZE)`

			`#if SAMPLE_SIZE == 32`
			`# define sum_type int64_t`
			`# define MUL(a, b) MUL64(a, b)`
			`# define CLIP(x) av_clipl_int32(x)`
			`#else`
			`# define sum_type int32_t`
			`# define MUL(a, b) ((a) * (b))`
			`# define CLIP(x) (x)`
			`#endif`

			`#define LPC1(x) { \`
			`int c = coefs[(x)-1]; \`
			`p0 += MUL(c, s); \`
			`s = smp[i-(x)+1]; \`
			`p1 += MUL(c, s); \`
			`}`

			`static av_always_inline void FUNC(lpc_encode_unrolled)(int32_t *res,`
			`const int32_t *smp, int len, int order,`
			`const int32_t *coefs, int shift, int big)`
			`{`
			`int i;`
			`for (i = order; i < len; i += 2) {`
			`int s = smp[i-order];`
			`sum_type p0 = 0, p1 = 0;`
			`if (big) {`
			`switch (order) {`
			`case 32: LPC1(32)`
			`case 31: LPC1(31)`
			`case 30: LPC1(30)`
			`case 29: LPC1(29)`
			`case 28: LPC1(28)`
			`case 27: LPC1(27)`
			`case 26: LPC1(26)`
			`case 25: LPC1(25)`
			`case 24: LPC1(24)`
			`case 23: LPC1(23)`
			`case 22: LPC1(22)`
			`case 21: LPC1(21)`
			`case 20: LPC1(20)`
			`case 19: LPC1(19)`
			`case 18: LPC1(18)`
			`case 17: LPC1(17)`
			`case 16: LPC1(16)`
			`case 15: LPC1(15)`
			`case 14: LPC1(14)`
			`case 13: LPC1(13)`
			`case 12: LPC1(12)`
			`case 11: LPC1(11)`
			`case 10: LPC1(10)`
			`case 9: LPC1( 9)`
			`LPC1( 8)`
			`LPC1( 7)`
			`LPC1( 6)`
			`LPC1( 5)`
			`LPC1( 4)`
			`LPC1( 3)`
			`LPC1( 2)`
			`LPC1( 1)`
			`}`
			`} else {`
			`switch (order) {`
			`case 8: LPC1( 8)`
			`case 7: LPC1( 7)`
			`case 6: LPC1( 6)`
			`case 5: LPC1( 5)`
			`case 4: LPC1( 4)`
			`case 3: LPC1( 3)`
			`case 2: LPC1( 2)`
			`case 1: LPC1( 1)`
			`}`
			`}`
			`res[i ] = smp[i ] - CLIP(p0 >> shift);`
			`res[i+1] = smp[i+1] - CLIP(p1 >> shift);`
			`}`
			`}`

			`static void FUNC(flac_lpc_encode_c)(int32_t res, const int32_t smp, int len,`
			`int order, const int32_t *coefs, int shift)`
			`{`
			`int i;`
			`for (i = 0; i < order; i++)`
			`res[i] = smp[i];`
			`#if CONFIG_SMALL`
			`for (i = order; i < len; i += 2) {`
			`int j;`
			`int s = smp[i];`
			`sum_type p0 = 0, p1 = 0;`
			`for (j = 0; j < order; j++) {`
			`int c = coefs[j];`
			`p1 += MUL(c, s);`
			`s = smp[i-j-1];`
			`p0 += MUL(c, s);`
			`}`
			`res[i ] = smp[i ] - CLIP(p0 >> shift);`
			`res[i+1] = smp[i+1] - CLIP(p1 >> shift);`
			`}`
			`#else`
			`switch (order) {`
			`case 1: FUNC(lpc_encode_unrolled)(res, smp, len, 1, coefs, shift, 0); break;`
			`case 2: FUNC(lpc_encode_unrolled)(res, smp, len, 2, coefs, shift, 0); break;`
			`case 3: FUNC(lpc_encode_unrolled)(res, smp, len, 3, coefs, shift, 0); break;`
			`case 4: FUNC(lpc_encode_unrolled)(res, smp, len, 4, coefs, shift, 0); break;`
			`case 5: FUNC(lpc_encode_unrolled)(res, smp, len, 5, coefs, shift, 0); break;`
			`case 6: FUNC(lpc_encode_unrolled)(res, smp, len, 6, coefs, shift, 0); break;`
			`case 7: FUNC(lpc_encode_unrolled)(res, smp, len, 7, coefs, shift, 0); break;`
			`case 8: FUNC(lpc_encode_unrolled)(res, smp, len, 8, coefs, shift, 0); break;`
			`default: FUNC(lpc_encode_unrolled)(res, smp, len, order, coefs, shift, 1); break;`
			`}`
			`#endif`
			`}`
flacdsp_lpc_template: add comment to explain the CONFIG_SMALL code I found the optimisation of 2 samples per iteration obscured the underlying algorithm. I had to write it out on paper and translate into a mathematical sum to see that the two samples are unconnected. I hope that if anyone else is struggling to understand the code that this will be useful. Reviewed-by: Christophe Gisquet <christophe.gisquet@gmail.com> Signed-off-by: Michael Niedermayer <michaelni@gmx.at> 2014-02-14 14:40:10 +03:00
			`/* Comment for clarity/de-obfuscation.`
			`*`
			`* for (int i = order; i < len; i++) {`
			`* int32_t p = 0;`
			`* for (int j = 0; j < order; j++) {`
			`* int c = coefs[j];`
			`* int s = smp[(i-1)-j];`
			`* p += c*s;`
			`* }`
			`* res[i] = smp[i] - (p >> shift);`
			`* }`
			`*`
			`* The CONFIG_SMALL code above simplifies to this, in the case of SAMPLE_SIZE`
			`* not being equal to 32 (at the present time that means for 16-bit audio). The`
			`* code above does 2 samples per iteration. Commit bfdd5bc ( made all the way`
			`* back in 2007) says that way is faster.`
			`*/`