FFmpeg/libavcodec/aacencdsp.h

/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef AVCODEC_AACENCDSP_H
#define AVCODEC_AACENCDSP_H

#include <math.h>

#include "config.h"

#include "libavutil/macros.h"

/**
 * Table of function pointers for the AAC encoder DSP routines.
 *
 * ff_aacenc_dsp_init() stores the C implementations from this header in both
 * members and then passes the context to an architecture-specific init
 * function when one is selected at build time (presumably so that it can
 * substitute optimized versions; those functions are not visible here).
 */
typedef struct AACEncDSPContext {
    /**
     * Produce one output value per input sample for size samples.
     * The C reference (abs_pow34_v) writes sqrtf(a * sqrtf(a)) with
     * a = fabsf(in[i]), i.e. |in[i]|^(3/4), to out[i].
     */
    void (*abs_pow34)(float *out, const float *in, const int size);
    /**
     * Quantize size values into out[].
     * The C reference (quantize_bands) takes scaled[i] * Q34 + rounding,
     * limits it to maxval via FFMIN, converts it to int, and negates the
     * result when is_signed is nonzero and in[i] is negative.
     */
    void (*quant_bands)(int *out, const float *in, const float *scaled,
                        int size, int is_signed, int maxval, const float Q34,
                        const float rounding);
} AACEncDSPContext;

/*
 * Architecture-specific initializers. They are only declared here; their
 * definitions live outside this header. ff_aacenc_dsp_init() calls at most
 * one of them, chosen by the ARCH_RISCV / ARCH_X86 / ARCH_AARCH64 macros
 * (presumably provided by config.h), after it has stored the C defaults in
 * the context.
 */
void ff_aacenc_dsp_init_riscv(AACEncDSPContext *s);
void ff_aacenc_dsp_init_x86(AACEncDSPContext *s);
void ff_aacenc_dsp_init_aarch64(AACEncDSPContext *s);

static inline void abs_pow34_v(float *out, const float *in, const int size)
{
    for (int i = 0; i < size; i++) {
        float a = fabsf(in[i]);
        out[i] = sqrtf(a * sqrtf(a));
    }
}

/**
 * C implementation of AACEncDSPContext.quant_bands.
 *
 * For every i in [0, size): scaled[i] * Q34 + rounding is limited to maxval
 * via FFMIN and converted to int. When is_signed is nonzero and in[i] is
 * negative, the result is negated before being stored in out[i]. in[] is
 * only read when is_signed is nonzero.
 *
 * @param out       destination for size quantized values
 * @param in        original samples, consulted only for their sign
 * @param scaled    values to quantize
 * @param size      number of values to process
 * @param is_signed nonzero to copy the sign of in[i] onto the result
 * @param maxval    upper limit applied before the conversion to int
 * @param Q34       factor applied to every scaled[] element
 * @param rounding  offset added before the conversion to int
 */
static inline void quantize_bands(int *out, const float *in, const float *scaled,
                                  int size, int is_signed, int maxval, const float Q34,
                                  const float rounding)
{
    int idx = 0;

    while (idx < size) {
        float qc = scaled[idx] * Q34;
        /* The (float) cast on the sum is deliberate: with GCC and
         * -fexcess-precision=standard it rounds the x87 intermediate to
         * float, keeping the integer result consistent with the assembly
         * implementations. Do not fold it away. */
        int magnitude = (int)FFMIN((float)(qc + rounding), (float)maxval);
        const int negate = is_signed && in[idx] < 0.0f;

        out[idx++] = negate ? -magnitude : magnitude;
    }
}

/**
 * Initialize an AAC encoder DSP context.
 *
 * Stores the C implementations from this header in the context and then,
 * depending on which ARCH_* macro is nonzero, passes the context to the
 * matching architecture-specific init function (presumably so that it can
 * replace entries with optimized versions; their bodies are not visible
 * here).
 *
 * @param s context to fill; it is dereferenced unconditionally, so it must
 *          point to a valid AACEncDSPContext
 */
static inline void ff_aacenc_dsp_init(AACEncDSPContext *s)
{
    /* Install the portable defaults before any architecture hook runs. */
    s->abs_pow34   = abs_pow34_v;
    s->quant_bands = quantize_bands;

    /* At most one branch is compiled in; the macros are tested in the order
     * RISC-V, x86, AArch64. With none set, the C defaults stay in place. */
#if ARCH_RISCV
    ff_aacenc_dsp_init_riscv(s);
#elif ARCH_X86
    ff_aacenc_dsp_init_x86(s);
#elif ARCH_AARCH64
    ff_aacenc_dsp_init_aarch64(s);
#endif
}

#endif
avcodec/aacenc: Move initializing DSP out of aacenc.c Otherwise aacenc.o gets pulled in by the aacencdsp checkasm test and it in turn pulls the rest of lavc in. Besides being bad size-wise this also has the downside that it pulls in avpriv_(cga\|vga16)_font from libavutil which are marked as being imported from another library when building libavcodec as a DLL and this breaks checkasm because it links both lavc and lavu statically. Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> 2024-02-28 13:29:19 +01:00			`/*`
			`* This file is part of FFmpeg.`
			`*`
			`* FFmpeg is free software; you can redistribute it and/or`
			`* modify it under the terms of the GNU Lesser General Public`
			`* License as published by the Free Software Foundation; either`
			`* version 2.1 of the License, or (at your option) any later version.`
			`*`
			`* FFmpeg is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`* Lesser General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU Lesser General Public`
			`* License along with FFmpeg; if not, write to the Free Software`
			`* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA`
			`*/`

			`#ifndef AVCODEC_AACENCDSP_H`
			`#define AVCODEC_AACENCDSP_H`

			`#include <math.h>`

			`#include "config.h"`

			`#include "libavutil/macros.h"`

			`typedef struct AACEncDSPContext {`
			`void (*abs_pow34)(float *out, const float *in, const int size);`
			`void (*quant_bands)(int *out, const float *in, const float *scaled,`
			`int size, int is_signed, int maxval, const float Q34,`
			`const float rounding);`
			`} AACEncDSPContext;`

			`void ff_aacenc_dsp_init_riscv(AACEncDSPContext *s);`
			`void ff_aacenc_dsp_init_x86(AACEncDSPContext *s);`
avcodec/aarch64/aacencdsp: NEON implementation This patch supplies handwritten NEON code for AAC. The benchmarks below were collected by invoking these two commands on each of my boards, A78, A72 and Thinkpad x13s: 1) ./tests/checkasm/checkasm --test=aacencdsp --bench --runs=12 2) ./ffmpeg -y -t 10:00 -f lavfi -i sine /tmp/foo.aac (the first line is speed without the patch, second, with) - A78 abs_pow34_c: 4161.5 ( 1.00x) abs_pow34_neon: 3586.2 ( 1.16x) quant_bands_signed_c: 5548.0 ( 1.00x) quant_bands_signed_neon: 1126.8 ( 4.92x) quant_bands_unsigned_c: 3979.2 ( 1.00x) quant_bands_unsigned_neon: 800.2 ( 4.97x) size= 5251KiB time=00:10:00.00 bitrate= 71.7kbits/s speed=71.6x size= 5251KiB time=00:10:00.00 bitrate= 71.7kbits/s speed=82.3x - A72 abs_pow34_c: 15362.2 ( 1.00x) abs_pow34_neon: 15382.5 ( 1.00x) quant_bands_signed_c: 9926.5 ( 1.00x) quant_bands_signed_neon: 2467.8 ( 4.02x) quant_bands_unsigned_c: 5469.8 ( 1.00x) quant_bands_unsigned_neon: 2089.5 ( 2.62x) size= 5251KiB time=00:10:00.00 bitrate= 71.7kbits/s speed=34.3x size= 5251KiB time=00:10:00.00 bitrate= 71.7kbits/s speed=37.8 - x13s abs_pow34_c: 2413.4 ( 1.00x) abs_pow34_neon: 1796.2 ( 1.34x) quant_bands_signed_c: 2968.9 ( 1.00x) quant_bands_signed_neon: 675.6 ( 4.39x) quant_bands_unsigned_c: 2311.9 ( 1.00x) quant_bands_unsigned_neon: 477.1 ( 4.85x) size= 5251KiB time=00:10:00.00 bitrate= 71.7kbits/s speed= 135x size= 5251KiB time=00:10:00.00 bitrate= 71.7kbits/s speed= 159x Signed-off-by: Martin Storsjö <martin@martin.st> 2025-01-24 19:58:26 +01:00			`void ff_aacenc_dsp_init_aarch64(AACEncDSPContext *s);`
avcodec/aacenc: Move initializing DSP out of aacenc.c Otherwise aacenc.o gets pulled in by the aacencdsp checkasm test and it in turn pulls the rest of lavc in. Besides being bad size-wise this also has the downside that it pulls in avpriv_(cga\|vga16)_font from libavutil which are marked as being imported from another library when building libavcodec as a DLL and this breaks checkasm because it links both lavc and lavu statically. Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> 2024-02-28 13:29:19 +01:00
			`static inline void abs_pow34_v(float *out, const float *in, const int size)`
			`{`
			`for (int i = 0; i < size; i++) {`
			`float a = fabsf(in[i]);`
			`out[i] = sqrtf(a * sqrtf(a));`
			`}`
			`}`

			`static inline void quantize_bands(int *out, const float *in, const float *scaled,`
			`int size, int is_signed, int maxval, const float Q34,`
			`const float rounding)`
			`{`
			`for (int i = 0; i < size; i++) {`
			`float qc = scaled[i] * Q34;`
aacencdsp: Improve consistency with assembly, for x87 math Currently, the aacencdsp checkasm test fails for many seeds, if the C code has been built with x87 math. This happens because the excess precision of x87 math can make it end up rounding to a different integer, and the checkasm test checks that the output integers match exactly between C and assembly. One such failing case is "tests/checkasm/checkasm --test=aacencdsp 41" when compiled with GCC. When compiled with Clang, the test seed 21 produces a failure. To avoid the issue, we need to limit the precision of intermediates to their nominal float range, matching the assembly implementations. This can be achieved when compiling with GCC, by just adding a single cast. To observe the effect of this cast, compile the following snippet, int cast(float a, float b) { return (int) #ifdef CAST (float) #endif (a + b); } with "gcc -m32 -std=c17 -O2", with/without -DCAST. For x86_64 cases (without the "-m32"), the cast doesn't make any difference on the generated code. This cast would seem to not have any effect, as a binary expression with float inputs also would have the type float. However, if compiling with GCC with -fexcess-precision=standard, the cast forces limiting the precision according to the language standard here - according to the GCC docs [1]: > When compiling C or C++, if -fexcess-precision=standard is > specified then excess precision follows the rules specified in > ISO C99 or C++; in particular, both casts and assignments cause > values to be rounded to their semantic types (whereas -ffloat-store > only affects assignments). This option is enabled by default for > C or C++ if a strict conformance option such as -std=c99 or > -std=c++17 is used. FFmpeg's configure script enables -std=c17 by default. This only helps with GCC though - the cast doesn't make any difference for Clang. (Although, upstream Clang seems to default to SSE math, while Ubuntu-provided Clang defaults to x87 math.) 
Limiting the precision with Clang would require casting to volatile float for both intermediates here - and that does have a code generation effect on all architectures. [1] https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html 2025-08-22 23:26:11 +03:00			`int tmp = (int)FFMIN((float)(qc + rounding), (float)maxval);`
avcodec/aacenc: Move initializing DSP out of aacenc.c Otherwise aacenc.o gets pulled in by the aacencdsp checkasm test and it in turn pulls the rest of lavc in. Besides being bad size-wise this also has the downside that it pulls in avpriv_(cga\|vga16)_font from libavutil which are marked as being imported from another library when building libavcodec as a DLL and this breaks checkasm because it links both lavc and lavu statically. Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> 2024-02-28 13:29:19 +01:00			`if (is_signed && in[i] < 0.0f) {`
			`tmp = -tmp;`
			`}`
			`out[i] = tmp;`
			`}`
			`}`

			`static inline void ff_aacenc_dsp_init(AACEncDSPContext *s)`
			`{`
			`s->abs_pow34 = abs_pow34_v;`
			`s->quant_bands = quantize_bands;`

			`#if ARCH_RISCV`
			`ff_aacenc_dsp_init_riscv(s);`
			`#elif ARCH_X86`
			`ff_aacenc_dsp_init_x86(s);`
avcodec/aarch64/aacencdsp: NEON implementation This patch supplies handwritten NEON code for AAC. The benchmarks below were collected by invoking these two commands on each of my boards, A78, A72 and Thinkpad x13s: 1) ./tests/checkasm/checkasm --test=aacencdsp --bench --runs=12 2) ./ffmpeg -y -t 10:00 -f lavfi -i sine /tmp/foo.aac (the first line is speed without the patch, second, with) - A78 abs_pow34_c: 4161.5 ( 1.00x) abs_pow34_neon: 3586.2 ( 1.16x) quant_bands_signed_c: 5548.0 ( 1.00x) quant_bands_signed_neon: 1126.8 ( 4.92x) quant_bands_unsigned_c: 3979.2 ( 1.00x) quant_bands_unsigned_neon: 800.2 ( 4.97x) size= 5251KiB time=00:10:00.00 bitrate= 71.7kbits/s speed=71.6x size= 5251KiB time=00:10:00.00 bitrate= 71.7kbits/s speed=82.3x - A72 abs_pow34_c: 15362.2 ( 1.00x) abs_pow34_neon: 15382.5 ( 1.00x) quant_bands_signed_c: 9926.5 ( 1.00x) quant_bands_signed_neon: 2467.8 ( 4.02x) quant_bands_unsigned_c: 5469.8 ( 1.00x) quant_bands_unsigned_neon: 2089.5 ( 2.62x) size= 5251KiB time=00:10:00.00 bitrate= 71.7kbits/s speed=34.3x size= 5251KiB time=00:10:00.00 bitrate= 71.7kbits/s speed=37.8 - x13s abs_pow34_c: 2413.4 ( 1.00x) abs_pow34_neon: 1796.2 ( 1.34x) quant_bands_signed_c: 2968.9 ( 1.00x) quant_bands_signed_neon: 675.6 ( 4.39x) quant_bands_unsigned_c: 2311.9 ( 1.00x) quant_bands_unsigned_neon: 477.1 ( 4.85x) size= 5251KiB time=00:10:00.00 bitrate= 71.7kbits/s speed= 135x size= 5251KiB time=00:10:00.00 bitrate= 71.7kbits/s speed= 159x Signed-off-by: Martin Storsjö <martin@martin.st> 2025-01-24 19:58:26 +01:00			`#elif ARCH_AARCH64`
			`ff_aacenc_dsp_init_aarch64(s);`
avcodec/aacenc: Move initializing DSP out of aacenc.c Otherwise aacenc.o gets pulled in by the aacencdsp checkasm test and it in turn pulls the rest of lavc in. Besides being bad size-wise this also has the downside that it pulls in avpriv_(cga\|vga16)_font from libavutil which are marked as being imported from another library when building libavcodec as a DLL and this breaks checkasm because it links both lavc and lavu statically. Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> 2024-02-28 13:29:19 +01:00			`#endif`
			`}`

			`#endif`