mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-18 03:19:31 +02:00
42c1cc35b7
The previous implementation targeted DTS Coherent Acoustics, which only requires mdct_bits == 6. This relatively small size lent itself to unrolling the loops a small number of times, and to encoding offsets, calculated at assembly time, within the load/store instructions of each iteration. In the more general case (codecs such as AAC and AC3) much larger arrays are used - mdct_bits == [8, 9, 11]. The old method does not scale for these cases, so more integer registers are used with non-unrolled versions of the loops (and with some stack spillage). The postrotation filter loop is still unrolled by a factor of 2 to permit the double-buffering of some VFP registers to facilitate overlap of neighbouring iterations. I benchmarked the result by measuring the number of gperftools samples that hit anywhere in the AAC decoder (starting from aac_decode_frame()) or specifically in ff_imdct_half_c / ff_imdct_half_vfp, for the same example AAC stream. Results (sample counts): aac_decode_frame - before: mean 2368.1, std dev 35.8; after: mean 2117.2, std dev 35.3; confidence 100.0%; change +11.8%. ff_imdct_half_* - before: mean 457.5, std dev 22.4; after: mean 251.2, std dev 16.2; confidence 100.0%; change +82.1%. Signed-off-by: Michael Niedermayer <michaelni@gmx.at> |
||
---|---|---|
.. | ||
aac.h | ||
aacpsdsp_init_arm.c | ||
aacpsdsp_neon.S | ||
ac3dsp_arm.S | ||
ac3dsp_armv6.S | ||
ac3dsp_init_arm.c | ||
ac3dsp_neon.S | ||
asm-offsets.h | ||
audiodsp_arm.h | ||
audiodsp_init_arm.c | ||
audiodsp_init_neon.c | ||
audiodsp_neon.S | ||
blockdsp_arm.h | ||
blockdsp_init_arm.c | ||
blockdsp_init_neon.c | ||
blockdsp_neon.S | ||
cabac.h | ||
dca.h | ||
dcadsp_init_arm.c | ||
dcadsp_neon.S | ||
dcadsp_vfp.S | ||
dsputil_arm.h | ||
dsputil_armv6.S | ||
dsputil_init_arm.c | ||
dsputil_init_armv6.c | ||
fft_fixed_init_arm.c | ||
fft_fixed_neon.S | ||
fft_init_arm.c | ||
fft_neon.S | ||
fft_vfp.S | ||
flacdsp_arm.S | ||
flacdsp_init_arm.c | ||
fmtconvert_init_arm.c | ||
fmtconvert_neon.S | ||
fmtconvert_vfp_armv6.S | ||
fmtconvert_vfp.S | ||
h264chroma_init_arm.c | ||
h264cmc_neon.S | ||
h264dsp_init_arm.c | ||
h264dsp_neon.S | ||
h264idct_neon.S | ||
h264pred_init_arm.c | ||
h264pred_neon.S | ||
h264qpel_init_arm.c | ||
h264qpel_neon.S | ||
hpeldsp_arm.h | ||
hpeldsp_arm.S | ||
hpeldsp_armv6.S | ||
hpeldsp_init_arm.c | ||
hpeldsp_init_armv6.c | ||
hpeldsp_init_neon.c | ||
hpeldsp_neon.S | ||
idctdsp_arm.h | ||
idctdsp_arm.S | ||
idctdsp_armv6.S | ||
idctdsp_init_arm.c | ||
idctdsp_init_armv5te.c | ||
idctdsp_init_armv6.c | ||
idctdsp_init_neon.c | ||
idctdsp_neon.S | ||
int_neon.S | ||
jrevdct_arm.S | ||
lossless_audiodsp_init_arm.c | ||
lossless_audiodsp_neon.S | ||
Makefile | ||
mathops.h | ||
mdct_fixed_neon.S | ||
mdct_neon.S | ||
mdct_vfp.S | ||
mlpdsp_armv5te.S | ||
mlpdsp_armv6.S | ||
mlpdsp_init_arm.c | ||
mpegaudiodsp_fixed_armv6.S | ||
mpegaudiodsp_init_arm.c | ||
mpegvideo_arm.c | ||
mpegvideo_arm.h | ||
mpegvideo_armv5te_s.S | ||
mpegvideo_armv5te.c | ||
mpegvideo_neon.S | ||
mpegvideoencdsp_armv6.S | ||
mpegvideoencdsp_init_arm.c | ||
neon.S | ||
neontest.c | ||
pixblockdsp_armv6.S | ||
pixblockdsp_init_arm.c | ||
rdft_neon.S | ||
rv34dsp_init_arm.c | ||
rv34dsp_neon.S | ||
rv40dsp_init_arm.c | ||
rv40dsp_neon.S | ||
sbrdsp_init_arm.c | ||
sbrdsp_neon.S | ||
simple_idct_arm.S | ||
simple_idct_armv5te.S | ||
simple_idct_armv6.S | ||
simple_idct_neon.S | ||
startcode_armv6.S | ||
synth_filter_neon.S | ||
synth_filter_vfp.S | ||
vc1dsp_init_arm.c | ||
vc1dsp_init_neon.c | ||
vc1dsp_neon.S | ||
vc1dsp.h | ||
videodsp_arm.h | ||
videodsp_armv5te.S | ||
videodsp_init_arm.c | ||
videodsp_init_armv5te.c | ||
vorbisdsp_init_arm.c | ||
vorbisdsp_neon.S | ||
vp3dsp_init_arm.c | ||
vp3dsp_neon.S | ||
vp6dsp_init_arm.c | ||
vp6dsp_neon.S | ||
vp8_armv6.S | ||
vp8.h | ||
vp8dsp_armv6.S | ||
vp8dsp_init_arm.c | ||
vp8dsp_init_armv6.c | ||
vp8dsp_init_neon.c | ||
vp8dsp_neon.S | ||
vp8dsp.h | ||
vp56_arith.h |