mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-03 05:10:03 +02:00
65aa002d54
The idct32x32 function actually pushed d8-d15 onto the stack even though it didn't clobber them; there are plenty of registers that can be used to allow keeping all the idct coefficients in registers without having to reload different subsets of them at different stages in the transform. After this, we can still skip pushing d12-d15. Before: vp9_inv_dct_dct_32x32_sub32_add_neon: 8128.3; After: vp9_inv_dct_dct_32x32_sub32_add_neon: 8053.3. Signed-off-by: Martin Storsjö <martin@martin.st> |
||
---|---|---|
.. | ||
asm-offsets.h | ||
cabac.h | ||
dcadsp_init.c | ||
dcadsp_neon.S | ||
fft_init_aarch64.c | ||
fft_neon.S | ||
fmtconvert_init.c | ||
fmtconvert_neon.S | ||
h264chroma_init_aarch64.c | ||
h264cmc_neon.S | ||
h264dsp_init_aarch64.c | ||
h264dsp_neon.S | ||
h264idct_neon.S | ||
h264pred_init.c | ||
h264pred_neon.S | ||
h264qpel_init_aarch64.c | ||
h264qpel_neon.S | ||
hpeldsp_init_aarch64.c | ||
hpeldsp_neon.S | ||
imdct15_init.c | ||
imdct15_neon.S | ||
Makefile | ||
mdct_init.c | ||
mdct_neon.S | ||
mpegaudiodsp_init.c | ||
mpegaudiodsp_neon.S | ||
neon.S | ||
neontest.c | ||
rv40dsp_init_aarch64.c | ||
synth_filter_neon.S | ||
vc1dsp_init_aarch64.c | ||
videodsp_init.c | ||
videodsp.S | ||
vorbisdsp_init.c | ||
vorbisdsp_neon.S | ||
vp9dsp_init_aarch64.c | ||
vp9itxfm_neon.S | ||
vp9lpf_neon.S | ||
vp9mc_neon.S |