mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-13 21:28:01 +02:00
5ab138673b
As the plain neon qpel_h functions process two rows at a time, we need to allocate storage for h+8 rows instead of h+7. However, with storage for h+8 rows allocated, incrementing the stack pointer no longer ends up at the right spot at the end. Therefore, store the intended final stack pointer value in register x14, which we store on the stack. AWS Graviton 3: put_hevc_qpel_uni_hv4_8_c: 384.2 put_hevc_qpel_uni_hv4_8_neon: 127.5 put_hevc_qpel_uni_hv4_8_i8mm: 85.5 put_hevc_qpel_uni_hv6_8_c: 705.5 put_hevc_qpel_uni_hv6_8_neon: 224.5 put_hevc_qpel_uni_hv6_8_i8mm: 176.2 put_hevc_qpel_uni_hv8_8_c: 1136.5 put_hevc_qpel_uni_hv8_8_neon: 216.5 put_hevc_qpel_uni_hv8_8_i8mm: 214.0 put_hevc_qpel_uni_hv12_8_c: 2259.5 put_hevc_qpel_uni_hv12_8_neon: 498.5 put_hevc_qpel_uni_hv12_8_i8mm: 410.7 put_hevc_qpel_uni_hv16_8_c: 3824.7 put_hevc_qpel_uni_hv16_8_neon: 670.0 put_hevc_qpel_uni_hv16_8_i8mm: 603.7 put_hevc_qpel_uni_hv24_8_c: 8113.5 put_hevc_qpel_uni_hv24_8_neon: 1474.7 put_hevc_qpel_uni_hv24_8_i8mm: 1351.5 put_hevc_qpel_uni_hv32_8_c: 14744.5 put_hevc_qpel_uni_hv32_8_neon: 2599.7 put_hevc_qpel_uni_hv32_8_i8mm: 2266.0 put_hevc_qpel_uni_hv48_8_c: 32800.0 put_hevc_qpel_uni_hv48_8_neon: 5650.0 put_hevc_qpel_uni_hv48_8_i8mm: 5011.7 put_hevc_qpel_uni_hv64_8_c: 57856.2 put_hevc_qpel_uni_hv64_8_neon: 9863.5 put_hevc_qpel_uni_hv64_8_i8mm: 8767.7 Signed-off-by: Martin Storsjö <martin@martin.st> |
||
---|---|---|
.. | ||
aacpsdsp_init_aarch64.c | ||
aacpsdsp_neon.S | ||
cabac.h | ||
fmtconvert_init.c | ||
fmtconvert_neon.S | ||
h264chroma_init_aarch64.c | ||
h264cmc_neon.S | ||
h264dsp_init_aarch64.c | ||
h264dsp_neon.S | ||
h264idct_neon.S | ||
h264pred_init.c | ||
h264pred_neon.S | ||
h264qpel_init_aarch64.c | ||
h264qpel_neon.S | ||
hevcdsp_deblock_neon.S | ||
hevcdsp_epel_neon.S | ||
hevcdsp_idct_neon.S | ||
hevcdsp_init_aarch64.c | ||
hevcdsp_qpel_neon.S | ||
hevcdsp_sao_neon.S | ||
hpeldsp_init_aarch64.c | ||
hpeldsp_neon.S | ||
idct.h | ||
idctdsp_init_aarch64.c | ||
idctdsp_neon.S | ||
Makefile | ||
me_cmp_init_aarch64.c | ||
me_cmp_neon.S | ||
mpegaudiodsp_init.c | ||
mpegaudiodsp_neon.S | ||
neon.S | ||
neontest.c | ||
opusdsp_init.c | ||
opusdsp_neon.S | ||
pixblockdsp_init_aarch64.c | ||
pixblockdsp_neon.S | ||
rv40dsp_init_aarch64.c | ||
sbrdsp_init_aarch64.c | ||
sbrdsp_neon.S | ||
simple_idct_neon.S | ||
synth_filter_init.c | ||
synth_filter_neon.S | ||
vc1dsp_init_aarch64.c | ||
vc1dsp_neon.S | ||
videodsp_init.c | ||
videodsp.S | ||
vorbisdsp_init.c | ||
vorbisdsp_neon.S | ||
vp8dsp_init_aarch64.c | ||
vp8dsp_neon.S | ||
vp8dsp.h | ||
vp9dsp_init_10bpp_aarch64.c | ||
vp9dsp_init_12bpp_aarch64.c | ||
vp9dsp_init_16bpp_aarch64_template.c | ||
vp9dsp_init_aarch64.c | ||
vp9dsp_init.h | ||
vp9itxfm_16bpp_neon.S | ||
vp9itxfm_neon.S | ||
vp9lpf_16bpp_neon.S | ||
vp9lpf_neon.S | ||
vp9mc_16bpp_neon.S | ||
vp9mc_aarch64.S | ||
vp9mc_neon.S |