mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-11-26 19:01:44 +02:00
9f10cff610
This work is sponsored by, and copyright, Google. This is similar to the arm version, but due to the larger registers on aarch64, we can do 8 pixels at a time for all filter sizes. Examples of runtimes vs the 32 bit version, on a Cortex A53: ARM AArch64 vp9_loop_filter_h_4_8_10bpp_neon: 213.2 172.6 vp9_loop_filter_h_8_8_10bpp_neon: 281.2 244.2 vp9_loop_filter_h_16_8_10bpp_neon: 657.0 444.5 vp9_loop_filter_h_16_16_10bpp_neon: 1280.4 877.7 vp9_loop_filter_mix2_h_44_16_10bpp_neon: 397.7 358.0 vp9_loop_filter_mix2_h_48_16_10bpp_neon: 465.7 429.0 vp9_loop_filter_mix2_h_84_16_10bpp_neon: 465.7 428.0 vp9_loop_filter_mix2_h_88_16_10bpp_neon: 533.7 499.0 vp9_loop_filter_mix2_v_44_16_10bpp_neon: 271.5 244.0 vp9_loop_filter_mix2_v_48_16_10bpp_neon: 330.0 305.0 vp9_loop_filter_mix2_v_84_16_10bpp_neon: 329.0 306.0 vp9_loop_filter_mix2_v_88_16_10bpp_neon: 386.0 365.0 vp9_loop_filter_v_4_8_10bpp_neon: 150.0 115.2 vp9_loop_filter_v_8_8_10bpp_neon: 209.0 175.5 vp9_loop_filter_v_16_8_10bpp_neon: 492.7 345.2 vp9_loop_filter_v_16_16_10bpp_neon: 951.0 682.7 This is significantly faster than the ARM version in almost all cases except for the mix2 functions. Based on START_TIMER/STOP_TIMER wrapping around a few individual functions, the speedup vs C code is around 2-3x. Signed-off-by: Martin Storsjö <martin@martin.st> |
||
---|---|---|
.. | ||
asm-offsets.h | ||
cabac.h | ||
fft_init_aarch64.c | ||
fft_neon.S | ||
fmtconvert_init.c | ||
fmtconvert_neon.S | ||
h264chroma_init_aarch64.c | ||
h264cmc_neon.S | ||
h264dsp_init_aarch64.c | ||
h264dsp_neon.S | ||
h264idct_neon.S | ||
h264pred_init.c | ||
h264pred_neon.S | ||
h264qpel_init_aarch64.c | ||
h264qpel_neon.S | ||
hpeldsp_init_aarch64.c | ||
hpeldsp_neon.S | ||
Makefile | ||
mdct_neon.S | ||
mpegaudiodsp_init.c | ||
mpegaudiodsp_neon.S | ||
neon.S | ||
neontest.c | ||
rv40dsp_init_aarch64.c | ||
synth_filter_init.c | ||
synth_filter_neon.S | ||
vc1dsp_init_aarch64.c | ||
videodsp_init.c | ||
videodsp.S | ||
vorbisdsp_init.c | ||
vorbisdsp_neon.S | ||
vp9dsp_init_10bpp_aarch64.c | ||
vp9dsp_init_12bpp_aarch64.c | ||
vp9dsp_init_16bpp_aarch64_template.c | ||
vp9dsp_init_aarch64.c | ||
vp9dsp_init.h | ||
vp9itxfm_16bpp_neon.S | ||
vp9itxfm_neon.S | ||
vp9lpf_16bpp_neon.S | ||
vp9lpf_neon.S | ||
vp9mc_16bpp_neon.S | ||
vp9mc_neon.S |