mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-11-26 19:01:44 +02:00
4041c1029b
We introduced ff_horiz_slice_avx2/512(), implemented using a new algorithm. In a nutshell, the new algorithm does three things: gathering data from 8/16 rows, blurring the data, and scattering the data back to the image buffer. Here we use a customized 8x8/16x16 transpose to avoid the huge overhead of the gather and scatter instructions, which relies on a newly added temporary buffer called localbuf. Performance data: ff_horiz_slice_avx2(old): 109.89; ff_horiz_slice_avx2(new): 666.67; ff_horiz_slice_avx512: 1000. Co-authored-by: Cheng Yanfei <yanfei.cheng@intel.com> Co-authored-by: Jin Jun <jun.i.jin@intel.com> Signed-off-by: Wu Jianhua <jianhua.wu@intel.com> |
||
---|---|---|
.. | ||
af_afir_init.c | ||
af_afir.asm | ||
af_anlmdn_init.c | ||
af_anlmdn.asm | ||
af_volume_init.c | ||
af_volume.asm | ||
avf_showcqt_init.c | ||
avf_showcqt.asm | ||
colorspacedsp_init.c | ||
colorspacedsp.asm | ||
Makefile | ||
scene_sad_init.c | ||
scene_sad.asm | ||
vf_atadenoise_init.c | ||
vf_atadenoise.asm | ||
vf_blend_init.c | ||
vf_blend.asm | ||
vf_bwdif_init.c | ||
vf_bwdif.asm | ||
vf_convolution_init.c | ||
vf_convolution.asm | ||
vf_eq_init.c | ||
vf_eq.asm | ||
vf_framerate_init.c | ||
vf_framerate.asm | ||
vf_fspp_init.c | ||
vf_fspp.asm | ||
vf_gblur_init.c | ||
vf_gblur.asm | ||
vf_gradfun_init.c | ||
vf_gradfun.asm | ||
vf_hflip_init.c | ||
vf_hflip.asm | ||
vf_hqdn3d_init.c | ||
vf_hqdn3d.asm | ||
vf_idet_init.c | ||
vf_idet.asm | ||
vf_interlace.asm | ||
vf_limiter_init.c | ||
vf_limiter.asm | ||
vf_maskedclamp_init.c | ||
vf_maskedclamp.asm | ||
vf_maskedmerge_init.c | ||
vf_maskedmerge.asm | ||
vf_noise.c | ||
vf_overlay_init.c | ||
vf_overlay.asm | ||
vf_pp7_init.c | ||
vf_pp7.asm | ||
vf_psnr_init.c | ||
vf_psnr.asm | ||
vf_pullup_init.c | ||
vf_pullup.asm | ||
vf_removegrain_init.c | ||
vf_removegrain.asm | ||
vf_spp.c | ||
vf_ssim_init.c | ||
vf_ssim.asm | ||
vf_stereo3d_init.c | ||
vf_stereo3d.asm | ||
vf_threshold_init.c | ||
vf_threshold.asm | ||
vf_tinterlace_init.c | ||
vf_transpose_init.c | ||
vf_transpose.asm | ||
vf_v360_init.c | ||
vf_v360.asm | ||
vf_w3fdif_init.c | ||
vf_w3fdif.asm | ||
vf_yadif_init.c | ||
vf_yadif.asm | ||
yadif-10.asm | ||
yadif-16.asm |