mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-13 21:28:01 +02:00
716b396740
I spotted an interesting pattern that I hadn't seen before, which makes the implementation faster. The bit-shifting table I was using before is no longer needed, and I was able to remove quite a few lines. I also added use of FMA in the AVX2 version.

f32 1920x1080 1 thread with prelut
c impl
1434012700 UNITS in lut3d->interp, 1 runs, 0 skips
1434035335 UNITS in lut3d->interp, 2 runs, 0 skips
1423615347 UNITS in lut3d->interp, 4 runs, 0 skips
1426268863 UNITS in lut3d->interp, 8 runs, 0 skips
sse2
905484420 UNITS in lut3d->interp, 1 runs, 0 skips
905659010 UNITS in lut3d->interp, 2 runs, 0 skips
915167140 UNITS in lut3d->interp, 4 runs, 0 skips
915834222 UNITS in lut3d->interp, 8 runs, 0 skips
avx
574794860 UNITS in lut3d->interp, 1 runs, 0 skips
581035090 UNITS in lut3d->interp, 2 runs, 0 skips
584116720 UNITS in lut3d->interp, 4 runs, 0 skips
581460290 UNITS in lut3d->interp, 8 runs, 0 skips
avx2
301698880 UNITS in lut3d->interp, 1 runs, 0 skips
301982880 UNITS in lut3d->interp, 2 runs, 0 skips
306962430 UNITS in lut3d->interp, 4 runs, 0 skips
305472025 UNITS in lut3d->interp, 8 runs, 0 skips

gbrap16 1920x1080 1 thread with prelut
c impl
1480894840 UNITS in lut3d->interp, 1 runs, 0 skips
1502922990 UNITS in lut3d->interp, 2 runs, 0 skips
1496114307 UNITS in lut3d->interp, 4 runs, 0 skips
1492554551 UNITS in lut3d->interp, 8 runs, 0 skips
sse2
980777180 UNITS in lut3d->interp, 1 runs, 0 skips
986121520 UNITS in lut3d->interp, 2 runs, 0 skips
986489840 UNITS in lut3d->interp, 4 runs, 0 skips
998832248 UNITS in lut3d->interp, 8 runs, 0 skips
avx
622212360 UNITS in lut3d->interp, 1 runs, 0 skips
622981160 UNITS in lut3d->interp, 2 runs, 0 skips
645396315 UNITS in lut3d->interp, 4 runs, 0 skips
641057075 UNITS in lut3d->interp, 8 runs, 0 skips
avx2
321336400 UNITS in lut3d->interp, 1 runs, 0 skips
321268920 UNITS in lut3d->interp, 2 runs, 0 skips
323459895 UNITS in lut3d->interp, 4 runs, 0 skips
324949967 UNITS in lut3d->interp, 8 runs, 0 skips
82 lines
5.0 KiB
Makefile
82 lines
5.0 KiB
Makefile
# x86 objects appended to OBJS-<value of CONFIG_*>.
#
# Each line uses a computed variable name: the object is appended to whichever
# variable "OBJS-" plus the expansion of the CONFIG_* option names. The CONFIG_*
# values are not set in this file (presumably by the configure-generated
# config -- confirm against the including Makefile).
#
# Notes for maintainers:
#  - BLEND and TBLEND both add x86/vf_blend_init.o, and INTERLACE and
#    TINTERLACE both add x86/vf_tinterlace_init.o, so an object can appear
#    twice in the same list when both options expand to the same value.
#    NOTE(review): de-duplication is assumed to happen where OBJS-* is
#    consumed -- confirm.
#  - x86/vf_noise.o and x86/vf_spp.o carry no "_init" suffix, unlike every
#    other entry here.
#  - The list is not strictly alphabetical (FRAMERATE follows GRADFUN, V360
#    follows VOLUME); the order is kept as-is so the resulting variable
#    contents do not change.
OBJS-$(CONFIG_SCENE_SAD)                     += x86/scene_sad_init.o

OBJS-$(CONFIG_AFIR_FILTER)                   += x86/af_afir_init.o
OBJS-$(CONFIG_ANLMDN_FILTER)                 += x86/af_anlmdn_init.o
OBJS-$(CONFIG_ATADENOISE_FILTER)             += x86/vf_atadenoise_init.o
OBJS-$(CONFIG_BLEND_FILTER)                  += x86/vf_blend_init.o
OBJS-$(CONFIG_BWDIF_FILTER)                  += x86/vf_bwdif_init.o
OBJS-$(CONFIG_COLORSPACE_FILTER)             += x86/colorspacedsp_init.o
OBJS-$(CONFIG_CONVOLUTION_FILTER)            += x86/vf_convolution_init.o
OBJS-$(CONFIG_EQ_FILTER)                     += x86/vf_eq_init.o
OBJS-$(CONFIG_FSPP_FILTER)                   += x86/vf_fspp_init.o
OBJS-$(CONFIG_GBLUR_FILTER)                  += x86/vf_gblur_init.o
OBJS-$(CONFIG_GRADFUN_FILTER)                += x86/vf_gradfun_init.o
OBJS-$(CONFIG_FRAMERATE_FILTER)              += x86/vf_framerate_init.o
OBJS-$(CONFIG_HFLIP_FILTER)                  += x86/vf_hflip_init.o
OBJS-$(CONFIG_HQDN3D_FILTER)                 += x86/vf_hqdn3d_init.o
OBJS-$(CONFIG_IDET_FILTER)                   += x86/vf_idet_init.o
OBJS-$(CONFIG_INTERLACE_FILTER)              += x86/vf_tinterlace_init.o
OBJS-$(CONFIG_LIMITER_FILTER)                += x86/vf_limiter_init.o
OBJS-$(CONFIG_LUT3D_FILTER)                  += x86/vf_lut3d_init.o
OBJS-$(CONFIG_MASKEDCLAMP_FILTER)            += x86/vf_maskedclamp_init.o
OBJS-$(CONFIG_MASKEDMERGE_FILTER)            += x86/vf_maskedmerge_init.o
OBJS-$(CONFIG_NOISE_FILTER)                  += x86/vf_noise.o
OBJS-$(CONFIG_OVERLAY_FILTER)                += x86/vf_overlay_init.o
OBJS-$(CONFIG_PP7_FILTER)                    += x86/vf_pp7_init.o
OBJS-$(CONFIG_PSNR_FILTER)                   += x86/vf_psnr_init.o
OBJS-$(CONFIG_PULLUP_FILTER)                 += x86/vf_pullup_init.o
OBJS-$(CONFIG_REMOVEGRAIN_FILTER)            += x86/vf_removegrain_init.o
OBJS-$(CONFIG_SHOWCQT_FILTER)                += x86/avf_showcqt_init.o
OBJS-$(CONFIG_SPP_FILTER)                    += x86/vf_spp.o
OBJS-$(CONFIG_SSIM_FILTER)                   += x86/vf_ssim_init.o
OBJS-$(CONFIG_STEREO3D_FILTER)               += x86/vf_stereo3d_init.o
OBJS-$(CONFIG_TBLEND_FILTER)                 += x86/vf_blend_init.o
OBJS-$(CONFIG_THRESHOLD_FILTER)              += x86/vf_threshold_init.o
OBJS-$(CONFIG_TINTERLACE_FILTER)             += x86/vf_tinterlace_init.o
OBJS-$(CONFIG_TRANSPOSE_FILTER)              += x86/vf_transpose_init.o
OBJS-$(CONFIG_VOLUME_FILTER)                 += x86/af_volume_init.o
OBJS-$(CONFIG_V360_FILTER)                   += x86/vf_v360_init.o
OBJS-$(CONFIG_W3FDIF_FILTER)                 += x86/vf_w3fdif_init.o
OBJS-$(CONFIG_YADIF_FILTER)                  += x86/vf_yadif_init.o

# x86 assembly objects appended to X86ASM-OBJS-<value of CONFIG_*>.
#
# Same computed-variable-name scheme as the OBJS-$(CONFIG_*) lines: the target
# variable is chosen by the expansion of each CONFIG_* option, which is set
# outside this file.
#
# Notes for maintainers:
#  - BLEND/TBLEND share x86/vf_blend.o and INTERLACE/TINTERLACE share
#    x86/vf_interlace.o, so an object can be listed twice in one variable.
#    NOTE(review): de-duplication is assumed to happen where the list is
#    consumed -- confirm.
#  - There are no entries for CONFIG_NOISE_FILTER or CONFIG_SPP_FILTER here.
#  - YADIF contributes three objects on a single line.
X86ASM-OBJS-$(CONFIG_SCENE_SAD)              += x86/scene_sad.o

X86ASM-OBJS-$(CONFIG_AFIR_FILTER)            += x86/af_afir.o
X86ASM-OBJS-$(CONFIG_ANLMDN_FILTER)          += x86/af_anlmdn.o
X86ASM-OBJS-$(CONFIG_ATADENOISE_FILTER)      += x86/vf_atadenoise.o
X86ASM-OBJS-$(CONFIG_BLEND_FILTER)           += x86/vf_blend.o
X86ASM-OBJS-$(CONFIG_BWDIF_FILTER)           += x86/vf_bwdif.o
X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER)      += x86/colorspacedsp.o
X86ASM-OBJS-$(CONFIG_CONVOLUTION_FILTER)     += x86/vf_convolution.o
X86ASM-OBJS-$(CONFIG_EQ_FILTER)              += x86/vf_eq.o
X86ASM-OBJS-$(CONFIG_FRAMERATE_FILTER)       += x86/vf_framerate.o
X86ASM-OBJS-$(CONFIG_FSPP_FILTER)            += x86/vf_fspp.o
X86ASM-OBJS-$(CONFIG_GBLUR_FILTER)           += x86/vf_gblur.o
X86ASM-OBJS-$(CONFIG_GRADFUN_FILTER)         += x86/vf_gradfun.o
X86ASM-OBJS-$(CONFIG_HFLIP_FILTER)           += x86/vf_hflip.o
X86ASM-OBJS-$(CONFIG_HQDN3D_FILTER)          += x86/vf_hqdn3d.o
X86ASM-OBJS-$(CONFIG_IDET_FILTER)            += x86/vf_idet.o
X86ASM-OBJS-$(CONFIG_INTERLACE_FILTER)       += x86/vf_interlace.o
X86ASM-OBJS-$(CONFIG_LIMITER_FILTER)         += x86/vf_limiter.o
X86ASM-OBJS-$(CONFIG_LUT3D_FILTER)           += x86/vf_lut3d.o
X86ASM-OBJS-$(CONFIG_MASKEDCLAMP_FILTER)     += x86/vf_maskedclamp.o
X86ASM-OBJS-$(CONFIG_MASKEDMERGE_FILTER)     += x86/vf_maskedmerge.o
X86ASM-OBJS-$(CONFIG_OVERLAY_FILTER)         += x86/vf_overlay.o
X86ASM-OBJS-$(CONFIG_PP7_FILTER)             += x86/vf_pp7.o
X86ASM-OBJS-$(CONFIG_PSNR_FILTER)            += x86/vf_psnr.o
X86ASM-OBJS-$(CONFIG_PULLUP_FILTER)          += x86/vf_pullup.o
# The removegrain assembly object is added only when CONFIG_GPL is defined
# with a non-empty value; without it the line below is skipped entirely.
ifdef CONFIG_GPL
X86ASM-OBJS-$(CONFIG_REMOVEGRAIN_FILTER)     += x86/vf_removegrain.o
endif
X86ASM-OBJS-$(CONFIG_SHOWCQT_FILTER)         += x86/avf_showcqt.o
X86ASM-OBJS-$(CONFIG_SSIM_FILTER)            += x86/vf_ssim.o
X86ASM-OBJS-$(CONFIG_STEREO3D_FILTER)        += x86/vf_stereo3d.o
X86ASM-OBJS-$(CONFIG_TBLEND_FILTER)          += x86/vf_blend.o
X86ASM-OBJS-$(CONFIG_THRESHOLD_FILTER)       += x86/vf_threshold.o
X86ASM-OBJS-$(CONFIG_TINTERLACE_FILTER)      += x86/vf_interlace.o
X86ASM-OBJS-$(CONFIG_TRANSPOSE_FILTER)       += x86/vf_transpose.o
X86ASM-OBJS-$(CONFIG_VOLUME_FILTER)          += x86/af_volume.o
X86ASM-OBJS-$(CONFIG_V360_FILTER)            += x86/vf_v360.o
X86ASM-OBJS-$(CONFIG_W3FDIF_FILTER)          += x86/vf_w3fdif.o
X86ASM-OBJS-$(CONFIG_YADIF_FILTER)           += x86/vf_yadif.o x86/yadif-16.o x86/yadif-10.o