mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-12 19:18:44 +02:00
e5c9de2ab7
Provide MMX, SSE2 and SSSE3 versions, with a fast path when the weights are multiples of 512 (which is often the case when the values round up nicely).

*_TIMER report for the 16x16 and 8x8 cases:

C:
  9015 decicycles in 16, 524257 runs, 31 skips
  2656 decicycles in 8, 524271 runs, 17 skips

MMX:
  4156 decicycles in 16, 262090 runs, 54 skips
  1206 decicycles in 8, 262131 runs, 13 skips

MMX on fast path:
  2760 decicycles in 16, 524222 runs, 66 skips
  995 decicycles in 8, 524252 runs, 36 skips

SSE2:
  2163 decicycles in 16, 262131 runs, 13 skips
  832 decicycles in 8, 262137 runs, 7 skips

SSE2 with fast path:
  1783 decicycles in 16, 524276 runs, 12 skips
  711 decicycles in 8, 524283 runs, 5 skips

SSSE3:
  2117 decicycles in 16, 262136 runs, 8 skips
  814 decicycles in 8, 262143 runs, 1 skips

SSSE3 with fast path:
  1315 decicycles in 16, 524285 runs, 3 skips
  578 decicycles in 8, 524286 runs, 2 skips

This means around a 4% speedup for some sequences.

Signed-off-by: Diego Biurrun <diego@biurrun.de>
77 lines
4.1 KiB
Makefile
77 lines
4.1 KiB
Makefile
# Both the MLP and the TrueHD guard append the same object, x86/mlpdsp.o.
# The variable name is computed: the entry lands in OBJS-yes only when the
# guard variable expands to "yes".
# NOTE(review): with both guards enabled the object is listed twice;
# presumably the consumer of OBJS-yes de-duplicates -- confirm.
OBJS-$(CONFIG_MLP_DECODER)             += x86/mlpdsp.o
OBJS-$(CONFIG_TRUEHD_DECODER)          += x86/mlpdsp.o

# DCT object, added when CONFIG_DCT expands to "yes".
YASM-OBJS-$(CONFIG_DCT)                += x86/dct32_sse.o

# FFT objects. Each instruction-set guard (HAVE_AMD3DNOW, HAVE_AMD3DNOWEXT,
# HAVE_SSE) contributes its variant to YASM-OBJS-FFT-yes; the CONFIG_FFT
# entry then pulls in x86/fft_mmx.o plus everything collected there.
YASM-OBJS-FFT-$(HAVE_AMD3DNOW)         += x86/fft_3dn.o
YASM-OBJS-FFT-$(HAVE_AMD3DNOWEXT)      += x86/fft_3dn2.o
YASM-OBJS-FFT-$(HAVE_SSE)              += x86/fft_sse.o
YASM-OBJS-$(CONFIG_FFT)                += x86/fft_mmx.o                   \
                                          $(YASM-OBJS-FFT-yes)

# H.264 chroma motion-compensation objects (plain and _10bit variants),
# added when CONFIG_H264CHROMA expands to "yes".
YASM-OBJS-$(CONFIG_H264CHROMA)         += x86/h264_chromamc.o             \
                                          x86/h264_chromamc_10bit.o

# H.264 DSP objects, all guarded by CONFIG_H264DSP: one object goes through
# MMX-OBJS-*, and deblock, idct and weight (each with a _10bit variant) go
# through YASM-OBJS-*.
MMX-OBJS-$(CONFIG_H264DSP)             += x86/h264dsp_mmx.o
# The last entry carries no trailing backslash on purpose: a dangling
# continuation would make the list absorb whatever line directly follows it.
YASM-OBJS-$(CONFIG_H264DSP)            += x86/h264_deblock.o              \
                                          x86/h264_deblock_10bit.o        \
                                          x86/h264_idct.o                 \
                                          x86/h264_idct_10bit.o           \
                                          x86/h264_weight.o               \
                                          x86/h264_weight_10bit.o

# H.264 intra-prediction objects, guarded by CONFIG_H264PRED: the two
# intrapred objects go through YASM-OBJS-*, the _init object through
# MMX-OBJS-*.
YASM-OBJS-$(CONFIG_H264PRED)           += x86/h264_intrapred.o            \
                                          x86/h264_intrapred_10bit.o
MMX-OBJS-$(CONFIG_H264PRED)            += x86/h264_intrapred_init.o

# RV30 and RV40 decoder objects. Both guards list the rv34dsp objects;
# the RV40 guard additionally lists the rv40dsp ones.
# NOTE(review): with both decoders enabled the rv34dsp objects are listed
# twice; presumably the consumer of these lists de-duplicates -- confirm.
MMX-OBJS-$(CONFIG_RV30_DECODER)        += x86/rv34dsp_init.o
YASM-OBJS-$(CONFIG_RV30_DECODER)       += x86/rv34dsp.o
MMX-OBJS-$(CONFIG_RV40_DECODER)        += x86/rv34dsp_init.o              \
                                          x86/rv40dsp_init.o
YASM-OBJS-$(CONFIG_RV40_DECODER)       += x86/rv34dsp.o                   \
                                          x86/rv40dsp.o

# VC-1 decoder object routed through YASM-OBJS-*, added when
# CONFIG_VC1_DECODER expands to "yes".
YASM-OBJS-$(CONFIG_VC1_DECODER)        += x86/vc1dsp_yasm.o

# Per-feature objects, one guard per line. Each entry is appended to
# MMX-OBJS-yes or YASM-OBJS-yes only when its guard expands to "yes".
# Several features contribute one object to each of the two lists.
MMX-OBJS-$(CONFIG_AC3DSP)              += x86/ac3dsp_mmx.o
YASM-OBJS-$(CONFIG_AC3DSP)             += x86/ac3dsp.o
MMX-OBJS-$(CONFIG_CAVS_DECODER)        += x86/cavsdsp_mmx.o
MMX-OBJS-$(CONFIG_DNXHD_ENCODER)       += x86/dnxhd_mmx.o
MMX-OBJS-$(CONFIG_MPEGAUDIODSP)        += x86/mpegaudiodec_mmx.o
YASM-OBJS-$(CONFIG_MPEGAUDIODSP)       += x86/imdct36_sse.o
MMX-OBJS-$(CONFIG_ENCODERS)            += x86/dsputilenc_mmx.o
YASM-OBJS-$(CONFIG_ENCODERS)           += x86/dsputilenc_yasm.o
MMX-OBJS-$(CONFIG_GPL)                 += x86/idct_mmx.o
MMX-OBJS-$(CONFIG_LPC)                 += x86/lpc_mmx.o
YASM-OBJS-$(CONFIG_PNG_DECODER)        += x86/pngdsp.o
MMX-OBJS-$(CONFIG_PNG_DECODER)         += x86/pngdsp-init.o
YASM-OBJS-$(CONFIG_PRORES_DECODER)     += x86/proresdsp.o
MMX-OBJS-$(CONFIG_PRORES_DECODER)      += x86/proresdsp-init.o
MMX-OBJS-$(CONFIG_DWT)                 += x86/snowdsp_mmx.o
# VC-1 object routed through MMX-OBJS-*, followed by the VP3/VP5/VP6/VP8
# decoder objects. The VP3, VP5 and VP6 guards all list x86/vp3dsp.o, and
# the VP5 and VP6 guards both list x86/vp56dsp_init.o.
# NOTE(review): enabling several of these decoders lists the shared objects
# more than once; presumably the consumer de-duplicates -- confirm.
MMX-OBJS-$(CONFIG_VC1_DECODER)         += x86/vc1dsp_mmx.o
YASM-OBJS-$(CONFIG_VP3_DECODER)        += x86/vp3dsp.o
YASM-OBJS-$(CONFIG_VP5_DECODER)        += x86/vp3dsp.o
MMX-OBJS-$(CONFIG_VP5_DECODER)         += x86/vp56dsp_init.o
YASM-OBJS-$(CONFIG_VP6_DECODER)        += x86/vp3dsp.o                    \
                                          x86/vp56dsp.o
MMX-OBJS-$(CONFIG_VP6_DECODER)         += x86/vp56dsp_init.o
YASM-OBJS-$(CONFIG_VP8_DECODER)        += x86/vp8dsp.o
MMX-OBJS-$(CONFIG_VP8_DECODER)         += x86/vp8dsp-init.o
# When HAVE_YASM expands to "yes", MMX-OBJS-yes receives four objects that
# have no further guard, followed by everything collected in YASM-OBJS-yes.
MMX-OBJS-$(HAVE_YASM)                  += x86/dsputil_yasm.o              \
                                          x86/deinterlace.o               \
                                          x86/fmtconvert.o                \
                                          x86/h264_qpel_10bit.o           \
                                          $(YASM-OBJS-yes)

# FFT object routed through MMX-OBJS-*, added when CONFIG_FFT expands to
# "yes".
MMX-OBJS-$(CONFIG_FFT)                 += x86/fft.o

OBJS-$(HAVE_MMX) += x86/dsputil_mmx.o \
|
|
x86/fdct_mmx.o \
|
|
x86/fmtconvert_mmx.o \
|
|
x86/idct_mmx_xvid.o \
|
|
x86/idct_sse2_xvid.o \
|
|
x86/motion_est_mmx.o \
|
|
x86/mpegvideo_mmx.o \
|
|
x86/simple_idct_mmx.o \
|
|
|