2013-03-12 17:28:11 +03:00
|
|
|
# Object appended to OBJS without any CONFIG_* key.
# No trailing backslash: a continuation with nothing after it would splice
# whatever line happens to follow into this list.
OBJS += x86/constants.o
|
2012-10-05 20:54:10 +03:00
|
|
|
|
2014-07-31 14:52:24 +03:00
|
|
|
# subsystems
|
2012-10-05 20:54:10 +03:00
|
|
|
OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp_init.o
|
2014-01-16 19:30:19 +03:00
|
|
|
OBJS-$(CONFIG_AUDIODSP) += x86/audiodsp_init.o
|
2014-07-05 20:19:22 +03:00
|
|
|
OBJS-$(CONFIG_BLOCKDSP) += x86/blockdsp_init.o
|
2014-02-13 19:57:05 +03:00
|
|
|
OBJS-$(CONFIG_BSWAPDSP) += x86/bswapdsp_init.o
|
2013-08-04 14:24:15 +03:00
|
|
|
OBJS-$(CONFIG_DCT) += x86/dct_init.o
|
2014-02-03 21:09:45 +03:00
|
|
|
OBJS-$(CONFIG_FDCTDSP) += x86/fdctdsp_init.o
|
2012-10-05 20:54:10 +03:00
|
|
|
OBJS-$(CONFIG_FFT) += x86/fft_init.o
|
2014-02-04 02:17:04 +03:00
|
|
|
OBJS-$(CONFIG_FLAC_DECODER) += x86/flacdsp_init.o
|
2014-02-12 01:49:39 +03:00
|
|
|
OBJS-$(CONFIG_FLAC_ENCODER) += x86/flacdsp_init.o
|
2015-02-21 12:24:44 +02:00
|
|
|
OBJS-$(CONFIG_FMTCONVERT) += x86/fmtconvert_init.o
|
2013-11-05 10:11:47 +03:00
|
|
|
OBJS-$(CONFIG_H263DSP) += x86/h263dsp_init.o
|
2013-02-06 17:34:39 +03:00
|
|
|
OBJS-$(CONFIG_H264CHROMA) += x86/h264chroma_init.o
|
2012-10-05 20:54:10 +03:00
|
|
|
OBJS-$(CONFIG_H264DSP) += x86/h264dsp_init.o
|
|
|
|
OBJS-$(CONFIG_H264PRED) += x86/h264_intrapred_init.o
|
2013-01-24 07:24:53 +03:00
|
|
|
OBJS-$(CONFIG_H264QPEL) += x86/h264_qpel.o
|
2013-05-06 02:01:05 +03:00
|
|
|
OBJS-$(CONFIG_HPELDSP) += x86/hpeldsp_init.o
|
2014-05-30 17:28:49 +03:00
|
|
|
OBJS-$(CONFIG_LLAUDDSP) += x86/lossless_audiodsp_init.o
|
2014-01-20 22:32:51 +03:00
|
|
|
OBJS-$(CONFIG_LLVIDDSP) += x86/lossless_videodsp_init.o
|
2014-01-07 14:23:13 +03:00
|
|
|
OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp_init.o
|
2013-12-23 20:42:11 +03:00
|
|
|
OBJS-$(CONFIG_HUFFYUVENCDSP) += x86/huffyuvencdsp_mmx.o
|
2014-01-24 13:55:16 +03:00
|
|
|
OBJS-$(CONFIG_IDCTDSP) += x86/idctdsp_init.o
|
2012-10-05 20:54:10 +03:00
|
|
|
OBJS-$(CONFIG_LPC) += x86/lpc.o
|
2014-02-08 04:59:58 +03:00
|
|
|
OBJS-$(CONFIG_ME_CMP) += x86/me_cmp_init.o
|
2013-04-27 02:03:37 +03:00
|
|
|
OBJS-$(CONFIG_MPEGAUDIODSP) += x86/mpegaudiodsp.o
|
2014-01-24 12:41:12 +03:00
|
|
|
# Objects keyed on CONFIG_MPEGVIDEO. The backslash continuation must stay
# unbroken; any line placed between the entries becomes part of the value.
OBJS-$(CONFIG_MPEGVIDEO) += x86/mpegvideo.o \
                            x86/mpegvideodsp.o
|
2013-12-30 21:19:39 +03:00
|
|
|
# Objects keyed on CONFIG_MPEGVIDEOENC, written as one contiguous
# backslash-continued list so nothing else is absorbed into the value.
OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoenc.o \
                               x86/mpegvideoencdsp_init.o
|
2014-02-04 01:29:09 +03:00
|
|
|
OBJS-$(CONFIG_PIXBLOCKDSP) += x86/pixblockdsp_init.o
|
2014-01-08 16:00:10 +03:00
|
|
|
OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp_init.o
|
2013-12-20 17:28:18 +03:00
|
|
|
OBJS-$(CONFIG_VIDEODSP) += x86/videodsp_init.o
|
|
|
|
OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp_init.o
|
|
|
|
OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o
|
|
|
|
|
2014-07-31 14:52:24 +03:00
|
|
|
# decoders/encoders
|
2013-12-20 17:28:18 +03:00
|
|
|
OBJS-$(CONFIG_AAC_DECODER) += x86/sbrdsp_init.o
|
2015-02-16 04:16:25 +02:00
|
|
|
OBJS-$(CONFIG_ADPCM_G722_DECODER) += x86/g722dsp_init.o
|
|
|
|
OBJS-$(CONFIG_ADPCM_G722_ENCODER) += x86/g722dsp_init.o
|
2014-11-23 14:21:29 +02:00
|
|
|
OBJS-$(CONFIG_APNG_DECODER) += x86/pngdsp_init.o
|
2013-12-20 17:28:18 +03:00
|
|
|
OBJS-$(CONFIG_CAVS_DECODER) += x86/cavsdsp.o
|
|
|
|
OBJS-$(CONFIG_DCA_DECODER) += x86/dcadsp_init.o
|
2014-03-27 05:06:30 +03:00
|
|
|
OBJS-$(CONFIG_DNXHD_ENCODER) += x86/dnxhdenc_init.o
|
2014-07-27 01:04:44 +03:00
|
|
|
OBJS-$(CONFIG_HEVC_DECODER) += x86/hevcdsp_init.o
|
2014-10-03 02:38:01 +03:00
|
|
|
OBJS-$(CONFIG_MLP_DECODER) += x86/mlpdsp_init.o
|
2014-07-21 23:13:21 +03:00
|
|
|
OBJS-$(CONFIG_MPEG4_DECODER) += x86/xvididct_init.o
|
2012-10-05 20:54:10 +03:00
|
|
|
OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp_init.o
|
2012-12-11 04:52:55 +03:00
|
|
|
OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp_init.o
|
2012-10-08 22:36:14 +03:00
|
|
|
OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp_init.o
|
2012-10-05 20:54:10 +03:00
|
|
|
OBJS-$(CONFIG_RV30_DECODER) += x86/rv34dsp_init.o
|
|
|
|
# Objects keyed on CONFIG_RV40_DECODER. rv34dsp_init.o is also listed under
# CONFIG_RV30_DECODER; both entries here belong to a single continued list.
OBJS-$(CONFIG_RV40_DECODER) += x86/rv34dsp_init.o \
                               x86/rv40dsp_init.o
|
2014-07-05 22:40:29 +03:00
|
|
|
OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc_init.o
|
2014-10-03 02:38:01 +03:00
|
|
|
OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp_init.o
|
2014-11-23 02:49:04 +02:00
|
|
|
OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp_init.o
|
|
|
|
OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o
|
2014-11-26 17:59:14 +02:00
|
|
|
OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc_init.o
|
2012-10-07 18:41:10 +03:00
|
|
|
OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_init.o
|
2013-01-20 09:21:10 +03:00
|
|
|
OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp_init.o
|
2012-10-06 21:28:56 +03:00
|
|
|
OBJS-$(CONFIG_VP6_DECODER) += x86/vp6dsp_init.o
|
2014-02-18 13:52:30 +03:00
|
|
|
OBJS-$(CONFIG_VP7_DECODER) += x86/vp8dsp_init.o
|
2012-10-05 20:54:10 +03:00
|
|
|
OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp_init.o
|
2013-09-22 04:24:03 +03:00
|
|
|
OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o
|
2013-03-30 10:25:44 +03:00
|
|
|
OBJS-$(CONFIG_WEBP_DECODER) += x86/vp8dsp_init.o
|
2010-03-16 23:22:59 +02:00
|
|
|
|
2014-07-31 14:52:24 +03:00
|
|
|
|
|
|
|
# GCC inline assembly optimizations
|
|
|
|
# subsystems
|
2014-07-01 15:38:57 +03:00
|
|
|
MMX-OBJS-$(CONFIG_DIRAC_DECODER) += x86/dirac_dwt.o
|
2014-02-03 21:09:45 +03:00
|
|
|
MMX-OBJS-$(CONFIG_FDCTDSP) += x86/fdct.o
|
2014-09-25 01:53:07 +03:00
|
|
|
MMX-OBJS-$(CONFIG_IDCTDSP) += x86/simple_idct.o
|
2013-12-20 17:28:18 +03:00
|
|
|
|
2014-07-31 14:52:24 +03:00
|
|
|
# decoders/encoders
|
2013-01-15 00:30:30 +03:00
|
|
|
MMX-OBJS-$(CONFIG_SNOW_DECODER) += x86/snowdsp.o
|
|
|
|
MMX-OBJS-$(CONFIG_SNOW_ENCODER) += x86/snowdsp.o
|
2012-02-03 01:55:57 +03:00
|
|
|
MMX-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_mmx.o
|
|
|
|
|
2014-07-31 14:52:24 +03:00
|
|
|
|
|
|
|
# YASM optimizations
|
2013-05-30 00:15:17 +03:00
|
|
|
# Object appended to YASM-OBJS without any CONFIG_* key.
# No trailing backslash: there is no continuation entry, and a dangling
# backslash would pull the following line into the list.
YASM-OBJS += x86/deinterlace.o
|
|
|
|
|
2014-07-31 14:52:24 +03:00
|
|
|
# subsystems
|
2012-02-03 01:55:57 +03:00
|
|
|
YASM-OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp.o
|
2014-01-16 19:30:19 +03:00
|
|
|
YASM-OBJS-$(CONFIG_AUDIODSP) += x86/audiodsp.o
|
2014-06-19 07:00:17 +03:00
|
|
|
YASM-OBJS-$(CONFIG_BLOCKDSP) += x86/blockdsp.o
|
2014-02-13 19:57:05 +03:00
|
|
|
YASM-OBJS-$(CONFIG_BSWAPDSP) += x86/bswapdsp.o
|
2012-08-08 02:49:46 +03:00
|
|
|
YASM-OBJS-$(CONFIG_DCT) += x86/dct32.o
|
2013-01-15 00:30:30 +03:00
|
|
|
# Objects keyed on CONFIG_DIRAC_DECODER. Kept as one contiguous continued
# list, with a space before the backslash like the other lists in this file.
YASM-OBJS-$(CONFIG_DIRAC_DECODER) += x86/diracdsp_mmx.o x86/diracdsp_yasm.o \
                                     x86/dwt_yasm.o
|
2014-03-27 05:06:30 +03:00
|
|
|
YASM-OBJS-$(CONFIG_DNXHD_ENCODER) += x86/dnxhdenc.o
|
2012-08-08 03:12:17 +03:00
|
|
|
YASM-OBJS-$(CONFIG_FFT) += x86/fft.o
|
2014-02-04 02:17:04 +03:00
|
|
|
YASM-OBJS-$(CONFIG_FLAC_DECODER) += x86/flacdsp.o
|
2014-08-13 00:22:02 +03:00
|
|
|
# x86/flac_dsp_gpl.o is appended only when CONFIG_GPL has a non-empty value
# (ifdef tests the variable at parse time without expanding it).
ifdef CONFIG_GPL
YASM-OBJS-$(CONFIG_FLAC_ENCODER) += x86/flac_dsp_gpl.o
endif
|
2015-02-21 12:24:44 +02:00
|
|
|
YASM-OBJS-$(CONFIG_FMTCONVERT) += x86/fmtconvert.o
|
2013-11-05 10:11:47 +03:00
|
|
|
YASM-OBJS-$(CONFIG_H263DSP) += x86/h263_loopfilter.o
|
2013-02-06 17:34:39 +03:00
|
|
|
# Objects keyed on CONFIG_H264CHROMA; one unbroken continued list.
YASM-OBJS-$(CONFIG_H264CHROMA) += x86/h264_chromamc.o \
                                  x86/h264_chromamc_10bit.o
|
2010-09-03 19:52:46 +03:00
|
|
|
# Objects keyed on CONFIG_H264DSP. Every entry except the last ends in a
# backslash, and no other line may sit between them or it joins the value.
YASM-OBJS-$(CONFIG_H264DSP) += x86/h264_deblock.o \
                               x86/h264_deblock_10bit.o \
                               x86/h264_idct.o \
                               x86/h264_idct_10bit.o \
                               x86/h264_weight.o \
                               x86/h264_weight_10bit.o
|
2011-06-06 02:20:05 +03:00
|
|
|
# Objects keyed on CONFIG_H264PRED; one unbroken continued list.
YASM-OBJS-$(CONFIG_H264PRED) += x86/h264_intrapred.o \
                                x86/h264_intrapred_10bit.o
|
2012-10-13 18:04:50 +03:00
|
|
|
# Objects keyed on CONFIG_H264QPEL. fpel.o and qpel.o are also listed under
# CONFIG_QPELDSP (and fpel.o under CONFIG_HPELDSP); the repetition is
# preserved exactly as in the original lists.
YASM-OBJS-$(CONFIG_H264QPEL) += x86/h264_qpel_8bit.o \
                                x86/h264_qpel_10bit.o \
                                x86/fpel.o \
                                x86/qpel.o
|
2013-03-11 01:37:59 +03:00
|
|
|
# Objects keyed on CONFIG_HPELDSP; one unbroken continued list.
YASM-OBJS-$(CONFIG_HPELDSP) += x86/fpel.o \
                               x86/hpeldsp.o
|
2014-01-07 14:23:13 +03:00
|
|
|
YASM-OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp.o
|
2014-07-02 00:10:15 +03:00
|
|
|
YASM-OBJS-$(CONFIG_IDCTDSP) += x86/idctdsp.o
|
2014-05-30 17:28:49 +03:00
|
|
|
YASM-OBJS-$(CONFIG_LLAUDDSP) += x86/lossless_audiodsp.o
|
2014-01-20 22:32:51 +03:00
|
|
|
YASM-OBJS-$(CONFIG_LLVIDDSP) += x86/lossless_videodsp.o
|
2014-02-08 04:59:58 +03:00
|
|
|
YASM-OBJS-$(CONFIG_ME_CMP) += x86/me_cmp.o
|
2012-08-08 02:49:46 +03:00
|
|
|
YASM-OBJS-$(CONFIG_MPEGAUDIODSP) += x86/imdct36.o
|
2013-12-30 21:56:07 +03:00
|
|
|
YASM-OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoencdsp.o
|
2014-02-04 01:29:09 +03:00
|
|
|
YASM-OBJS-$(CONFIG_PIXBLOCKDSP) += x86/pixblockdsp.o
|
2014-01-08 16:00:10 +03:00
|
|
|
# Objects keyed on CONFIG_QPELDSP; one unbroken continued list.
YASM-OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp.o \
                               x86/fpel.o \
                               x86/qpel.o
|
2013-12-20 17:28:18 +03:00
|
|
|
YASM-OBJS-$(CONFIG_VIDEODSP) += x86/videodsp.o
|
|
|
|
YASM-OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp.o
|
|
|
|
|
2014-07-31 14:52:24 +03:00
|
|
|
# decoders/encoders
|
2013-12-20 17:28:18 +03:00
|
|
|
YASM-OBJS-$(CONFIG_AAC_DECODER) += x86/sbrdsp.o
|
2015-02-16 04:16:25 +02:00
|
|
|
YASM-OBJS-$(CONFIG_ADPCM_G722_DECODER) += x86/g722dsp.o
|
|
|
|
YASM-OBJS-$(CONFIG_ADPCM_G722_ENCODER) += x86/g722dsp.o
|
2014-11-23 14:21:29 +02:00
|
|
|
YASM-OBJS-$(CONFIG_APNG_DECODER) += x86/pngdsp.o
|
2013-12-20 17:28:18 +03:00
|
|
|
YASM-OBJS-$(CONFIG_DCA_DECODER) += x86/dcadsp.o
|
2014-07-29 01:34:53 +03:00
|
|
|
# Objects keyed on CONFIG_HEVC_DECODER. The list is a single backslash
# continuation, so it must contain object names only: any other text placed
# between the entries would be appended to the variable as if it were a file.
YASM-OBJS-$(CONFIG_HEVC_DECODER) += x86/hevc_mc.o \
                                    x86/hevc_deblock.o \
                                    x86/hevc_idct.o \
                                    x86/hevc_res_add.o \
                                    x86/hevc_sao.o
|
2014-10-03 02:38:01 +03:00
|
|
|
YASM-OBJS-$(CONFIG_MLP_DECODER) += x86/mlpdsp.o
|
2015-03-11 01:11:51 +02:00
|
|
|
YASM-OBJS-$(CONFIG_MPEG4_DECODER) += x86/xvididct.o
|
2012-01-27 18:21:55 +03:00
|
|
|
YASM-OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp.o
|
2011-10-12 20:10:22 +03:00
|
|
|
YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o
|
2012-03-26 21:34:29 +03:00
|
|
|
YASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o
|
2012-01-01 20:33:22 +03:00
|
|
|
YASM-OBJS-$(CONFIG_RV30_DECODER) += x86/rv34dsp.o
|
rv40: x86 SIMD for biweight
Provide MMX, SSE2 and SSSE3 versions, with a fast-path when the weights are
multiples of 512 (which is often the case when the values round up nicely).
*_TIMER report for the 16x16 and 8x8 cases:
C:
9015 decicycles in 16, 524257 runs, 31 skips
2656 decicycles in 8, 524271 runs, 17 skips
MMX:
4156 decicycles in 16, 262090 runs, 54 skips
1206 decicycles in 8, 262131 runs, 13 skips
MMX on fast-path:
2760 decicycles in 16, 524222 runs, 66 skips
995 decicycles in 8, 524252 runs, 36 skips
SSE2:
2163 decicycles in 16, 262131 runs, 13 skips
832 decicycles in 8, 262137 runs, 7 skips
SSE2 with fast path:
1783 decicycles in 16, 524276 runs, 12 skips
711 decicycles in 8, 524283 runs, 5 skips
SSSE3:
2117 decicycles in 16, 262136 runs, 8 skips
814 decicycles in 8, 262143 runs, 1 skips
SSSE3 with fast path:
1315 decicycles in 16, 524285 runs, 3 skips
578 decicycles in 8, 524286 runs, 2 skips
This means around a 4% speedup for some sequences.
Signed-off-by: Diego Biurrun <diego@biurrun.de>
2012-01-12 02:11:15 +03:00
|
|
|
# Objects keyed on CONFIG_RV40_DECODER. rv34dsp.o is also listed under
# CONFIG_RV30_DECODER; both entries here form one continued list.
YASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv34dsp.o \
                                    x86/rv40dsp.o
|
2014-07-05 22:40:29 +03:00
|
|
|
YASM-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o
|
2014-10-03 02:38:01 +03:00
|
|
|
YASM-OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o
|
2014-02-12 04:28:54 +03:00
|
|
|
YASM-OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp.o
|
2014-11-26 17:59:14 +02:00
|
|
|
YASM-OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc.o
|
2011-10-18 21:50:49 +03:00
|
|
|
YASM-OBJS-$(CONFIG_V210_DECODER) += x86/v210.o
|
2012-08-08 01:35:43 +03:00
|
|
|
YASM-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp.o
|
2013-01-16 04:00:41 +03:00
|
|
|
YASM-OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp.o
|
2012-10-06 21:28:56 +03:00
|
|
|
YASM-OBJS-$(CONFIG_VP6_DECODER) += x86/vp6dsp.o
|
2014-02-18 13:52:30 +03:00
|
|
|
# Objects keyed on CONFIG_VP7_DECODER; the same two objects are listed under
# CONFIG_VP8_DECODER. One unbroken continued list.
YASM-OBJS-$(CONFIG_VP7_DECODER) += x86/vp8dsp.o \
                                   x86/vp8dsp_loopfilter.o
|
2013-10-29 16:45:48 +03:00
|
|
|
# Objects keyed on CONFIG_VP8_DECODER; one unbroken continued list.
YASM-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp.o \
                                   x86/vp8dsp_loopfilter.o
|
2014-02-16 21:30:50 +03:00
|
|
|
# Objects keyed on CONFIG_VP9_DECODER. Every entry except the last ends in a
# backslash, and nothing else may appear between them.
YASM-OBJS-$(CONFIG_VP9_DECODER) += x86/vp9intrapred.o \
                                   x86/vp9itxfm.o \
                                   x86/vp9lpf.o \
                                   x86/vp9mc.o
|
2013-03-30 10:25:44 +03:00
|
|
|
YASM-OBJS-$(CONFIG_WEBP_DECODER) += x86/vp8dsp.o
|