1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2024-12-12 19:18:44 +02:00
FFmpeg/libavcodec/x86
Ilia Valiakhmetov 73d9a9a6af libavcodec/vp9: ipred_dl_32x32_16 avx2 implementation
vp9_diag_downleft_32x32_8bpp_c: 580.2
vp9_diag_downleft_32x32_8bpp_sse2: 75.6
vp9_diag_downleft_32x32_8bpp_ssse3: 73.7
vp9_diag_downleft_32x32_8bpp_avx: 72.7
vp9_diag_downleft_32x32_10bpp_c: 1101.2
vp9_diag_downleft_32x32_10bpp_sse2: 145.4
vp9_diag_downleft_32x32_10bpp_ssse3: 137.5
vp9_diag_downleft_32x32_10bpp_avx: 134.8
vp9_diag_downleft_32x32_10bpp_avx2: 94.0
vp9_diag_downleft_32x32_12bpp_c: 1108.5
vp9_diag_downleft_32x32_12bpp_sse2: 145.5
vp9_diag_downleft_32x32_12bpp_ssse3: 137.3
vp9_diag_downleft_32x32_12bpp_avx: 135.2
vp9_diag_downleft_32x32_12bpp_avx2: 94.0

~30% faster than avx implementation

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
2017-06-06 08:05:03 -04:00
..
aacencdsp_init.c aacenc: add SIMD optimizations for abs_pow34 and quantization 2016-10-18 21:41:18 +01:00
aacencdsp.asm aacenc: add SIMD optimizations for abs_pow34 and quantization 2016-10-18 21:41:18 +01:00
aacpsdsp_init.c x86/aacps: add ff_ps_stereo_interpolate_ipdopd_sse3() 2017-06-02 11:06:24 -03:00
aacpsdsp.asm x86/aacpsdsp: optimize ff_ps_mul_pair_single_sse 2017-06-04 23:29:56 -03:00
ac3dsp_downmix.asm Merge commit 'b57e38f52cc3f31a27105c28887d57cd6812c3eb' 2017-03-22 12:49:29 +01:00
ac3dsp_init.c Merge commit 'b57e38f52cc3f31a27105c28887d57cd6812c3eb' 2017-03-22 12:49:29 +01:00
ac3dsp.asm
alacdsp_init.c
alacdsp.asm
audiodsp_init.c Merge commit '12004a9a7f20e44f4da2ee6c372d5e1794c8d6c5' 2017-03-20 22:35:07 +01:00
audiodsp.asm Merge commit '6be7944ee2ec2f045e6eb9a93237e992c8b20ac4' 2017-03-23 18:05:27 -03:00
blockdsp_init.c
blockdsp.asm
bswapdsp_init.c
bswapdsp.asm
cabac.h asm: FF_-prefix internal macros used in inline assembly 2016-06-27 17:21:18 +02:00
cavsdsp.c cavs: add a sse2 idct implementation. 2017-04-06 10:03:28 -04:00
cavsidct.asm cavs: add a sse2 idct implementation. 2017-04-06 10:03:28 -04:00
constants.c vp9mc/x86: add AVX and AVX2 MC 2016-08-03 11:00:08 +02:00
constants.h Merge commit '3c504bc3599f00bfc5923adc114beef34bce11d0' 2017-03-15 22:07:28 -03:00
dcadsp_init.c x86/dcadec: add ff_lfe_fir1_float_{sse3,avx} 2016-02-22 21:21:34 -03:00
dcadsp.asm x86: Add missing colons after assembly labels 2016-10-17 16:31:26 +02:00
dct32.asm
dct_init.c
dirac_dwt_init.c dirac_dwt: Make x86 files/functions names consistent 2016-02-05 19:30:23 -08:00
dirac_dwt.asm dirac_dwt: Make x86 files/functions names consistent 2016-02-05 19:30:23 -08:00
diracdsp_init.c x86/diracdsp: make ff_put_signed_rect_clamped_10_sse4 work on x86_32 2016-07-20 13:43:38 -03:00
diracdsp.asm x86/diracdsp: make ff_put_signed_rect_clamped_10_sse4 work on x86_32 2016-07-20 13:43:38 -03:00
dnxhdenc_init.c
dnxhdenc.asm
fdct.c
fdct.h
fdctdsp_init.c
fft_init.c Merge commit '73ff983e8dd22ccee166403d0bbbc9c1cd543622' 2016-04-12 15:42:21 +01:00
fft.asm avcodec: Extend fft to size 2^17 2016-03-04 13:51:42 +01:00
fft.h fft: Split MDCT bits off from FFT 2016-03-01 10:18:28 +01:00
flac_dsp_gpl.asm
flacdsp_init.c
flacdsp.asm
fmtconvert_init.c Merge commit 'dc40a70c5755bccfb1a1349639943e1f408bea50' 2016-06-26 15:53:00 +02:00
fmtconvert.asm
fpel.asm Merge commit '009adfd4fbdd78a890a4a65d6f141c467bb027fa' 2017-03-21 15:02:31 +01:00
fpel.h
g722dsp_init.c
g722dsp.asm
h263_loopfilter.asm
h263dsp_init.c
h264_cabac.c Merge commit '0a35f128f3c6e0ae9a0a2236c557602c108da269' 2017-04-08 14:30:13 +02:00
h264_chromamc_10bit.asm Merge commit 'e4a94d8b36c48d95a7d412c40d7b558422ff659c' 2017-03-21 15:20:45 -03:00
h264_chromamc.asm Merge commit 'e4a94d8b36c48d95a7d412c40d7b558422ff659c' 2017-03-21 15:20:45 -03:00
h264_deblock_10bit.asm avcodec/x86: deduplicate PASS8ROWS macro 2017-02-18 20:26:49 +01:00
h264_deblock.asm avcodec/h264: enable sse2 chroma deblock/loop filter functions 2017-02-27 13:22:06 +01:00
h264_idct_10bit.asm avcodec/h264: sse2 and avx 4:2:2 idct add8 10-bit functions 2016-11-30 22:58:28 +01:00
h264_idct.asm avcodec/h264: add sse2 versions of previous idct functions 2017-05-15 15:00:20 +02:00
h264_intrapred_10bit.asm Merge commit '5801f9ed245ca5ebb57b0b5183de7a24aaece133' 2017-03-23 11:58:01 +01:00
h264_intrapred_init.c h264pred: added AVX2 implementation for tm_vp8 16x16. 2017-03-20 09:45:42 -04:00
h264_intrapred.asm Merge commit '5801f9ed245ca5ebb57b0b5183de7a24aaece133' 2017-03-23 11:58:01 +01:00
h264_qpel_8bit.asm
h264_qpel_10bit.asm x86: Add missing colons after assembly labels 2016-10-17 16:31:26 +02:00
h264_qpel.c Merge commit '0361e4dcb4d394c88c33364415a3b8fe315b67d1' 2017-03-31 09:44:04 +02:00
h264_weight_10bit.asm
h264_weight.asm x86: Add missing colons after assembly labels 2016-10-17 16:31:26 +02:00
h264chroma_init.c Merge commit 'e4a94d8b36c48d95a7d412c40d7b558422ff659c' 2017-03-21 15:20:45 -03:00
h264dsp_init.c avcodec/h264: add sse2 versions of previous idct functions 2017-05-15 15:00:20 +02:00
hevc_add_res.asm x86/hevc_add_res: merge last remaining changes from 3d65359832 2017-03-31 20:49:45 -03:00
hevc_deblock.asm avcodec/x86: deduplicate PASS8ROWS macro 2017-02-18 20:26:49 +01:00
hevc_idct.asm Merge commit '112cee0241f5799edff0e4682b9e8639b046dc78' 2017-03-23 15:58:46 +01:00
hevc_mc.asm x86: Add missing colons after assembly labels 2016-10-17 16:31:26 +02:00
hevc_sao_10bit.asm
hevc_sao.asm
hevcdsp_init.c Merge commit '6d5636ad9ab6bd9bedf902051d88b7044385f88b' 2017-03-24 12:33:25 +01:00
hevcdsp.h Merge commit '6d5636ad9ab6bd9bedf902051d88b7044385f88b' 2017-03-24 12:33:25 +01:00
hpeldsp_init.c Revert "Merge commit '0a39c9ac0bfd7345fe676b4e2707d9cec3cbb553'" 2017-02-01 02:01:07 +01:00
hpeldsp_rnd_template.c asm: FF_-prefix internal macros used in inline assembly 2016-06-27 17:21:18 +02:00
hpeldsp_vp3_init.c Revert "Merge commit '0a39c9ac0bfd7345fe676b4e2707d9cec3cbb553'" 2017-02-01 02:01:07 +01:00
hpeldsp_vp3.asm Merge commit '1dfc3cf89d0eb026af28be46294b85d79499ffb5' 2017-01-31 14:49:29 -03:00
hpeldsp.asm Merge commit '1dfc3cf89d0eb026af28be46294b85d79499ffb5' 2017-01-31 14:49:29 -03:00
hpeldsp.h Revert "Merge commit '0a39c9ac0bfd7345fe676b4e2707d9cec3cbb553'" 2017-02-01 02:01:07 +01:00
huffyuvdsp_init.c lavc/huffyuvdsp: only transmit the pix_fmt instead of the whole avctx 2017-03-22 16:22:20 +01:00
huffyuvdsp.asm huffyuvdsp: move functions only used by huffyuv from lossless_videodsp 2017-01-12 22:53:05 -03:00
huffyuvencdsp_init.c huffyuvencdsp: move shared functions to a new lossless_videoencdsp context 2017-01-12 22:53:04 -03:00
huffyuvencdsp.asm huffyuvencdsp: move shared functions to a new lossless_videoencdsp context 2017-01-12 22:53:04 -03:00
idctdsp_init.c avcodec/x86/idctdsp_init: reindent 2017-05-30 13:20:44 +02:00
idctdsp.asm
idctdsp.h avcodec/x86/idctdsp: Remove duplicate include 2017-03-26 19:17:30 +02:00
imdct36.asm avcodec/x86/imdct36: fix building with nasm 2.11.05 2017-01-02 20:44:16 +01:00
inline_asm.h Merge commit '41ed7ab45fc693f7d7fc35664c0233f4c32d69bb' 2016-06-21 21:55:34 +02:00
jpeg2000dsp_init.c x86: use the new helper macros where useful 2016-02-14 20:00:21 -03:00
jpeg2000dsp.asm
lossless_audiodsp_init.c x86: lossless audio: SSE4 madd 32bits 2016-05-07 23:28:48 +02:00
lossless_audiodsp.asm x86: lossless audio: SSE4 madd 32bits 2016-05-07 23:28:48 +02:00
lossless_videodsp_init.c avcodec/lossless_videodsp: use ptrdiff_t for length parameters 2017-03-22 18:38:35 -03:00
lossless_videodsp.asm Merge commit 'd06dfaa5cbdd20acfd2364b16c0f4ae4ddb30a65' 2017-01-31 15:36:49 -03:00
lossless_videoencdsp_init.c huffyuvencdsp: move shared functions to a new lossless_videoencdsp context 2017-01-12 22:53:04 -03:00
lossless_videoencdsp.asm huffyuvencdsp: move shared functions to a new lossless_videoencdsp context 2017-01-12 22:53:04 -03:00
lpc.c Merge commit '4efab89332ea39a77145e8b15562b981d9dbde68' 2017-01-31 15:08:19 -03:00
Makefile avcodec/x86: move simple_idct to external assembly 2017-05-30 13:20:42 +02:00
mathops.h
me_cmp_init.c asm: FF_-prefix internal macros used in inline assembly 2016-06-27 17:21:18 +02:00
me_cmp.asm
mlpdsp_init.c x86: use the new helper macros where useful 2016-02-14 20:00:21 -03:00
mlpdsp.asm
mpegaudiodsp.c Merge commit '2caa93b813adc5dbb7771dfe615da826a2947d18' 2017-03-21 16:04:22 -03:00
mpegvideo.c asm: FF_-prefix internal macros used in inline assembly 2016-06-27 17:21:18 +02:00
mpegvideodsp.c Merge commit 'dc40a70c5755bccfb1a1349639943e1f408bea50' 2016-06-26 15:53:00 +02:00
mpegvideoenc_qns_template.c
mpegvideoenc_template.c asm: FF_-prefix internal macros used in inline assembly 2016-06-27 17:21:18 +02:00
mpegvideoenc.c lavc/mpegvideoenc: reformat inv_zigzag_direct16 so the zigzag pattern is visible 2017-05-19 11:17:58 +02:00
mpegvideoencdsp_init.c
mpegvideoencdsp.asm
pixblockdsp_init.c Merge commit 'de452e503734ebb0fdbce86e9d16693b3530fad3' 2017-03-20 15:58:32 +01:00
pixblockdsp.asm Merge commit 'de452e503734ebb0fdbce86e9d16693b3530fad3' 2017-03-20 15:58:32 +01:00
pngdsp_init.c
pngdsp.asm
proresdsp_init.c Merge commit '3fd22538bc0e0de84b31335266b4b1577d3d609e' 2017-03-19 15:30:13 -03:00
proresdsp.asm prores: Change type of stride parameters to ptrdiff_t 2016-08-26 11:50:21 +02:00
qpel.asm
qpeldsp_init.c
qpeldsp.asm
rnd_template.c asm: FF_-prefix internal macros used in inline assembly 2016-06-27 17:21:18 +02:00
rv34dsp_init.c x86/rv34dsp: add ff_rv34_idct_dc_add_sse2 2017-02-02 17:51:21 -03:00
rv34dsp.asm x86/rv34dsp: add ff_rv34_idct_dc_add_sse2 2017-02-02 17:51:21 -03:00
rv40dsp_init.c Merge commit 'e4a94d8b36c48d95a7d412c40d7b558422ff659c' 2017-03-21 15:20:45 -03:00
rv40dsp.asm Merge commit '41ed7ab45fc693f7d7fc35664c0233f4c32d69bb' 2016-06-21 21:55:34 +02:00
sbrdsp_init.c
sbrdsp.asm x86/aacdec: use HADDPS macro 2016-06-08 14:18:18 -03:00
simple_idct10_template.asm Merge commit '3fd22538bc0e0de84b31335266b4b1577d3d609e' 2017-03-19 15:30:13 -03:00
simple_idct10.asm
simple_idct.asm avcodec/x86: move simple_idct to external assembly 2017-05-30 13:20:42 +02:00
simple_idct.h x86/simple_idct: add explicit sse2 simple_idct_put/add versions. 2017-04-06 10:03:28 -04:00
snowdsp.c asm: FF_-prefix internal macros used in inline assembly 2016-06-27 17:21:18 +02:00
svq1enc_init.c
svq1enc.asm
synth_filter_init.c x86: use the new helper macros where useful 2016-02-14 20:00:21 -03:00
synth_filter.asm
takdsp_init.c
takdsp.asm
ttadsp_init.c avcodec/ttadsp: cosmetics 2016-08-06 18:27:01 -03:00
ttadsp.asm avcodec/ttadsp: cosmetics 2016-08-06 18:27:01 -03:00
ttaencdsp_init.c x86/ttaenc: add ff_ttaenc_filter_process_{ssse3,sse4} 2016-08-02 15:48:04 -03:00
ttaencdsp.asm x86/ttaenc: add ff_ttaenc_filter_process_{ssse3,sse4} 2016-08-02 15:48:04 -03:00
v210-init.c
v210.asm
v210enc_init.c Merge commit 'e280fe13291e9c712a5f4aa13b5263f3e8afed45' 2016-02-16 17:23:32 +00:00
v210enc.asm x86: Add missing colons after assembly labels 2016-10-17 16:31:26 +02:00
vc1dsp_init.c avcodec/x86/vc1dsp_init: Fix build failure with --disable-optimizations and clang 2017-04-27 04:25:31 +02:00
vc1dsp_loopfilter.asm x86/vc1dsp: Split the file into MC and loopfilter 2016-02-29 08:46:53 -08:00
vc1dsp_mc.asm Merge commit '2ec9fa5ec60dcd10e1cb10d8b4e4437e634ea428' 2017-03-21 14:29:52 -03:00
vc1dsp_mmx.c idct: Change type of array stride parameters to ptrdiff_t 2016-09-29 14:48:03 +02:00
vc1dsp.h
videodsp_init.c
videodsp.asm Merge commit 'b89804da9bad2d94dd95bf20ac6187447e9c17e9' 2017-03-23 18:35:49 -03:00
vorbisdsp_init.c
vorbisdsp.asm
vp3dsp_init.c Merge commit '6892df9294d93322d43255ada299507465bc93c8' 2017-03-19 18:41:26 +01:00
vp3dsp.asm Merge commit '6892df9294d93322d43255ada299507465bc93c8' 2017-03-19 18:41:26 +01:00
vp6dsp_init.c Merge commit '721d57e608dc4fd6c86f27c5ae76ef559d646220' 2017-03-19 17:15:24 -03:00
vp6dsp.asm Merge commit 'd9d26a3674f31f482f54e936fcb382160830877a' 2017-03-19 14:54:25 -03:00
vp8dsp_init.c x86/vp8dsp: add ff_vp8_idct_dc_add_sse2 2017-02-02 17:18:58 -03:00
vp8dsp_loopfilter.asm Merge commit '802727b538b484e3f9d1345bfcc4ab24cfea8898' 2017-03-19 15:18:31 -03:00
vp8dsp.asm Merge commit '802727b538b484e3f9d1345bfcc4ab24cfea8898' 2017-03-19 15:18:31 -03:00
vp9dsp_init_10bpp.c
vp9dsp_init_12bpp.c
vp9dsp_init_16bpp_template.c vp9: re-split the decoder/format/dsp interface header files. 2017-03-28 18:04:26 -04:00
vp9dsp_init_16bpp.c libavcodec/vp9: ipred_dl_32x32_16 avx2 implementation 2017-06-06 08:05:03 -04:00
vp9dsp_init.c vp9: re-split the decoder/format/dsp interface header files. 2017-03-28 18:04:26 -04:00
vp9dsp_init.h vp9: re-split the decoder/format/dsp interface header files. 2017-03-28 18:04:26 -04:00
vp9intrapred_16bpp.asm libavcodec/vp9: ipred_dl_32x32_16 avx2 implementation 2017-06-06 08:05:03 -04:00
vp9intrapred.asm
vp9itxfm_16bpp.asm
vp9itxfm_template.asm
vp9itxfm.asm x86/vp9itxfm: add missing AVX2 guards 2016-11-18 17:01:11 -03:00
vp9lpf_16bpp.asm doc: fix spelling errors 2016-10-21 23:58:47 +02:00
vp9lpf.asm vp9lpf/x86: make filter_16_h work on 32-bit. 2016-10-04 10:54:09 +02:00
vp9mc_16bpp.asm
vp9mc.asm Merge commit 'e99ecda55082cb9dde8fd349361e169dc383943a' 2017-03-16 20:25:39 +01:00
vp56_arith.h
w64xmmtest.c Merge commit 'de2ae3c1fae5a2eb539b9abd7bc2a9ca8c286ff0' 2017-03-21 14:43:53 +01:00
xvididct_init.c x86/xvididct: remove use of ff_put/add_pixels_clamped function pointer. 2017-04-06 10:03:27 -04:00
xvididct.asm
xvididct.h Merge commit '2ec9fa5ec60dcd10e1cb10d8b4e4437e634ea428' 2017-03-21 14:29:52 -03:00