1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2024-12-07 11:13:41 +02:00
FFmpeg/libavcodec/x86
James Darnley 5336887867 avcodec/h264: sse2, avx h luma mbaff deblock/loop filter
x86-64 only

Yorkfield:
- sse2: ~2.17x (434 vs. 200 cycles)

Nehalem:
- sse2: ~2.94x (409 vs. 139 cycles)

Skylake:
- sse2: ~3.10x (370 vs. 119 cycles)
- avx:  ~3.29x (370 vs. 112 cycles)
2017-02-18 20:26:52 +01:00
..
aacencdsp_init.c aacenc: add SIMD optimizations for abs_pow34 and quantization 2016-10-18 21:41:18 +01:00
aacencdsp.asm aacenc: add SIMD optimizations for abs_pow34 and quantization 2016-10-18 21:41:18 +01:00
aacpsdsp_init.c
aacpsdsp.asm x86/aacpsdsp: optimize add_squares loop 2016-06-14 12:41:23 -03:00
ac3dsp_init.c Merge commit '4efab89332ea39a77145e8b15562b981d9dbde68' 2017-01-31 15:08:19 -03:00
ac3dsp.asm
alacdsp_init.c x86/alacdsp: add simd optimized functions 2015-10-06 20:22:00 -03:00
alacdsp.asm x86/alacdsp: add simd optimized functions 2015-10-06 20:22:00 -03:00
audiodsp_init.c Merge commit 'dc40a70c5755bccfb1a1349639943e1f408bea50' 2016-06-26 15:53:00 +02:00
audiodsp.asm
blockdsp_init.c blockdsp: reindent after parameter removal 2015-10-03 23:34:56 +02:00
blockdsp.asm
bswapdsp_init.c
bswapdsp.asm
cabac.h asm: FF_-prefix internal macros used in inline assembly 2016-06-27 17:21:18 +02:00
cavsdsp.c avcodec/x86/cavsdsp: silence -Wunused-variable on --disable-mmx 2015-09-24 04:27:50 +02:00
constants.c avcodec/v210: add avx2 version of the 10-bit line encoder 2016-01-17 16:03:43 +01:00
constants.h avcodec/v210: add avx2 version of the 10-bit line encoder 2016-01-17 16:03:43 +01:00
dcadsp_init.c x86/dcadec: add ff_lfe_fir1_float_{sse3,avx} 2016-02-22 21:21:34 -03:00
dcadsp.asm x86/dcadsp: optimize lfe_fir0_float_fma3 on x86_32 2016-07-05 17:48:20 -03:00
dct32.asm
dct_init.c
dirac_dwt_init.c dirac_dwt: Make x86 files/functions names consistent 2016-02-05 19:30:23 -08:00
dirac_dwt.asm dirac_dwt: Make x86 files/functions names consistent 2016-02-05 19:30:23 -08:00
diracdsp_init.c x86/diracdsp: make ff_put_signed_rect_clamped_10_sse4 work on x86_32 2016-07-20 13:43:38 -03:00
diracdsp.asm x86/diracdsp: make ff_put_signed_rect_clamped_10_sse4 work on x86_32 2016-07-20 13:43:38 -03:00
dnxhdenc_init.c
dnxhdenc.asm
fdct.c
fdct.h
fdctdsp_init.c
fft_init.c Merge commit '73ff983e8dd22ccee166403d0bbbc9c1cd543622' 2016-04-12 15:42:21 +01:00
fft.asm avcodec: Extend fft to size 2^17 2016-03-04 13:51:42 +01:00
fft.h fft: Split MDCT bits off from FFT 2016-03-01 10:18:28 +01:00
flac_dsp_gpl.asm
flacdsp_init.c
flacdsp.asm
fmtconvert_init.c Merge commit 'dc40a70c5755bccfb1a1349639943e1f408bea50' 2016-06-26 15:53:00 +02:00
fmtconvert.asm avcodec/x86/fmtconvert: Add emms to int32_to_float_fmul_array8_sse() 2016-01-15 17:08:37 +01:00
fpel.asm
fpel.h x86: fpel: Remove erroneous ff_put_pixels8_mmxext prototype 2015-10-19 16:52:37 -07:00
g722dsp_init.c
g722dsp.asm
h263_loopfilter.asm
h263dsp_init.c
h264_chromamc_10bit.asm
h264_chromamc.asm Merge commit '41ed7ab45fc693f7d7fc35664c0233f4c32d69bb' 2016-06-21 21:55:34 +02:00
h264_deblock_10bit.asm avcodec/x86: deduplicate PASS8ROWS macro 2017-02-18 20:26:49 +01:00
h264_deblock.asm avcodec/h264: sse2, avx h luma mbaff deblock/loop filter 2017-02-18 20:26:52 +01:00
h264_i386.h asm: FF_-prefix internal macros used in inline assembly 2016-06-27 17:21:18 +02:00
h264_idct_10bit.asm avcodec/h264: sse2 and avx 4:2:2 idct add8 10-bit functions 2016-11-30 22:58:28 +01:00
h264_idct.asm avcodec/h264: mmx 4:2:2 idct add8 function 2016-11-30 22:58:27 +01:00
h264_intrapred_10bit.asm vp9: 16bpp tm/dc/h/v intra pred simd (mostly sse2) functions. 2015-10-03 14:42:39 -04:00
h264_intrapred_init.c
h264_intrapred.asm
h264_qpel_8bit.asm
h264_qpel_10bit.asm vp9: 10/12bpp SIMD (sse2/ssse3/avx) for directional intra prediction. 2015-10-03 14:42:39 -04:00
h264_qpel.c Merge commit '4efab89332ea39a77145e8b15562b981d9dbde68' 2017-01-31 15:08:19 -03:00
h264_weight_10bit.asm
h264_weight.asm x86/h264_weight: use appropriate register size for weight parameters 2016-09-23 16:40:57 +02:00
h264chroma_init.c
h264dsp_init.c avcodec/h264: sse2, avx h luma mbaff deblock/loop filter 2017-02-18 20:26:52 +01:00
hevc_deblock.asm avcodec/x86: deduplicate PASS8ROWS macro 2017-02-18 20:26:49 +01:00
hevc_idct.asm Merge commit 'fca3c3b61952aacc45e9ca54d86a762946c21942' 2017-01-31 16:53:37 +01:00
hevc_mc.asm hevcdsp: use a macro for .rodata section 2015-12-11 16:19:30 +01:00
hevc_res_add.asm Merge commit '1bd890ad173d79e7906c5e1d06bf0a06cca4519d' 2017-01-31 15:31:34 +01:00
hevc_sao_10bit.asm x86/hevc_sao: add ff_hevc_sao_edge_filter_{8,16}_{10,12} 2015-12-20 17:01:15 -03:00
hevc_sao.asm x86/hevc_sao: move 10/12bit functions into a separate file 2015-09-30 02:59:55 -03:00
hevcdsp_init.c lavc/hevc: remove a few random spaces to reduce diff with libav 2017-01-31 17:02:24 +01:00
hevcdsp.h Merge commit 'fca3c3b61952aacc45e9ca54d86a762946c21942' 2017-01-31 16:53:37 +01:00
hpeldsp_init.c Revert "Merge commit '0a39c9ac0bfd7345fe676b4e2707d9cec3cbb553'" 2017-02-01 02:01:07 +01:00
hpeldsp_rnd_template.c asm: FF_-prefix internal macros used in inline assembly 2016-06-27 17:21:18 +02:00
hpeldsp_vp3_init.c Revert "Merge commit '0a39c9ac0bfd7345fe676b4e2707d9cec3cbb553'" 2017-02-01 02:01:07 +01:00
hpeldsp_vp3.asm Merge commit '1dfc3cf89d0eb026af28be46294b85d79499ffb5' 2017-01-31 14:49:29 -03:00
hpeldsp.asm Merge commit '1dfc3cf89d0eb026af28be46294b85d79499ffb5' 2017-01-31 14:49:29 -03:00
hpeldsp.h Revert "Merge commit '0a39c9ac0bfd7345fe676b4e2707d9cec3cbb553'" 2017-02-01 02:01:07 +01:00
huffyuvdsp_init.c huffyuvdsp: move functions only used by huffyuv from lossless_videodsp 2017-01-12 22:53:05 -03:00
huffyuvdsp.asm huffyuvdsp: move functions only used by huffyuv from lossless_videodsp 2017-01-12 22:53:05 -03:00
huffyuvencdsp_init.c huffyuvencdsp: move shared functions to a new lossless_videoencdsp context 2017-01-12 22:53:04 -03:00
huffyuvencdsp.asm huffyuvencdsp: move shared functions to a new lossless_videoencdsp context 2017-01-12 22:53:04 -03:00
idctdsp_init.c x86: simple_idct: 12bits versions 2015-10-13 15:34:32 +02:00
idctdsp.asm
idctdsp.h
imdct36.asm avcodec/x86/imdct36: fix building with nasm 2.11.05 2017-01-02 20:44:16 +01:00
inline_asm.h Merge commit '41ed7ab45fc693f7d7fc35664c0233f4c32d69bb' 2016-06-21 21:55:34 +02:00
jpeg2000dsp_init.c x86: use the new helper macros where useful 2016-02-14 20:00:21 -03:00
jpeg2000dsp.asm
lossless_audiodsp_init.c x86: lossless audio: SSE4 madd 32bits 2016-05-07 23:28:48 +02:00
lossless_audiodsp.asm x86: lossless audio: SSE4 madd 32bits 2016-05-07 23:28:48 +02:00
lossless_videodsp_init.c Merge commit 'd06dfaa5cbdd20acfd2364b16c0f4ae4ddb30a65' 2017-01-31 15:36:49 -03:00
lossless_videodsp.asm Merge commit 'd06dfaa5cbdd20acfd2364b16c0f4ae4ddb30a65' 2017-01-31 15:36:49 -03:00
lossless_videoencdsp_init.c huffyuvencdsp: move shared functions to a new lossless_videoencdsp context 2017-01-12 22:53:04 -03:00
lossless_videoencdsp.asm huffyuvencdsp: move shared functions to a new lossless_videoencdsp context 2017-01-12 22:53:04 -03:00
lpc.c Merge commit '4efab89332ea39a77145e8b15562b981d9dbde68' 2017-01-31 15:08:19 -03:00
Makefile Merge commit '1dfc3cf89d0eb026af28be46294b85d79499ffb5' 2017-01-31 14:49:29 -03:00
mathops.h
me_cmp_init.c asm: FF_-prefix internal macros used in inline assembly 2016-06-27 17:21:18 +02:00
me_cmp.asm
mlpdsp_init.c x86: use the new helper macros where useful 2016-02-14 20:00:21 -03:00
mlpdsp.asm
mpegaudiodsp.c avcodec/x86/mpegaudiodsp: silence -Wunused-variable on --disable-mmx 2015-09-22 23:45:03 +02:00
mpegvideo.c asm: FF_-prefix internal macros used in inline assembly 2016-06-27 17:21:18 +02:00
mpegvideodsp.c Merge commit 'dc40a70c5755bccfb1a1349639943e1f408bea50' 2016-06-26 15:53:00 +02:00
mpegvideoenc_qns_template.c
mpegvideoenc_template.c asm: FF_-prefix internal macros used in inline assembly 2016-06-27 17:21:18 +02:00
mpegvideoenc.c avcodec/x86/mpegvideoenc: silence -Wunused-function on --disable-mmx 2015-09-19 23:26:57 +02:00
mpegvideoencdsp_init.c
mpegvideoencdsp.asm
pixblockdsp_init.c
pixblockdsp.asm pixblockdsp: x86: Condense diff_pixels_* to a shared macro 2015-11-07 14:31:34 -08:00
pngdsp_init.c
pngdsp.asm
proresdsp_init.c
proresdsp.asm x86inc: Add debug symbols indicating sizes of compiled functions 2016-01-23 20:46:28 +01:00
qpel.asm
qpeldsp_init.c
qpeldsp.asm
rnd_template.c asm: FF_-prefix internal macros used in inline assembly 2016-06-27 17:21:18 +02:00
rv34dsp_init.c x86/rv34dsp: add ff_rv34_idct_dc_add_sse2 2017-02-02 17:51:21 -03:00
rv34dsp.asm x86/rv34dsp: add ff_rv34_idct_dc_add_sse2 2017-02-02 17:51:21 -03:00
rv40dsp_init.c all: fix -Wextra-semi reported on clang 2015-10-24 17:58:17 -04:00
rv40dsp.asm Merge commit '41ed7ab45fc693f7d7fc35664c0233f4c32d69bb' 2016-06-21 21:55:34 +02:00
sbrdsp_init.c
sbrdsp.asm x86/aacdec: use HADDPS macro 2016-06-08 14:18:18 -03:00
simple_idct10_template.asm x86: simple_idct10_template: use const 2015-10-13 22:52:33 +02:00
simple_idct10.asm x86inc: Add debug symbols indicating sizes of compiled functions 2016-01-21 23:19:46 +01:00
simple_idct.c
simple_idct.h x86: simple_idct: 12bits versions 2015-10-13 15:34:32 +02:00
snowdsp.c asm: FF_-prefix internal macros used in inline assembly 2016-06-27 17:21:18 +02:00
svq1enc_init.c
svq1enc.asm
synth_filter_init.c x86: use the new helper macros where useful 2016-02-14 20:00:21 -03:00
synth_filter.asm avcodec/synth_filter: split off remaining code from dcadec files 2016-01-25 14:57:38 -03:00
takdsp_init.c avcodec/takdec: add x86 SIMD for rest of decorrelation modes 2015-10-09 21:38:15 +02:00
takdsp.asm x86/takdsp: use arithmetic shift instructions 2015-10-09 23:52:39 -03:00
ttadsp_init.c avcodec/ttadsp: cosmetics 2016-08-06 18:27:01 -03:00
ttadsp.asm avcodec/ttadsp: cosmetics 2016-08-06 18:27:01 -03:00
ttaencdsp_init.c x86/ttaenc: add ff_ttaenc_filter_process_{ssse3,sse4} 2016-08-02 15:48:04 -03:00
ttaencdsp.asm x86/ttaenc: add ff_ttaenc_filter_process_{ssse3,sse4} 2016-08-02 15:48:04 -03:00
v210-init.c
v210.asm
v210enc_init.c Merge commit 'e280fe13291e9c712a5f4aa13b5263f3e8afed45' 2016-02-16 17:23:32 +00:00
v210enc.asm Merge commit 'eafb05fcf37cd19a910ca3b17824384f9006bc0a' 2016-02-16 17:02:56 +00:00
vc1dsp_init.c x86: vc1dsp: Convert vc1_inv_trans_*_dc to NASM format 2016-02-01 17:01:11 -08:00
vc1dsp_loopfilter.asm x86/vc1dsp: Split the file into MC and loopfilter 2016-02-29 08:46:53 -08:00
vc1dsp_mc.asm avcodec/x86/vc1dsp_mc: Fix build with NASM 2.09.10 2017-01-02 22:37:55 +01:00
vc1dsp_mmx.c asm: FF_-prefix internal macros used in inline assembly 2016-06-27 17:21:18 +02:00
vc1dsp.h
videodsp_init.c
videodsp.asm videodsp: fix 1-byte overread in top/bottom READ_NUM_BYTES iterations. 2016-01-18 11:12:47 -05:00
vorbisdsp_init.c
vorbisdsp.asm
vp3dsp_init.c
vp3dsp.asm
vp6dsp_init.c Merge commit 'dc40a70c5755bccfb1a1349639943e1f408bea50' 2016-06-26 15:53:00 +02:00
vp6dsp.asm
vp8dsp_init.c x86/vp8dsp: add ff_vp8_idct_dc_add_sse2 2017-02-02 17:18:58 -03:00
vp8dsp_loopfilter.asm
vp8dsp.asm x86/vp8dsp: add ff_vp8_idct_dc_add_sse2 2017-02-02 17:18:58 -03:00
vp9dsp_init_10bpp.c vp9: add subpel MC SIMD for 10/12bpp. 2015-09-16 21:11:34 -04:00
vp9dsp_init_12bpp.c vp9: add subpel MC SIMD for 10/12bpp. 2015-09-16 21:11:34 -04:00
vp9dsp_init_16bpp_template.c x86: use the new helper macros where useful 2016-02-14 20:00:21 -03:00
vp9dsp_init_16bpp.c x86: use the new helper macros where useful 2016-02-14 20:00:21 -03:00
vp9dsp_init.c vp9: add avx2 iadst16 implementations. 2016-11-15 11:01:36 -05:00
vp9dsp_init.h all: fix -Wextra-semi reported on clang 2015-10-24 17:58:17 -04:00
vp9intrapred_16bpp.asm vp9: don't keep a stack pointer if we don't need it. 2015-10-07 08:55:19 -04:00
vp9intrapred.asm
vp9itxfm_16bpp.asm x86/vp9itxfm: fix register clobbering in ff_vp9_idct_idct_4x4_add_12_sse2 2015-10-13 20:21:33 -03:00
vp9itxfm_template.asm vp9: add x86 simd (sse2/ssse3) for iadst4 10bpp functions. 2015-10-13 11:05:58 -04:00
vp9itxfm.asm x86/vp9itxfm: add missing AVX2 guards 2016-11-18 17:01:11 -03:00
vp9lpf_16bpp.asm doc: fix spelling errors 2016-10-21 23:58:47 +02:00
vp9lpf.asm vp9: add mxext versions of the single-block (w=8,npx=8) h/v loopfilters. 2016-07-26 15:59:07 -04:00
vp9mc_16bpp.asm vp9: sse2/ssse3/avx 16bpp loopfilter x86 simd. 2015-10-03 14:42:39 -04:00
vp9mc.asm x86/vp9mc: fix string concatenation of fullpel function names 2015-09-20 12:32:27 -03:00
vp56_arith.h
w64xmmtest.c Merge commit '4a081f224e12f4227ae966bcbdd5384f22121ecf' 2016-11-13 17:30:33 +01:00
xvididct_init.c
xvididct.asm
xvididct.h