1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2024-12-07 11:13:41 +02:00
FFmpeg/libavcodec/x86
Linjie Fu 8b8492452d lavc/x86/hevc_add_res: Fix coeff overflow in ADD_RES_SSE_16_32_8
Fix overflow for coeff -32768 in function ADD_RES_SSE_16_32_8 with no
performance drop.(SSE2/AVX/AVX2)

./checkasm --test=hevc_add_res --bench

Mainline:
  - hevc_add_res.add_residual [OK]
    hevc_add_res_32x32_8_sse2: 127.5
    hevc_add_res_32x32_8_avx: 127.0
    hevc_add_res_32x32_8_avx2: 86.5

Add overflow test case:
  - hevc_add_res.add_residual [FAILED]

After:
  - hevc_add_res.add_residual [OK]
    hevc_add_res_32x32_8_sse2: 126.8
    hevc_add_res_32x32_8_avx: 128.3
    hevc_add_res_32x32_8_avx2: 86.8

Signed-off-by: Xu Guangxin <guangxin.xu@intel.com>
Signed-off-by: Linjie Fu <linjie.fu@intel.com>
Signed-off-by: Anton Khirnov <anton@khirnov.net>
2020-03-27 10:57:40 +01:00
..
aacencdsp_init.c
aacencdsp.asm
aacpsdsp_init.c
aacpsdsp.asm
ac3dsp_downmix.asm
ac3dsp_init.c
ac3dsp.asm
alacdsp_init.c
alacdsp.asm
audiodsp_init.c
audiodsp.asm
blockdsp_init.c
blockdsp.asm
bswapdsp_init.c
bswapdsp.asm avcodec/x86/bswapdsp : use macro for 128 bits constants loading in xmm or ymm 2017-12-02 18:25:25 +01:00
cabac.h
cavsdsp.c
cavsidct.asm
celt_pvq_init.c celt_pvq_init: only build when CONFIG_OPUS_ENCODER is enabled 2019-03-31 23:36:43 +02:00
celt_pvq_search.asm x86/opus_dsp: rename to celt_pvq 2019-03-31 23:35:00 +02:00
constants.c x86/constants: make pb_80 32 byte wide 2017-11-21 10:57:03 -03:00
constants.h x86/constants: make pb_80 32 byte wide 2017-11-21 10:57:03 -03:00
dcadsp_init.c
dcadsp.asm
dct32.asm
dct_init.c
dirac_dwt_init.c
dirac_dwt.asm
diracdsp_init.c
diracdsp.asm avcodec/x86/diracdsp: Fix high bits on Windows x86_64 2020-01-31 00:04:22 +01:00
dnxhdenc_init.c
dnxhdenc.asm
exrdsp_init.c
exrdsp.asm avcodec/x86/exrdsp : use ymm constant for pb_80 2017-11-23 20:00:13 +01:00
fdct.c
fdct.h
fdctdsp_init.c
fft_init.c
fft.asm avcodec/fft: fix INTERL macro on 3dnow 2017-11-25 13:11:45 -03:00
fft.h
flac_dsp_gpl.asm
flacdsp_init.c
flacdsp.asm
fmtconvert_init.c
fmtconvert.asm
fpel.asm
fpel.h
g722dsp_init.c
g722dsp.asm
h263_loopfilter.asm
h263dsp_init.c
h264_cabac.c
h264_chromamc_10bit.asm
h264_chromamc.asm
h264_deblock_10bit.asm
h264_deblock.asm
h264_idct_10bit.asm
h264_idct.asm h264_idct: enable unmacro on newer NASM versions 2018-02-12 10:50:37 +00:00
h264_intrapred_10bit.asm
h264_intrapred_init.c
h264_intrapred.asm
h264_qpel_8bit.asm
h264_qpel_10bit.asm
h264_qpel.c
h264_weight_10bit.asm
h264_weight.asm
h264chroma_init.c
h264dsp_init.c avcodec/h264dsp: change loop filter stride argument to ptrdiff_t 2019-02-20 15:27:43 -03:00
hevc_add_res.asm lavc/x86/hevc_add_res: Fix coeff overflow in ADD_RES_SSE_16_32_8 2020-03-27 10:57:40 +01:00
hevc_deblock.asm
hevc_idct.asm
hevc_mc.asm
hevc_sao_10bit.asm avcodec: increase AV_INPUT_BUFFER_PADDING_SIZE to 64 2018-01-11 23:46:31 -03:00
hevc_sao.asm avcodec: increase AV_INPUT_BUFFER_PADDING_SIZE to 64 2018-01-11 23:46:31 -03:00
hevcdsp_init.c
hevcdsp.h
hpeldsp_init.c
hpeldsp_rnd_template.c
hpeldsp_vp3_init.c
hpeldsp_vp3.asm
hpeldsp.asm
hpeldsp.h
huffyuvdsp_init.c avcodec/huffyuvdsp : add add_int16 AVX2 func 2017-11-21 09:41:58 +01:00
huffyuvdsp_template.asm avcodec/huffyuvdsp : add add_int16 AVX2 func 2017-11-21 09:41:58 +01:00
huffyuvdsp.asm avcodec/huffyuvdsp : add add_int16 AVX2 func 2017-11-21 09:41:58 +01:00
huffyuvencdsp_init.c avcodec/huffyuvdspenc : add diff_int16 AVX2 func 2017-11-21 09:42:08 +01:00
huffyuvencdsp.asm avcodec/huffyuvdspenc : add diff_int16 AVX2 func 2017-11-21 09:42:08 +01:00
idctdsp_init.c mpeg4video: Add support for MPEG-4 Simple Studio Profile. 2018-04-02 13:06:23 +01:00
idctdsp.asm
idctdsp.h
imdct36.asm
inline_asm.h
jpeg2000dsp_init.c
jpeg2000dsp.asm
lossless_audiodsp_init.c
lossless_audiodsp.asm
lossless_videodsp_init.c x86/lossless_videodsp: rename ff_add_left_pred_int16_sse4 to ff_add_left_pred_int16_unaligned_ssse3 2017-12-10 00:51:01 -03:00
lossless_videodsp.asm x86/lossless_videodsp: rename ff_add_left_pred_int16_sse4 to ff_add_left_pred_int16_unaligned_ssse3 2017-12-10 00:51:01 -03:00
lossless_videoencdsp_init.c avcodec/utvideoenc : add SIMD (avx) for sub_left_prediction 2018-01-28 20:23:11 +01:00
lossless_videoencdsp.asm avcodec/utvideoenc : add SIMD (avx) for sub_left_prediction 2018-01-28 20:23:11 +01:00
lpc.c
Makefile avcodec/Makefile: add missing pngdsp dependency to the lscr decoder 2019-05-14 16:47:56 -03:00
mathops.h
mdct15_init.c mdct15: simplify x86 exptab permutation 2018-05-07 23:44:40 +01:00
mdct15.asm mdct15: simplify the fft15 x86 SIMD 2018-05-07 23:27:41 +01:00
me_cmp_init.c
me_cmp.asm
mlpdsp_init.c
mlpdsp.asm
mpegaudiodsp.c
mpegvideo.c
mpegvideodsp.c
mpegvideoenc_qns_template.c
mpegvideoenc_template.c
mpegvideoenc.c
mpegvideoencdsp_init.c
mpegvideoencdsp.asm
opusdsp_init.c x85/opusdsp: enable the functions on all FMA3 CPUs 2019-09-11 20:50:45 -03:00
opusdsp.asm x86/opusdps: clear the high bits from some gprs 2019-09-11 20:42:31 -03:00
pixblockdsp_init.c
pixblockdsp.asm
pngdsp_init.c
pngdsp.asm
proresdsp_init.c avcodec/proresdsp indent after prev commit 2018-12-02 12:55:35 +01:00
proresdsp.asm
qpel.asm
qpeldsp_init.c
qpeldsp.asm
rnd_template.c
rv34dsp_init.c
rv34dsp.asm
rv40dsp_init.c
rv40dsp.asm
sbcdsp_init.c sbcenc: add MMX optimizations 2018-03-07 22:26:53 +01:00
sbcdsp.asm sbcenc: add MMX optimizations 2018-03-07 22:26:53 +01:00
sbrdsp_init.c
sbrdsp.asm
simple_idct10_template.asm
simple_idct10.asm
simple_idct.asm
simple_idct.h
snowdsp.c
svq1enc_init.c
svq1enc.asm
synth_filter_init.c
synth_filter.asm
takdsp_init.c
takdsp.asm
ttadsp_init.c
ttadsp.asm
ttaencdsp_init.c
ttaencdsp.asm
utvideodsp_init.c avcodec/utvideodsp : add avx2 version for the dsp 2017-11-21 09:00:42 +01:00
utvideodsp.asm x86/utvideodsp: reuse shared constants 2017-11-21 10:57:14 -03:00
v210-init.c libavcodec Adding ff_v210_planar_unpack AVX2 2019-05-02 19:21:37 +02:00
v210.asm x86/v210dec: use named registers 2019-05-03 01:20:18 -03:00
v210enc_init.c
v210enc.asm
vc1dsp_init.c
vc1dsp_loopfilter.asm
vc1dsp_mc.asm
vc1dsp_mmx.c
vc1dsp.h
videodsp_init.c
videodsp.asm
vorbisdsp_init.c
vorbisdsp.asm
vp3dsp_init.c vp4: prevent unaligned memory access in loop filter 2019-10-30 10:06:38 +01:00
vp3dsp.asm
vp6dsp_init.c
vp6dsp.asm
vp8dsp_init.c
vp8dsp_loopfilter.asm
vp8dsp.asm
vp9dsp_init_10bpp.c
vp9dsp_init_12bpp.c
vp9dsp_init_16bpp_template.c
vp9dsp_init_16bpp.c
vp9dsp_init.c
vp9dsp_init.h
vp9intrapred_16bpp.asm
vp9intrapred.asm
vp9itxfm_16bpp.asm
vp9itxfm_template.asm
vp9itxfm.asm
vp9lpf_16bpp.asm
vp9lpf.asm
vp9mc_16bpp.asm
vp9mc.asm
vp56_arith.h
w64xmmtest.c
xvididct_init.c
xvididct.asm
xvididct.h