1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-01-13 21:28:01 +02:00

Merge remote-tracking branch 'rbultje/vp9-simd'

* rbultje/vp9-simd:
  vp9: fix memory corruption if header decoding fails after size change.
  vp9/x86: use explicit register for relative stack references.
  vp9/x86: iwht4x4 (lossless) mmx.
  vp9/x86: 4x4 iadst SIMD (ssse3) variants.
  vp9/x86: 8x8 iadst SIMD (ssse3/avx) variants.

Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
Michael Niedermayer 2014-01-25 01:43:50 +01:00
commit 5554c6dd45
3 changed files with 455 additions and 265 deletions

View File

@@ -3772,7 +3772,8 @@ static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecCo
VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
// detect size changes in other threads
if (s->above_partition_ctx && (s->cols != ssrc->cols || s->rows != ssrc->rows)) {
if (s->above_partition_ctx &&
(!ssrc->above_partition_ctx || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
free_buffers(s);
}

View File

@@ -166,13 +166,14 @@ itxfm_func(iadst, idct, size, opt); \
itxfm_func(idct, iadst, size, opt); \
itxfm_func(iadst, iadst, size, opt)
itxfm_func(idct, idct, 4, ssse3);
itxfm_func(idct, idct, 8, ssse3);
itxfm_func(idct, idct, 8, avx);
itxfm_funcs(4, ssse3);
itxfm_funcs(8, ssse3);
itxfm_funcs(8, avx);
itxfm_funcs(16, ssse3);
itxfm_funcs(16, avx);
itxfm_func(idct, idct, 32, ssse3);
itxfm_func(idct, idct, 32, avx);
itxfm_func(iwht, iwht, 4, mmx);
#undef itxfm_func
#undef itxfm_funcs
@@ -223,6 +224,10 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
if (EXTERNAL_MMX(cpu_flags)) {
init_fpel(4, 0, 4, put, mmx);
init_fpel(3, 0, 8, put, mmx);
dsp->itxfm_add[4 /* lossless */][DCT_DCT] =
dsp->itxfm_add[4 /* lossless */][ADST_DCT] =
dsp->itxfm_add[4 /* lossless */][DCT_ADST] =
dsp->itxfm_add[4 /* lossless */][ADST_ADST] = ff_vp9_iwht_iwht_4x4_add_mmx;
}
if (EXTERNAL_MMXEXT(cpu_flags)) {
@@ -250,8 +255,14 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
init_subpel3(0, put, ssse3);
init_subpel3(1, avg, ssse3);
dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_ssse3;
dsp->itxfm_add[TX_4X4][ADST_DCT] = ff_vp9_idct_iadst_4x4_add_ssse3;
dsp->itxfm_add[TX_4X4][DCT_ADST] = ff_vp9_iadst_idct_4x4_add_ssse3;
dsp->itxfm_add[TX_4X4][ADST_ADST] = ff_vp9_iadst_iadst_4x4_add_ssse3;
if (ARCH_X86_64) {
dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_ssse3;
dsp->itxfm_add[TX_8X8][ADST_DCT] = ff_vp9_idct_iadst_8x8_add_ssse3;
dsp->itxfm_add[TX_8X8][DCT_ADST] = ff_vp9_iadst_idct_8x8_add_ssse3;
dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_ssse3;
dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_ssse3;
dsp->itxfm_add[TX_16X16][ADST_DCT] = ff_vp9_idct_iadst_16x16_add_ssse3;
dsp->itxfm_add[TX_16X16][DCT_ADST] = ff_vp9_iadst_idct_16x16_add_ssse3;
@@ -268,6 +279,9 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
if (EXTERNAL_AVX(cpu_flags)) {
if (ARCH_X86_64) {
dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_avx;
dsp->itxfm_add[TX_8X8][ADST_DCT] = ff_vp9_idct_iadst_8x8_add_avx;
dsp->itxfm_add[TX_8X8][DCT_ADST] = ff_vp9_iadst_idct_8x8_add_avx;
dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_avx;
dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_avx;
dsp->itxfm_add[TX_16X16][ADST_DCT] = ff_vp9_idct_iadst_16x16_add_avx;
dsp->itxfm_add[TX_16X16][DCT_ADST] = ff_vp9_iadst_idct_16x16_add_avx;

File diff suppressed because it is too large Load Diff