1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-08-04 22:03:09 +02:00

avcodec/x86/vp9: Add AVX-512ICL for 16x16 and 32x32 10bpc inverse transforms

This commit is contained in:
Henrik Gramner
2025-05-21 14:49:17 +02:00
committed by Henrik Gramner
parent df967d095a
commit eda0ac7e5f
4 changed files with 1176 additions and 2 deletions

View File

@@ -220,8 +220,8 @@ struct VP9TileData {
     DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
     DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
     // block reconstruction intermediates
-    DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64 * 2];
-    DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64 * 2];
+    DECLARE_ALIGNED(64, uint8_t, tmp_y)[64 * 64 * 2];
+    DECLARE_ALIGNED(64, uint8_t, tmp_uv)[2][64 * 64 * 2];
     struct { int x, y; } min_mv, max_mv;
     int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
     uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];

View File

@@ -186,6 +186,7 @@ X86ASM-OBJS-$(CONFIG_VP9_DECODER) += x86/vp9intrapred.o \
 x86/vp9itxfm.o \
 x86/vp9itxfm_avx512.o \
 x86/vp9itxfm_16bpp.o \
+x86/vp9itxfm_16bpp_avx512.o \
 x86/vp9lpf.o \
 x86/vp9lpf_16bpp.o \
 x86/vp9mc.o \

View File

@@ -127,6 +127,8 @@ decl_itxfm_func(iwht, iwht, 4, BPC, mmxext);
 #if BPC == 10
 decl_itxfm_func(idct, idct, 4, BPC, mmxext);
 decl_itxfm_funcs(4, BPC, ssse3);
+decl_itxfm_funcs(16, BPC, avx512icl);
+decl_itxfm_func(idct, idct, 32, BPC, avx512icl);
 #else
 decl_itxfm_func(idct, idct, 4, BPC, sse2);
 #endif
@@ -233,6 +235,12 @@ av_cold void INIT_FUNC(VP9DSPContext *dsp, int bitexact)
 #endif
     }
+#if ARCH_X86_64 && BPC == 10
+    if (EXTERNAL_AVX512ICL(cpu_flags)) {
+        init_itx_funcs(TX_16X16, 16, BPC, avx512icl);
+        init_itx_func_one(TX_32X32, idct, idct, 32, BPC, avx512icl);
+    }
+#endif
 #endif /* HAVE_X86ASM */
     ff_vp9dsp_init_16bpp_x86(dsp);

File diff suppressed because it is too large Load Diff