You've already forked FFmpeg
mirror of https://github.com/FFmpeg/FFmpeg.git (synced 2025-08-04 22:03:09 +02:00)
avcodec/x86/vp9: Add AVX-512ICL for 16x16 and 32x32 8bpc inverse transforms
This commit is contained in:
committed by Henrik Gramner
parent b6803bf104
commit fd18ae88ae
@ -184,6 +184,7 @@ X86ASM-OBJS-$(CONFIG_VP6_DECODER) += x86/vp6dsp.o
|
|||||||
X86ASM-OBJS-$(CONFIG_VP9_DECODER) += x86/vp9intrapred.o \
|
X86ASM-OBJS-$(CONFIG_VP9_DECODER) += x86/vp9intrapred.o \
|
||||||
x86/vp9intrapred_16bpp.o \
|
x86/vp9intrapred_16bpp.o \
|
||||||
x86/vp9itxfm.o \
|
x86/vp9itxfm.o \
|
||||||
|
x86/vp9itxfm_avx512.o \
|
||||||
x86/vp9itxfm_16bpp.o \
|
x86/vp9itxfm_16bpp.o \
|
||||||
x86/vp9lpf.o \
|
x86/vp9lpf.o \
|
||||||
x86/vp9lpf_16bpp.o \
|
x86/vp9lpf_16bpp.o \
|
||||||
|
@ -114,7 +114,9 @@ itxfm_func(idct, idct, 32, ssse3);
|
|||||||
itxfm_func(idct, idct, 32, avx);
|
itxfm_func(idct, idct, 32, avx);
|
||||||
itxfm_func(iwht, iwht, 4, mmx);
|
itxfm_func(iwht, iwht, 4, mmx);
|
||||||
itxfm_funcs(16, avx2);
|
itxfm_funcs(16, avx2);
|
||||||
|
itxfm_funcs(16, avx512icl);
|
||||||
itxfm_func(idct, idct, 32, avx2);
|
itxfm_func(idct, idct, 32, avx2);
|
||||||
|
itxfm_func(idct, idct, 32, avx512icl);
|
||||||
|
|
||||||
#undef itxfm_func
|
#undef itxfm_func
|
||||||
#undef itxfm_funcs
|
#undef itxfm_funcs
|
||||||
@ -406,6 +408,19 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact)
|
|||||||
init_ipred(32, avx2, tm, TM_VP8);
|
init_ipred(32, avx2, tm, TM_VP8);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if ARCH_X86_64
|
||||||
|
if (EXTERNAL_AVX512ICL(cpu_flags)) {
|
||||||
|
dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_avx512icl;
|
||||||
|
dsp->itxfm_add[TX_16X16][ADST_DCT] = ff_vp9_idct_iadst_16x16_add_avx512icl;
|
||||||
|
dsp->itxfm_add[TX_16X16][DCT_ADST] = ff_vp9_iadst_idct_16x16_add_avx512icl;
|
||||||
|
dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_avx512icl;
|
||||||
|
dsp->itxfm_add[TX_32X32][ADST_ADST] =
|
||||||
|
dsp->itxfm_add[TX_32X32][ADST_DCT] =
|
||||||
|
dsp->itxfm_add[TX_32X32][DCT_ADST] =
|
||||||
|
dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx512icl;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#undef init_fpel
|
#undef init_fpel
|
||||||
#undef init_subpel1
|
#undef init_subpel1
|
||||||
#undef init_subpel2
|
#undef init_subpel2
|
||||||
|
1629
libavcodec/x86/vp9itxfm_avx512.asm
Normal file
1629
libavcodec/x86/vp9itxfm_avx512.asm
Normal file
File diff suppressed because it is too large
Load Diff
@ -131,4 +131,6 @@
|
|||||||
|
|
||||||
#define LOCAL_ALIGNED_32(t, v, ...) E1(LOCAL_ALIGNED_D(32, t, v, __VA_ARGS__,,))
|
#define LOCAL_ALIGNED_32(t, v, ...) E1(LOCAL_ALIGNED_D(32, t, v, __VA_ARGS__,,))
|
||||||
|
|
||||||
|
#define LOCAL_ALIGNED_64(t, v, ...) E1(LOCAL_ALIGNED_D(64, t, v, __VA_ARGS__,,))
|
||||||
|
|
||||||
#endif /* AVUTIL_MEM_INTERNAL_H */
|
#endif /* AVUTIL_MEM_INTERNAL_H */
|
||||||
|
@ -310,13 +310,13 @@ static int is_zero(const int16_t *c, int sz)
|
|||||||
|
|
||||||
static void check_itxfm(void)
|
static void check_itxfm(void)
|
||||||
{
|
{
|
||||||
LOCAL_ALIGNED_32(uint8_t, src, [32 * 32 * 2]);
|
LOCAL_ALIGNED_64(uint8_t, src, [32 * 32 * 2]);
|
||||||
LOCAL_ALIGNED_32(uint8_t, dst, [32 * 32 * 2]);
|
LOCAL_ALIGNED_64(uint8_t, dst, [32 * 32 * 2]);
|
||||||
LOCAL_ALIGNED_32(uint8_t, dst0, [32 * 32 * 2]);
|
LOCAL_ALIGNED_64(uint8_t, dst0, [32 * 32 * 2]);
|
||||||
LOCAL_ALIGNED_32(uint8_t, dst1, [32 * 32 * 2]);
|
LOCAL_ALIGNED_64(uint8_t, dst1, [32 * 32 * 2]);
|
||||||
LOCAL_ALIGNED_32(int16_t, coef, [32 * 32 * 2]);
|
LOCAL_ALIGNED_64(int16_t, coef, [32 * 32 * 2]);
|
||||||
LOCAL_ALIGNED_32(int16_t, subcoef0, [32 * 32 * 2]);
|
LOCAL_ALIGNED_64(int16_t, subcoef0, [32 * 32 * 2]);
|
||||||
LOCAL_ALIGNED_32(int16_t, subcoef1, [32 * 32 * 2]);
|
LOCAL_ALIGNED_64(int16_t, subcoef1, [32 * 32 * 2]);
|
||||||
declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
|
declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
|
||||||
VP9DSPContext dsp;
|
VP9DSPContext dsp;
|
||||||
int y, x, tx, txtp, bit_depth, sub;
|
int y, x, tx, txtp, bit_depth, sub;
|
||||||
|
Reference in New Issue
Block a user