# Patch summary (free-form preamble placed ahead of the first "diff --git"
# header, so it is not part of any hunk).
#
# libavcodec/x86/vp9dsp_init.c
#   * Replaces the four hand-written ff_vp9_loop_filter_{v,h}_16_16_{ssse3,avx}
#     prototypes with an lpf_funcs(size1, size2, opt) macro that declares the
#     v and h variants together; the macro is instantiated for sse2, ssse3 and
#     avx and then #undef'd.
#   * In ff_vp9dsp_init_x86(), right after the sse2 init_fpel() calls, assigns
#     the sse2 h/v functions to dsp->loop_filter_16[0] / [1] when ARCH_X86_64
#     is set.
#
# libavcodec/x86/vp9lpf.asm
#   * Swaps the movd + pshufb byte broadcasts of I, E and H for SPLATB_REG and
#     emits the preceding "pxor m0, m0" only under cpuflag(ssse3).
#     NOTE(review): SPLATB_REG is defined outside this patch; presumably its
#     non-SSSE3 path does not read the zeroed third argument -- confirm.
#   * Adds an "LPF_16_16_VH sse2" instantiation ahead of ssse3 and avx.
#
# NOTE(review): blank context lines and column alignment below were restored
# from the hunk line counts; verify against the tree before applying.
diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c
index 93453b83e3..139603c82f 100644
--- a/libavcodec/x86/vp9dsp_init.c
+++ b/libavcodec/x86/vp9dsp_init.c
@@ -217,10 +217,17 @@ filters_8tap_1d_fn2(avg, 32, avx2, ssse3)
 #undef filters_8tap_1d_fn3
 #undef filter_8tap_1d_fn
 
-void ff_vp9_loop_filter_v_16_16_ssse3(uint8_t *dst, ptrdiff_t stride, int E, int I, int H);
-void ff_vp9_loop_filter_v_16_16_avx  (uint8_t *dst, ptrdiff_t stride, int E, int I, int H);
-void ff_vp9_loop_filter_h_16_16_ssse3(uint8_t *dst, ptrdiff_t stride, int E, int I, int H);
-void ff_vp9_loop_filter_h_16_16_avx  (uint8_t *dst, ptrdiff_t stride, int E, int I, int H);
+#define lpf_funcs(size1, size2, opt) \
+void ff_vp9_loop_filter_v_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stride, \
+                                                    int E, int I, int H); \
+void ff_vp9_loop_filter_h_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stride, \
+                                                    int E, int I, int H)
+
+lpf_funcs(16, 16, sse2);
+lpf_funcs(16, 16, ssse3);
+lpf_funcs(16, 16, avx);
+
+#undef lpf_funcs
 
 #endif /* HAVE_YASM */
 
@@ -283,6 +290,10 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
         init_fpel(2, 1, 16, avg, sse2);
         init_fpel(1, 1, 32, avg, sse2);
         init_fpel(0, 1, 64, avg, sse2);
+        if (ARCH_X86_64) {
+            dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_sse2;
+            dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_sse2;
+        }
     }
 
     if (EXTERNAL_SSSE3(cpu_flags)) {
diff --git a/libavcodec/x86/vp9lpf.asm b/libavcodec/x86/vp9lpf.asm
index 8568f1d613..d4c70f5b9c 100644
--- a/libavcodec/x86/vp9lpf.asm
+++ b/libavcodec/x86/vp9lpf.asm
@@ -327,11 +327,11 @@ SECTION .text
 %endif
 
     ; calc fm mask
+%if cpuflag(ssse3)
     pxor                m0, m0
-    movd                m2, Id
-    movd                m3, Ed
-    pshufb              m2, m0                          ; I I I I ...
-    pshufb              m3, m0                          ; E E E E ...
+%endif
+    SPLATB_REG          m2, I, m0                       ; I I I I ...
+    SPLATB_REG          m3, E, m0                       ; E E E E ...
     mova                m0, [pb_80]
     pxor                m2, m0
     pxor                m3, m0
@@ -383,9 +383,10 @@ SECTION .text
     ABSSUB_CMP          m1, m9, m11, m6, m4, m5, m8     ; abs(p2 - p0) <= 1
     pand                m2, m1
     ABSSUB              m4, m10, m11, m5                ; abs(p1 - p0)
+%if cpuflag(ssse3)
     pxor                m0, m0
-    movd                m7, Hd
-    pshufb              m7, m0                          ; H H H H ...
+%endif
+    SPLATB_REG          m7, H, m0                       ; H H H H ...
     pxor                m7, m8
     pxor                m4, m8
     pcmpgtb             m0, m4, m7                      ; abs(p1 - p0) > H (1/2 hev condition)
@@ -595,6 +596,7 @@ cglobal vp9_loop_filter_h_16_16, 5,10,16, 256, dst, stride, E, I, H, mstride, ds
     RET
 %endmacro
 
+LPF_16_16_VH sse2
 LPF_16_16_VH ssse3
 LPF_16_16_VH avx
 