mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-28 20:53:54 +02:00
vp9lpf/x86: add ff_vp9_loop_filter_[vh]_16_16_sse2().
Similar gains in performance as the SSSE3 version.

Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Anton Khirnov <anton@khirnov.net>
This commit is contained in:
parent
a692724c58
commit
1f451eed60
@ -217,10 +217,17 @@ filters_8tap_1d_fn2(avg, 32, avx2, ssse3)
|
||||
#undef filters_8tap_1d_fn3
|
||||
#undef filter_8tap_1d_fn
|
||||
|
||||
void ff_vp9_loop_filter_v_16_16_ssse3(uint8_t *dst, ptrdiff_t stride, int E, int I, int H);
|
||||
void ff_vp9_loop_filter_v_16_16_avx (uint8_t *dst, ptrdiff_t stride, int E, int I, int H);
|
||||
void ff_vp9_loop_filter_h_16_16_ssse3(uint8_t *dst, ptrdiff_t stride, int E, int I, int H);
|
||||
void ff_vp9_loop_filter_h_16_16_avx (uint8_t *dst, ptrdiff_t stride, int E, int I, int H);
|
||||
/*
 * lpf_funcs(size1, size2, opt): declare the two loop-filter prototypes
 *   ff_vp9_loop_filter_v_<size1>_<size2>_<opt>
 *   ff_vp9_loop_filter_h_<size1>_<size2>_<opt>
 * Both return void and take (uint8_t *dst, ptrdiff_t stride, int E, int I, int H).
 * The expansion ends without a semicolon; the invocation site supplies it.
 */
#define lpf_funcs(size1, size2, opt) \
|
||||
void ff_vp9_loop_filter_v_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stride, \
|
||||
int E, int I, int H); \
|
||||
void ff_vp9_loop_filter_h_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stride, \
|
||||
int E, int I, int H)
|
||||
|
||||
lpf_funcs(16, 16, sse2);
|
||||
lpf_funcs(16, 16, ssse3);
|
||||
lpf_funcs(16, 16, avx);
|
||||
|
||||
#undef lpf_funcs
|
||||
|
||||
#endif /* HAVE_YASM */
|
||||
|
||||
@ -283,6 +290,10 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
|
||||
init_fpel(2, 1, 16, avg, sse2);
|
||||
init_fpel(1, 1, 32, avg, sse2);
|
||||
init_fpel(0, 1, 64, avg, sse2);
|
||||
if (ARCH_X86_64) {
|
||||
dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_sse2;
|
||||
dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_sse2;
|
||||
}
|
||||
}
|
||||
|
||||
if (EXTERNAL_SSSE3(cpu_flags)) {
|
||||
|
@ -327,11 +327,11 @@ SECTION .text
|
||||
%endif
|
||||
|
||||
; calc fm mask
|
||||
%if cpuflag(ssse3)
|
||||
pxor m0, m0
|
||||
movd m2, Id
|
||||
movd m3, Ed
|
||||
pshufb m2, m0 ; I I I I ...
|
||||
pshufb m3, m0 ; E E E E ...
|
||||
%endif
|
||||
SPLATB_REG m2, I, m0 ; I I I I ...
|
||||
SPLATB_REG m3, E, m0 ; E E E E ...
|
||||
mova m0, [pb_80]
|
||||
pxor m2, m0
|
||||
pxor m3, m0
|
||||
@ -383,9 +383,10 @@ SECTION .text
|
||||
ABSSUB_CMP m1, m9, m11, m6, m4, m5, m8 ; abs(p2 - p0) <= 1
|
||||
pand m2, m1
|
||||
ABSSUB m4, m10, m11, m5 ; abs(p1 - p0)
|
||||
%if cpuflag(ssse3)
|
||||
pxor m0, m0
|
||||
movd m7, Hd
|
||||
pshufb m7, m0 ; H H H H ...
|
||||
%endif
|
||||
SPLATB_REG m7, H, m0 ; H H H H ...
|
||||
pxor m7, m8
|
||||
pxor m4, m8
|
||||
pcmpgtb m0, m4, m7 ; abs(p1 - p0) > H (1/2 hev condition)
|
||||
@ -595,6 +596,7 @@ cglobal vp9_loop_filter_h_16_16, 5,10,16, 256, dst, stride, E, I, H, mstride, ds
|
||||
RET
|
||||
%endmacro
|
||||
|
||||
LPF_16_16_VH sse2
|
||||
LPF_16_16_VH ssse3
|
||||
LPF_16_16_VH avx
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user