1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2024-12-23 12:43:46 +02:00

avfilter/x86/af_afir: add FMA3 SIMD

This commit is contained in:
Paul B Mahol 2023-09-10 19:25:20 +02:00
parent 5f810435c2
commit c5effe7d3d
2 changed files with 32 additions and 0 deletions

View File

@ -67,3 +67,30 @@ INIT_XMM sse3
FCMUL_ADD FCMUL_ADD
INIT_YMM avx INIT_YMM avx
FCMUL_ADD FCMUL_ADD
%if HAVE_FMA3_EXTERNAL
INIT_YMM fma3
;------------------------------------------------------------------------------
; void ff_fcmul_add_fma3(float *sum, const float *t, const float *c,
;                        ptrdiff_t len)
;
; Treats t and c as interleaved pairs of single-precision floats and, for
; every pair (t0,t1) / (c0,c1), accumulates into sum:
;     sum[even] += t0*c0 - t1*c1
;     sum[odd]  += t1*c0 + t0*c1
; After the vector part, one extra scalar float is handled:
;     sum[x] += t[x] * c[x]
;
; Notes for callers (derived from the code below):
;   * The loop body runs before its exit test, so at least one full vector
;     (mmsize bytes) is always processed.
;   * The scalar tail addresses [ptr + lenq]; lenq is 0 there only when
;     len*8 is a positive multiple of mmsize.
;     NOTE(review): callers are presumably required to guarantee this.
;   * movaps is used for the vector loads and the store, so the addresses
;     touched by the loop must be mmsize-aligned.
;   * Only the low 32 bits of len are used (shl on lend).
;------------------------------------------------------------------------------
cglobal fcmul_add, 4,4,4, sum, t, c, len
    shl             lend, 3                 ; pairs -> bytes (2 floats * 4 bytes)
    add             sumq, lenq              ; point all three buffers at their end...
    add               tq, lenq
    add               cq, lenq
    neg             lenq                    ; ...and walk a negative offset up to 0
.loop:
    movaps            m0, [tq + lenq]       ; m0 = t0 t1 ...
    movaps            m1, [cq + lenq]       ; m1 = c0 c1 ...
    vpermilps         m3, m1, 0xa0          ; m3 = c0 c0 ... (even lanes duplicated)
    vpermilps         m2, m0, 0xb1          ; m2 = t1 t0 ... (lanes swapped per pair)
    vpermilps         m1, m1, 0xf5          ; m1 = c1 c1 ... (odd lanes duplicated)
    mulps             m1, m1, m2            ; m1 = c1*t1, c1*t0
    vfmaddsub132ps    m0, m1, m3            ; m0 = m0*m3 -/+ m1 (even: sub, odd: add)
    addps             m0, m0, [sumq + lenq]
    movaps  [sumq + lenq], m0
    add             lenq, mmsize
    jl .loop

    ; scalar tail: one float located right after the vector-processed region
    movss            xm0, [tq + lenq]
    mulss            xm0, [cq + lenq]
    addss            xm0, [sumq + lenq]
    movss   [sumq + lenq], xm0
    RET
%endif

View File

@ -26,6 +26,8 @@ void ff_fcmul_add_sse3(float *sum, const float *t, const float *c,
ptrdiff_t len); ptrdiff_t len);
void ff_fcmul_add_avx(float *sum, const float *t, const float *c, void ff_fcmul_add_avx(float *sum, const float *t, const float *c,
ptrdiff_t len); ptrdiff_t len);
/* FMA3 variant of fcmul_add; ff_afir_init_x86() installs it as s->fcmul_add
 * when EXTERNAL_FMA3_FAST(cpu_flags) is true. Only declared here; the body
 * is presumably provided by the x86 assembly (verify against af_afir.asm). */
void ff_fcmul_add_fma3(float *sum, const float *t, const float *c,
ptrdiff_t len);
av_cold void ff_afir_init_x86(AudioFIRDSPContext *s) av_cold void ff_afir_init_x86(AudioFIRDSPContext *s)
{ {
@ -37,4 +39,7 @@ av_cold void ff_afir_init_x86(AudioFIRDSPContext *s)
if (EXTERNAL_AVX_FAST(cpu_flags)) { if (EXTERNAL_AVX_FAST(cpu_flags)) {
s->fcmul_add = ff_fcmul_add_avx; s->fcmul_add = ff_fcmul_add_avx;
} }
if (EXTERNAL_FMA3_FAST(cpu_flags)) {
s->fcmul_add = ff_fcmul_add_fma3;
}
} }