mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-24 13:56:33 +02:00
swresample/x86/resample: extend resample_double to support avx and fma3
benchmark: sse2 10.670s, avx 8.763s, fma3 8.380s
Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>
This commit is contained in:
parent
3d5c2169e4
commit
de1308429a
@ -203,7 +203,7 @@ cglobal resample_common_%1, 1, 7, 2, ctx, phase_count, dst, frac, \
|
|||||||
; horizontal sum & store
|
; horizontal sum & store
|
||||||
%if mmsize == 32
|
%if mmsize == 32
|
||||||
vextractf128 xm1, m0, 0x1
|
vextractf128 xm1, m0, 0x1
|
||||||
addps xm0, xm1
|
addp%4 xm0, xm1
|
||||||
%endif
|
%endif
|
||||||
movhlps xm1, xm0
|
movhlps xm1, xm0
|
||||||
%ifidn %1, float
|
%ifidn %1, float
|
||||||
@ -489,8 +489,8 @@ cglobal resample_linear_%1, 1, 7, 5, ctx, min_filter_length_x4, filter2, \
|
|||||||
%if mmsize == 32
|
%if mmsize == 32
|
||||||
vextractf128 xm1, m0, 0x1
|
vextractf128 xm1, m0, 0x1
|
||||||
vextractf128 xm3, m2, 0x1
|
vextractf128 xm3, m2, 0x1
|
||||||
addps xm0, xm1
|
addp%4 xm0, xm1
|
||||||
addps xm2, xm3
|
addp%4 xm2, xm3
|
||||||
%endif
|
%endif
|
||||||
cvtsi2s%4 xm1, fracd
|
cvtsi2s%4 xm1, fracd
|
||||||
subp%4 xm2, xm0
|
subp%4 xm2, xm0
|
||||||
@ -608,3 +608,12 @@ RESAMPLE_FNS int16, 2, 1
|
|||||||
|
|
||||||
INIT_XMM sse2
|
INIT_XMM sse2
|
||||||
RESAMPLE_FNS double, 8, 3, d, pdbl_1
|
RESAMPLE_FNS double, 8, 3, d, pdbl_1
|
||||||
|
|
||||||
|
%if HAVE_AVX_EXTERNAL
|
||||||
|
INIT_YMM avx
|
||||||
|
RESAMPLE_FNS double, 8, 3, d, pdbl_1
|
||||||
|
%endif
|
||||||
|
%if HAVE_FMA3_EXTERNAL
|
||||||
|
INIT_YMM fma3
|
||||||
|
RESAMPLE_FNS double, 8, 3, d, pdbl_1
|
||||||
|
%endif
|
||||||
|
@ -42,6 +42,8 @@ RESAMPLE_FUNCS(float, avx);
|
|||||||
RESAMPLE_FUNCS(float, fma3);
|
RESAMPLE_FUNCS(float, fma3);
|
||||||
RESAMPLE_FUNCS(float, fma4);
|
RESAMPLE_FUNCS(float, fma4);
|
||||||
RESAMPLE_FUNCS(double, sse2);
|
RESAMPLE_FUNCS(double, sse2);
|
||||||
|
RESAMPLE_FUNCS(double, avx);
|
||||||
|
RESAMPLE_FUNCS(double, fma3);
|
||||||
|
|
||||||
av_cold void swri_resample_dsp_x86_init(ResampleContext *c)
|
av_cold void swri_resample_dsp_x86_init(ResampleContext *c)
|
||||||
{
|
{
|
||||||
@ -85,6 +87,14 @@ av_cold void swri_resample_dsp_x86_init(ResampleContext *c)
|
|||||||
c->dsp.resample_linear = ff_resample_linear_double_sse2;
|
c->dsp.resample_linear = ff_resample_linear_double_sse2;
|
||||||
c->dsp.resample_common = ff_resample_common_double_sse2;
|
c->dsp.resample_common = ff_resample_common_double_sse2;
|
||||||
}
|
}
|
||||||
|
if (EXTERNAL_AVX_FAST(mm_flags)) {
|
||||||
|
c->dsp.resample_linear = ff_resample_linear_double_avx;
|
||||||
|
c->dsp.resample_common = ff_resample_common_double_avx;
|
||||||
|
}
|
||||||
|
if (EXTERNAL_FMA3_FAST(mm_flags)) {
|
||||||
|
c->dsp.resample_linear = ff_resample_linear_double_fma3;
|
||||||
|
c->dsp.resample_common = ff_resample_common_double_fma3;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user