mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
x86/swr: add ff_resample_{common, linear}_float_fma
Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
parent
a441a2437b
commit
1a69224f44
@ -179,17 +179,16 @@ cglobal resample_common_%1, 1, 7, 2, ctx, phase_shift, dst, frac, \
|
||||
pmaddwd m1, [filterq+min_filter_count_x4q*1]
|
||||
paddd m0, m1
|
||||
%else ; float/double
|
||||
%if cpuflag(fma4) || cpuflag(fma3)
|
||||
fmaddp%4 m0, m1, [filterq+min_filter_count_x4q*1], m0
|
||||
%else
|
||||
mulp%4 m1, m1, [filterq+min_filter_count_x4q*1]
|
||||
addp%4 m0, m0, m1
|
||||
%endif ; cpuflag
|
||||
%endif
|
||||
add min_filter_count_x4q, mmsize
|
||||
js .inner_loop
|
||||
|
||||
%if cpuflag(avx)
|
||||
vextractf128 xm1, m0, 0x1
|
||||
addps xm0, xm1
|
||||
%endif
|
||||
|
||||
%ifidn %1, int16
|
||||
%if mmsize == 16
|
||||
pshufd m1, m0, q0032
|
||||
@ -206,6 +205,10 @@ cglobal resample_common_%1, 1, 7, 2, ctx, phase_shift, dst, frac, \
|
||||
movd [dstq], m0
|
||||
%else ; float/double
|
||||
; horizontal sum & store
|
||||
%if mmsize == 32
|
||||
vextractf128 xm1, m0, 0x1
|
||||
addps xm0, xm1
|
||||
%endif
|
||||
movhlps xm1, xm0
|
||||
%ifidn %1, float
|
||||
addps xm0, xm1
|
||||
@ -429,21 +432,19 @@ cglobal resample_linear_%1, 1, 7, 5, ctx, min_filter_length_x4, filter2, \
|
||||
paddd m2, m3
|
||||
paddd m0, m1
|
||||
%else ; float/double
|
||||
%if cpuflag(fma4) || cpuflag(fma3)
|
||||
fmaddp%4 m2, m1, [filter2q+min_filter_count_x4q*1], m2
|
||||
fmaddp%4 m0, m1, [filter1q+min_filter_count_x4q*1], m0
|
||||
%else
|
||||
mulp%4 m3, m1, [filter2q+min_filter_count_x4q*1]
|
||||
mulp%4 m1, m1, [filter1q+min_filter_count_x4q*1]
|
||||
addp%4 m2, m2, m3
|
||||
addp%4 m0, m0, m1
|
||||
%endif ; cpuflag
|
||||
%endif
|
||||
add min_filter_count_x4q, mmsize
|
||||
js .inner_loop
|
||||
|
||||
%if cpuflag(avx)
|
||||
vextractf128 xm1, m0, 0x1
|
||||
vextractf128 xm3, m2, 0x1
|
||||
addps xm0, xm1
|
||||
addps xm2, xm3
|
||||
%endif
|
||||
|
||||
%ifidn %1, int16
|
||||
%if mmsize == 16
|
||||
pshufd m3, m2, q0032
|
||||
@ -479,12 +480,22 @@ cglobal resample_linear_%1, 1, 7, 5, ctx, min_filter_length_x4, filter2, \
|
||||
; - unix64: eax=r6[filter1], edx=r2[todo]
|
||||
%else ; float/double
|
||||
; val += (v2 - val) * (FELEML) frac / c->src_incr;
|
||||
%if mmsize == 32
|
||||
vextractf128 xm1, m0, 0x1
|
||||
vextractf128 xm3, m2, 0x1
|
||||
addps xm0, xm1
|
||||
addps xm2, xm3
|
||||
%endif
|
||||
cvtsi2s%4 xm1, fracd
|
||||
subp%4 xm2, xm0
|
||||
mulp%4 xm1, xm4
|
||||
shufp%4 xm1, xm1, q0000
|
||||
%if cpuflag(fma4) || cpuflag(fma3)
|
||||
fmaddp%4 xm0, xm2, xm1, xm0
|
||||
%else
|
||||
mulp%4 xm2, xm1
|
||||
addp%4 xm0, xm2
|
||||
%endif ; cpuflag
|
||||
|
||||
; horizontal sum & store
|
||||
movhlps xm1, xm0
|
||||
@ -564,6 +575,14 @@ RESAMPLE_FNS float, 4, 2, s, pf_1
|
||||
INIT_YMM avx
|
||||
RESAMPLE_FNS float, 4, 2, s, pf_1
|
||||
%endif
|
||||
; FMA3 build of the float resamplers, assembled only when HAVE_FMA3_EXTERNAL
; is set. With the fma3 cpuflag active, the
; "%if cpuflag(fma4) || cpuflag(fma3)" branches seen in the resample bodies
; pick the fused fmaddp%4 form instead of the separate mulp%4 + addp%4 pair.
; Instantiated with INIT_YMM, so the "%if mmsize == 32" reductions
; (vextractf128 + addps) presumably apply to this variant -- confirm that
; INIT_YMM sets mmsize to 32.
%if HAVE_FMA3_EXTERNAL
|
||||
INIT_YMM fma3
|
||||
RESAMPLE_FNS float, 4, 2, s, pf_1
|
||||
%endif
|
||||
; FMA4 build of the same float resamplers, assembled only when
; HAVE_FMA4_EXTERNAL is set. Unlike the FMA3 build above, this one is
; instantiated with INIT_XMM rather than INIT_YMM.
; NOTE(review): the reason for the narrower register width is not visible
; here -- confirm against the commit discussion before changing it.
%if HAVE_FMA4_EXTERNAL
|
||||
INIT_XMM fma4
|
||||
RESAMPLE_FNS float, 4, 2, s, pf_1
|
||||
%endif
|
||||
|
||||
%if ARCH_X86_32
|
||||
INIT_MMX mmxext
|
||||
|
@ -27,30 +27,19 @@
|
||||
|
||||
#include "libswresample/resample.h"
|
||||
|
||||
int ff_resample_common_int16_mmxext(ResampleContext *c, uint8_t *dst,
|
||||
const uint8_t *src, int sz, int upd);
|
||||
int ff_resample_linear_int16_mmxext(ResampleContext *c, uint8_t *dst,
|
||||
const uint8_t *src, int sz, int upd);
|
||||
/*
 * Declare the two resampler prototypes for one (sample type, instruction set)
 * pair: ff_resample_common_<type>_<opt> and ff_resample_linear_<type>_<opt>.
 * Both take (ResampleContext *c, uint8_t *dst, const uint8_t *src, int sz,
 * int upd) and return int.
 *
 * The second declaration deliberately has no trailing semicolon; the
 * invocation site supplies it, e.g. RESAMPLE_FUNCS(float, avx);
 */
#define RESAMPLE_FUNCS(type, opt) \
|
||||
int ff_resample_common_##type##_##opt(ResampleContext *c, uint8_t *dst, \
|
||||
const uint8_t *src, int sz, int upd); \
|
||||
int ff_resample_linear_##type##_##opt(ResampleContext *c, uint8_t *dst, \
|
||||
const uint8_t *src, int sz, int upd)
|
||||
|
||||
int ff_resample_common_int16_sse2(ResampleContext *c, uint8_t *dst,
|
||||
const uint8_t *src, int sz, int upd);
|
||||
int ff_resample_linear_int16_sse2(ResampleContext *c, uint8_t *dst,
|
||||
const uint8_t *src, int sz, int upd);
|
||||
|
||||
int ff_resample_common_float_sse(ResampleContext *c, uint8_t *dst,
|
||||
const uint8_t *src, int sz, int upd);
|
||||
int ff_resample_linear_float_sse(ResampleContext *c, uint8_t *dst,
|
||||
const uint8_t *src, int sz, int upd);
|
||||
|
||||
int ff_resample_common_float_avx(ResampleContext *c, uint8_t *dst,
|
||||
const uint8_t *src, int sz, int upd);
|
||||
int ff_resample_linear_float_avx(ResampleContext *c, uint8_t *dst,
|
||||
const uint8_t *src, int sz, int upd);
|
||||
|
||||
int ff_resample_common_double_sse2(ResampleContext *c, uint8_t *dst,
|
||||
const uint8_t *src, int sz, int upd);
|
||||
int ff_resample_linear_double_sse2(ResampleContext *c, uint8_t *dst,
|
||||
const uint8_t *src, int sz, int upd);
|
||||
/*
 * Prototypes for every sample-type / instruction-set variant, one
 * common + linear pair per line. The float fma3 and fma4 pairs are the ones
 * assigned to the FLTP slots in swresample_dsp_x86_init().
 */
RESAMPLE_FUNCS(int16, mmxext);
|
||||
RESAMPLE_FUNCS(int16, sse2);
|
||||
RESAMPLE_FUNCS(float, sse);
|
||||
RESAMPLE_FUNCS(float, avx);
|
||||
RESAMPLE_FUNCS(float, fma3);
|
||||
RESAMPLE_FUNCS(float, fma4);
|
||||
RESAMPLE_FUNCS(double, sse2);
|
||||
|
||||
void swresample_dsp_x86_init(ResampleContext *c)
|
||||
{
|
||||
@ -76,4 +65,12 @@ void swresample_dsp_x86_init(ResampleContext *c)
|
||||
c->dsp.resample_common[FNIDX(FLTP)] = ff_resample_common_float_avx;
|
||||
c->dsp.resample_linear[FNIDX(FLTP)] = ff_resample_linear_float_avx;
|
||||
}
|
||||
if (HAVE_FMA3_EXTERNAL && mm_flags & AV_CPU_FLAG_FMA3) {
|
||||
c->dsp.resample_common[FNIDX(FLTP)] = ff_resample_common_float_fma3;
|
||||
c->dsp.resample_linear[FNIDX(FLTP)] = ff_resample_linear_float_fma3;
|
||||
}
|
||||
if (HAVE_FMA4_EXTERNAL && mm_flags & AV_CPU_FLAG_FMA4) {
|
||||
c->dsp.resample_common[FNIDX(FLTP)] = ff_resample_common_float_fma4;
|
||||
c->dsp.resample_linear[FNIDX(FLTP)] = ff_resample_linear_float_fma4;
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user