mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-28 20:53:54 +02:00
x86/aacps: add ff_ps_stereo_interpolate_ipdopd_sse3()
About 2x faster than the c version. Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
parent
3385989b98
commit
b5a0971ff0
@ -117,6 +117,57 @@ align 16
|
||||
.ret:
|
||||
REP_RET
|
||||
|
||||
;***************************************************************************
|
||||
;void ps_stereo_interpolate_ipdopd_sse3(float (*l)[2], float (*r)[2],
|
||||
; float h[2][4], float h_step[2][4],
|
||||
; int len);
|
||||
;***************************************************************************
|
||||
INIT_XMM sse3
|
||||
cglobal ps_stereo_interpolate_ipdopd, 5, 5, 10, l, r, h, h_step, n
|
||||
cmp nd, 0
|
||||
jle .ret
|
||||
movaps m0, [hq]
|
||||
movaps m1, [hq+mmsize]
|
||||
%if ARCH_X86_64
|
||||
movaps m8, [h_stepq]
|
||||
movaps m9, [h_stepq+mmsize]
|
||||
%define H_STEP0 m8
|
||||
%define H_STEP1 m9
|
||||
%else
|
||||
%define H_STEP0 [h_stepq]
|
||||
%define H_STEP1 [h_stepq+mmsize]
|
||||
%endif
|
||||
shl nd, 3
|
||||
add lq, nq
|
||||
add rq, nq
|
||||
neg nq
|
||||
|
||||
align 16
|
||||
.loop:
|
||||
addps m0, H_STEP0
|
||||
addps m1, H_STEP1
|
||||
movddup m2, [lq+nq]
|
||||
movddup m3, [rq+nq]
|
||||
shufps m4, m2, m2, q2301
|
||||
shufps m5, m3, m3, q2301
|
||||
unpcklps m6, m0, m0
|
||||
unpckhps m7, m0, m0
|
||||
mulps m2, m6
|
||||
mulps m3, m7
|
||||
unpcklps m6, m1, m1
|
||||
unpckhps m7, m1, m1
|
||||
mulps m4, m6
|
||||
mulps m5, m7
|
||||
addps m2, m3
|
||||
addsubps m4, m5
|
||||
addsubps m2, m4
|
||||
movsd [lq+nq], m2
|
||||
movhps [rq+nq], m2
|
||||
add nq, 8
|
||||
jl .loop
|
||||
.ret:
|
||||
REP_RET
|
||||
|
||||
;*******************************************************************
|
||||
;void ff_ps_hybrid_analysis_<opt>(float (*out)[2], float (*in)[2],
|
||||
; const float (*filter)[8][2],
|
||||
|
@ -37,6 +37,9 @@ void ff_ps_hybrid_analysis_sse3(float (*out)[2], float (*in)[2],
|
||||
void ff_ps_stereo_interpolate_sse3(float (*l)[2], float (*r)[2],
|
||||
float h[2][4], float h_step[2][4],
|
||||
int len);
|
||||
void ff_ps_stereo_interpolate_ipdopd_sse3(float (*l)[2], float (*r)[2],
|
||||
float h[2][4], float h_step[2][4],
|
||||
int len);
|
||||
|
||||
av_cold void ff_psdsp_init_x86(PSDSPContext *s)
|
||||
{
|
||||
@ -50,6 +53,7 @@ av_cold void ff_psdsp_init_x86(PSDSPContext *s)
|
||||
if (EXTERNAL_SSE3(cpu_flags)) {
|
||||
s->add_squares = ff_ps_add_squares_sse3;
|
||||
s->stereo_interpolate[0] = ff_ps_stereo_interpolate_sse3;
|
||||
s->stereo_interpolate[1] = ff_ps_stereo_interpolate_ipdopd_sse3;
|
||||
s->hybrid_analysis = ff_ps_hybrid_analysis_sse3;
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user