1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-01-13 21:28:01 +02:00

x86/ac3dsp: reduce instruction count inside the float_to_fixed24 loop

Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
James Almer 2023-11-22 16:04:02 -03:00
parent 2d9ed64859
commit d8b1a34433

View File

@@ -77,16 +77,20 @@ AC3_EXPONENT_MIN
INIT_XMM sse2 INIT_XMM sse2
cglobal float_to_fixed24, 3, 3, 9, dst, src, len cglobal float_to_fixed24, 3, 3, 9, dst, src, len
movaps m0, [pf_1_24] movaps m0, [pf_1_24]
shl lenq, 2
add srcq, lenq
add dstq, lenq
neg lenq
.loop: .loop:
movaps m1, [srcq ] movaps m1, [srcq+lenq ]
movaps m2, [srcq+16 ] movaps m2, [srcq+lenq+16 ]
movaps m3, [srcq+32 ] movaps m3, [srcq+lenq+32 ]
movaps m4, [srcq+48 ] movaps m4, [srcq+lenq+48 ]
%ifdef m8 %ifdef m8
movaps m5, [srcq+64 ] movaps m5, [srcq+lenq+64 ]
movaps m6, [srcq+80 ] movaps m6, [srcq+lenq+80 ]
movaps m7, [srcq+96 ] movaps m7, [srcq+lenq+96 ]
movaps m8, [srcq+112] movaps m8, [srcq+lenq+112]
%endif %endif
mulps m1, m0 mulps m1, m0
mulps m2, m0 mulps m2, m0
@@ -108,24 +112,20 @@ cglobal float_to_fixed24, 3, 3, 9, dst, src, len
cvtps2dq m7, m7 cvtps2dq m7, m7
cvtps2dq m8, m8 cvtps2dq m8, m8
%endif %endif
movdqa [dstq ], m1 movdqa [dstq+lenq ], m1
movdqa [dstq+16 ], m2 movdqa [dstq+lenq+16 ], m2
movdqa [dstq+32 ], m3 movdqa [dstq+lenq+32 ], m3
movdqa [dstq+48 ], m4 movdqa [dstq+lenq+48 ], m4
%ifdef m8 %ifdef m8
movdqa [dstq+64 ], m5 movdqa [dstq+lenq+64 ], m5
movdqa [dstq+80 ], m6 movdqa [dstq+lenq+80 ], m6
movdqa [dstq+96 ], m7 movdqa [dstq+lenq+96 ], m7
movdqa [dstq+112], m8 movdqa [dstq+lenq+112], m8
add srcq, 128 add lenq, 128
add dstq, 128
sub lenq, 32
%else %else
add srcq, 64 add lenq, 64
add dstq, 64
sub lenq, 16
%endif %endif
ja .loop jl .loop
RET RET
;------------------------------------------------------------------------------ ;------------------------------------------------------------------------------