1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-01-13 21:28:01 +02:00

x86/ac3dsp: reduce instruction count inside the float_to_fixed24 loop

Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
James Almer 2023-11-22 16:04:02 -03:00
parent 2d9ed64859
commit d8b1a34433

View File

@@ -77,16 +77,20 @@ AC3_EXPONENT_MIN
INIT_XMM sse2
cglobal float_to_fixed24, 3, 3, 9, dst, src, len
movaps m0, [pf_1_24]
shl lenq, 2
add srcq, lenq
add dstq, lenq
neg lenq
.loop:
movaps m1, [srcq ]
movaps m2, [srcq+16 ]
movaps m3, [srcq+32 ]
movaps m4, [srcq+48 ]
movaps m1, [srcq+lenq ]
movaps m2, [srcq+lenq+16 ]
movaps m3, [srcq+lenq+32 ]
movaps m4, [srcq+lenq+48 ]
%ifdef m8
movaps m5, [srcq+64 ]
movaps m6, [srcq+80 ]
movaps m7, [srcq+96 ]
movaps m8, [srcq+112]
movaps m5, [srcq+lenq+64 ]
movaps m6, [srcq+lenq+80 ]
movaps m7, [srcq+lenq+96 ]
movaps m8, [srcq+lenq+112]
%endif
mulps m1, m0
mulps m2, m0
@@ -108,24 +112,20 @@ cglobal float_to_fixed24, 3, 3, 9, dst, src, len
cvtps2dq m7, m7
cvtps2dq m8, m8
%endif
movdqa [dstq ], m1
movdqa [dstq+16 ], m2
movdqa [dstq+32 ], m3
movdqa [dstq+48 ], m4
movdqa [dstq+lenq ], m1
movdqa [dstq+lenq+16 ], m2
movdqa [dstq+lenq+32 ], m3
movdqa [dstq+lenq+48 ], m4
%ifdef m8
movdqa [dstq+64 ], m5
movdqa [dstq+80 ], m6
movdqa [dstq+96 ], m7
movdqa [dstq+112], m8
add srcq, 128
add dstq, 128
sub lenq, 32
movdqa [dstq+lenq+64 ], m5
movdqa [dstq+lenq+80 ], m6
movdqa [dstq+lenq+96 ], m7
movdqa [dstq+lenq+112], m8
add lenq, 128
%else
add srcq, 64
add dstq, 64
sub lenq, 16
add lenq, 64
%endif
ja .loop
jl .loop
RET
;------------------------------------------------------------------------------