You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-08-15 14:13:16 +02:00
x86/sbrdsp: sign extend start and end gprs in ff_sbr_hf_gen_sse
Tested-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
@@ -149,19 +149,19 @@ cglobal sbr_hf_gen, 4,4,8, X_high, X_low, alpha0, alpha1, BW, S, E
|
|||||||
; start and end 6th and 7th args on stack
|
; start and end 6th and 7th args on stack
|
||||||
mov r2d, Sm
|
mov r2d, Sm
|
||||||
mov r3d, Em
|
mov r3d, Em
|
||||||
%define start r2q
|
DEFINE_ARGS X_high, X_low, start, end
|
||||||
%define end r3q
|
|
||||||
%else
|
%else
|
||||||
; BW does not actually occupy a register, so shift by 1
|
; BW does not actually occupy a register, so shift by 1
|
||||||
%define start BWq
|
DEFINE_ARGS X_high, X_low, alpha0, alpha1, start, end
|
||||||
%define end Sq
|
movsxd startq, startd
|
||||||
|
movsxd endq, endd
|
||||||
%endif
|
%endif
|
||||||
sub start, end ; neg num of loops
|
sub startq, endq ; neg num of loops
|
||||||
lea X_highq, [X_highq + end*2*4]
|
lea X_highq, [X_highq + endq*2*4]
|
||||||
lea X_lowq, [X_lowq + end*2*4 - 2*2*4]
|
lea X_lowq, [X_lowq + endq*2*4 - 2*2*4]
|
||||||
shl start, 3 ; offset from num loops
|
shl startq, 3 ; offset from num loops
|
||||||
|
|
||||||
mova m0, [X_lowq + start]
|
mova m0, [X_lowq + startq]
|
||||||
shufps m3, m3, q1111
|
shufps m3, m3, q1111
|
||||||
shufps m4, m4, q1111
|
shufps m4, m4, q1111
|
||||||
xorps m3, [ps_mask]
|
xorps m3, [ps_mask]
|
||||||
@@ -169,7 +169,7 @@ cglobal sbr_hf_gen, 4,4,8, X_high, X_low, alpha0, alpha1, BW, S, E
|
|||||||
shufps m2, m2, q0000
|
shufps m2, m2, q0000
|
||||||
xorps m4, [ps_mask]
|
xorps m4, [ps_mask]
|
||||||
.loop2:
|
.loop2:
|
||||||
movu m7, [X_lowq + start + 8] ; BbCc
|
movu m7, [X_lowq + startq + 8] ; BbCc
|
||||||
mova m6, m0
|
mova m6, m0
|
||||||
mova m5, m7
|
mova m5, m7
|
||||||
shufps m0, m0, q2301 ; aAbB
|
shufps m0, m0, q2301 ; aAbB
|
||||||
@@ -179,12 +179,12 @@ cglobal sbr_hf_gen, 4,4,8, X_high, X_low, alpha0, alpha1, BW, S, E
|
|||||||
mulps m6, m2
|
mulps m6, m2
|
||||||
mulps m5, m1
|
mulps m5, m1
|
||||||
addps m7, m0
|
addps m7, m0
|
||||||
mova m0, [X_lowq + start +16] ; CcDd
|
mova m0, [X_lowq + startq + 16] ; CcDd
|
||||||
addps m7, m0
|
addps m7, m0
|
||||||
addps m6, m5
|
addps m6, m5
|
||||||
addps m7, m6
|
addps m7, m6
|
||||||
mova [X_highq + start], m7
|
mova [X_highq + startq], m7
|
||||||
add start, 16
|
add startq, 16
|
||||||
jnz .loop2
|
jnz .loop2
|
||||||
RET
|
RET
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user