mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-03-03 14:32:16 +02:00
x86/sbrdsp: zero extend m_max in apply_noise_main
Tested-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
parent
440285474b
commit
bcbe9e4447
@ -378,24 +378,24 @@ cglobal sbr_hf_apply_noise_3, 5,5+NREGS+UNIX64,8, Y,s_m,q_filt,noise,kx,m_max
|
||||
apply_noise_main:
|
||||
%if ARCH_X86_64 == 0 || WIN64
|
||||
mov kxd, m_maxm
|
||||
%define count kxq
|
||||
DEFINE_ARGS Y, s_m, q_filt, noise, count
|
||||
%else
|
||||
%define count m_maxq
|
||||
DEFINE_ARGS Y, s_m, q_filt, noise, kx, count
|
||||
%endif
|
||||
movsxdifnidn noiseq, noised
|
||||
dec noiseq
|
||||
shl count, 2
|
||||
shl countd, 2
|
||||
%ifdef PIC
|
||||
lea NOISE_TABLE, [sbr_noise_table]
|
||||
%endif
|
||||
lea Yq, [Yq + 2*count]
|
||||
add s_mq, count
|
||||
add q_filtq, count
|
||||
lea Yq, [Yq + 2*countq]
|
||||
add s_mq, countq
|
||||
add q_filtq, countq
|
||||
shl noiseq, 3
|
||||
pxor m5, m5
|
||||
neg count
|
||||
neg countq
|
||||
.loop:
|
||||
mova m1, [q_filtq + count]
|
||||
mova m1, [q_filtq + countq]
|
||||
movu m3, [noiseq + NOISE_TABLE + 1*mmsize]
|
||||
movu m4, [noiseq + NOISE_TABLE + 2*mmsize]
|
||||
add noiseq, 2*mmsize
|
||||
@ -404,7 +404,7 @@ apply_noise_main:
|
||||
punpckldq m1, m1
|
||||
mulps m1, m3 ; m1 = q_filt[m] * ff_sbr_noise_table[noise]
|
||||
mulps m2, m4 ; m2 = q_filt[m] * ff_sbr_noise_table[noise]
|
||||
mova m3, [s_mq + count]
|
||||
mova m3, [s_mq + countq]
|
||||
; TODO: replace by a vpermd in AVX2
|
||||
punpckhdq m4, m3, m3
|
||||
punpckldq m3, m3
|
||||
@ -414,15 +414,15 @@ apply_noise_main:
|
||||
mulps m4, m0 ; s_m[m] * phi_sign
|
||||
pand m1, m6
|
||||
pand m2, m7
|
||||
movu m6, [Yq + 2*count]
|
||||
movu m7, [Yq + 2*count + mmsize]
|
||||
movu m6, [Yq + 2*countq]
|
||||
movu m7, [Yq + 2*countq + mmsize]
|
||||
addps m3, m1
|
||||
addps m4, m2
|
||||
addps m6, m3
|
||||
addps m7, m4
|
||||
movu [Yq + 2*count], m6
|
||||
movu [Yq + 2*count + mmsize], m7
|
||||
add count, mmsize
|
||||
movu [Yq + 2*countq], m6
|
||||
movu [Yq + 2*countq + mmsize], m7
|
||||
add countq, mmsize
|
||||
jl .loop
|
||||
RET
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user