1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2024-12-23 12:43:46 +02:00

x86/lpc: fix overreads/overwrites in the even-length scalar loop

Passes checkasm under valgrind; tested with sizes of more than 4000 samples.
This commit is contained in:
Lynne 2022-09-22 03:41:02 +02:00
parent dea944b838
commit b67776e12f
No known key found for this signature in database
GPG Key ID: A2FEA5F03F034464
2 changed files with 23 additions and 13 deletions

View File

@ -38,6 +38,8 @@ SECTION .text
cglobal lpc_apply_welch_window, 3, 5, 8, data, len, out, off1, off2 cglobal lpc_apply_welch_window, 3, 5, 8, data, len, out, off1, off2
cmp lenq, 0 cmp lenq, 0
je .end je .end
cmp lenq, 2
je .two
cmp lenq, 1 cmp lenq, 1
je .one je .one
@ -192,14 +194,13 @@ cglobal lpc_apply_welch_window, 3, 5, 8, data, len, out, off1, off2
jge .loop_e jge .loop_e
.scalar_e: .scalar_e:
subpd m0, m7 subpd xm0, xm7
movapd m7, [dec_tab_scalar] movapd xm7, [dec_tab_scalar]
subpd m0, m7 subpd xm0, xm7
subpd m0, m7
subpd m0, m7
add off1q, (mmsize/2) add off1q, (mmsize/2)
sub off2q, (mmsize/2) - 4 - 8*cpuflag(avx2) sub off2q, (mmsize/2) - 8*cpuflag(avx2)
add lenq, 6 + 4*cpuflag(avx2)
addpd xm0, [sub_tab] addpd xm0, [sub_tab]
@ -208,22 +209,27 @@ cglobal lpc_apply_welch_window, 3, 5, 8, data, len, out, off1, off2
mulpd xm2, xm0, xm0 mulpd xm2, xm0, xm0
subpd xm1, xm2 subpd xm1, xm2
cvtdq2pd m3, [dataq + off1q - 4] cvtdq2pd xm3, [dataq + off1q]
cvtdq2pd m4, [dataq + off2q - 4] cvtdq2pd xm4, [dataq + off2q]
mulpd m3, m1 mulpd xm3, xm1
mulpd m4, m1 shufpd xm1, xm1, 00b
mulpd xm4, xm1
movhpd [outq + off1q*2], xm3 movlpd [outq + off1q*2], xm3
movhpd [outq + off2q*2], xm4 movhpd [outq + off2q*2 + 8], xm4
subpd xm0, xm7 subpd xm0, xm7
add off2q, 4 add off2q, 4
sub off1q, 4 sub off1q, 4
jge .loop_e_scalar sub lenq, 2
jg .loop_e_scalar
RET RET
.two:
xorpd xm0, xm0
movhpd [outq + 8], xm0
.one: .one:
xorpd xm0, xm0 xorpd xm0, xm0
movhpd [outq], xm0 movhpd [outq], xm0

View File

@ -68,6 +68,10 @@ void checkasm_check_lpc(void)
} }
report("apply_welch_window_odd"); report("apply_welch_window_odd");
if (check_func(ctx.lpc_apply_welch_window, "apply_welch_window_2560"))
test_window(2560);
report("apply_welch_window_2560");
if (check_func(ctx.lpc_apply_welch_window, "apply_welch_window_4096")) if (check_func(ctx.lpc_apply_welch_window, "apply_welch_window_4096"))
test_window(4096); test_window(4096);
report("apply_welch_window_4096"); report("apply_welch_window_4096");