mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-13 21:28:01 +02:00
x86/dsputil: fix VECTOR_CLIP_INT32 macro
The inline loop was incrementing and using the value of %%i the wrong way.

Disassembly of ff_vector_clip_int32_sse2 before and after this patch:

movdqa (%rdx),%xmm0     | movdqa (%rdx),%xmm0
movdqa 0x10(%rdx),%xmm1 | movdqa 0x10(%rdx),%xmm1
movdqa 0x20(%rdx),%xmm2 | movdqa 0x20(%rdx),%xmm2
movdqa 0x30(%rdx),%xmm3 | movdqa 0x30(%rdx),%xmm3
[...]                   |
movdqa %xmm0,(%rcx)     | movdqa %xmm0,(%rcx)
movdqa %xmm1,0x10(%rcx) | movdqa %xmm1,0x10(%rcx)
movdqa %xmm2,0x20(%rcx) | movdqa %xmm2,0x20(%rcx)
movdqa %xmm3,0x30(%rcx) | movdqa %xmm3,0x30(%rcx)
movdqa (%rdx),%xmm0     | movdqa 0x40(%rdx),%xmm0
movdqa 0x20(%rdx),%xmm1 | movdqa 0x50(%rdx),%xmm1
movdqa 0x40(%rdx),%xmm2 | movdqa 0x60(%rdx),%xmm2
movdqa 0x60(%rdx),%xmm3 | movdqa 0x70(%rdx),%xmm3
[...]                   |
movdqa %xmm0,(%rcx)     | movdqa %xmm0,0x40(%rcx)
movdqa %xmm1,0x20(%rcx) | movdqa %xmm1,0x50(%rcx)
movdqa %xmm2,0x40(%rcx) | movdqa %xmm2,0x60(%rcx)
movdqa %xmm3,0x60(%rcx) | movdqa %xmm3,0x70(%rcx)
add $0x80,%rdx          | add $0x80,%rdx
add $0x80,%rcx          | add $0x80,%rcx

Other versions were unaffected.

Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
parent
6b88f22e89
commit
518cbf9b4a
@ -351,17 +351,17 @@ cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
|
||||
SPLATD m4
|
||||
SPLATD m5
|
||||
.loop:
|
||||
%assign %%i 1
|
||||
%assign %%i 0
|
||||
%rep %2
|
||||
mova m0, [srcq+mmsize*0*%%i]
|
||||
mova m1, [srcq+mmsize*1*%%i]
|
||||
mova m2, [srcq+mmsize*2*%%i]
|
||||
mova m3, [srcq+mmsize*3*%%i]
|
||||
mova m0, [srcq+mmsize*(0+%%i)]
|
||||
mova m1, [srcq+mmsize*(1+%%i)]
|
||||
mova m2, [srcq+mmsize*(2+%%i)]
|
||||
mova m3, [srcq+mmsize*(3+%%i)]
|
||||
%if %3
|
||||
mova m7, [srcq+mmsize*4*%%i]
|
||||
mova m8, [srcq+mmsize*5*%%i]
|
||||
mova m9, [srcq+mmsize*6*%%i]
|
||||
mova m10, [srcq+mmsize*7*%%i]
|
||||
mova m7, [srcq+mmsize*(4+%%i)]
|
||||
mova m8, [srcq+mmsize*(5+%%i)]
|
||||
mova m9, [srcq+mmsize*(6+%%i)]
|
||||
mova m10, [srcq+mmsize*(7+%%i)]
|
||||
%endif
|
||||
CLIPD m0, m4, m5, m6
|
||||
CLIPD m1, m4, m5, m6
|
||||
@ -373,17 +373,17 @@ cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
|
||||
CLIPD m9, m4, m5, m6
|
||||
CLIPD m10, m4, m5, m6
|
||||
%endif
|
||||
mova [dstq+mmsize*0*%%i], m0
|
||||
mova [dstq+mmsize*1*%%i], m1
|
||||
mova [dstq+mmsize*2*%%i], m2
|
||||
mova [dstq+mmsize*3*%%i], m3
|
||||
mova [dstq+mmsize*(0+%%i)], m0
|
||||
mova [dstq+mmsize*(1+%%i)], m1
|
||||
mova [dstq+mmsize*(2+%%i)], m2
|
||||
mova [dstq+mmsize*(3+%%i)], m3
|
||||
%if %3
|
||||
mova [dstq+mmsize*4*%%i], m7
|
||||
mova [dstq+mmsize*5*%%i], m8
|
||||
mova [dstq+mmsize*6*%%i], m9
|
||||
mova [dstq+mmsize*7*%%i], m10
|
||||
mova [dstq+mmsize*(4+%%i)], m7
|
||||
mova [dstq+mmsize*(5+%%i)], m8
|
||||
mova [dstq+mmsize*(6+%%i)], m9
|
||||
mova [dstq+mmsize*(7+%%i)], m10
|
||||
%endif
|
||||
%assign %%i %%i+1
|
||||
%assign %%i %%i+4*(%3+1)
|
||||
%endrep
|
||||
add srcq, mmsize*4*(%2+%3)
|
||||
add dstq, mmsize*4*(%2+%3)
|
||||
|
Loading…
Reference in New Issue
Block a user