mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-13 21:28:01 +02:00
libswscale/x86/yuv2yuvX: Removes unrolling for mmx and mmxext
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
This commit is contained in:
parent
dc57762cb4
commit
3ce8d09244
@@ -37,8 +37,10 @@ SECTION .text
 cglobal yuv2yuvX, 7, 7, 8, filter, filterSize, src, dest, dstW, dither, offset
 %if notcpuflag(sse3)
 %define movr mova
+%define unroll 1
 %else
 %define movr movdqu
+%define unroll 2
 %endif
 movsxdifnidn dstWq, dstWd
 movsxdifnidn offsetq, offsetd
@@ -70,8 +72,10 @@ cglobal yuv2yuvX, 7, 7, 8, filter, filterSize, src, dest, dstW, dither, offset
 .outerloop:
 mova m4, m7
 mova m3, m7
+%if cpuflag(sse3)
 mova m6, m7
 mova m1, m7
+%endif
 .loop:
 %if cpuflag(avx2)
 vpbroadcastq m0, [filterSizeq + 8]
@@ -84,28 +88,36 @@ cglobal yuv2yuvX, 7, 7, 8, filter, filterSize, src, dest, dstW, dither, offset
 pmulhw m5, m0, [srcq + offsetq * 2 + mmsize]
 paddw m3, m3, m2
 paddw m4, m4, m5
+%if cpuflag(sse3)
 pmulhw m2, m0, [srcq + offsetq * 2 + 2 * mmsize]
 pmulhw m5, m0, [srcq + offsetq * 2 + 3 * mmsize]
 paddw m6, m6, m2
 paddw m1, m1, m5
+%endif
 add filterSizeq, $10
 mov srcq, [filterSizeq]
 test srcq, srcq
 jnz .loop
 psraw m3, m3, 3
 psraw m4, m4, 3
+%if cpuflag(sse3)
 psraw m6, m6, 3
 psraw m1, m1, 3
+%endif
 packuswb m3, m3, m4
+%if cpuflag(sse3)
 packuswb m6, m6, m1
+%endif
 mov srcq, [filterq]
 %if cpuflag(avx2)
 vpermq m3, m3, 216
 vpermq m6, m6, 216
 %endif
 movr [destq + offsetq], m3
+%if cpuflag(sse3)
 movr [destq + offsetq + mmsize], m6
-add offsetq, mmsize * 2
+%endif
+add offsetq, mmsize * unroll
 mov filterSizeq, filterq
 cmp offsetq, dstWq
 jb .outerloop
Loading…
Reference in New Issue
Block a user