mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-13 21:28:01 +02:00
sws: allow avx2 hscale to process inputs of any size.
The main loop processes blocks of 16 pixels; the tail loop then processes the remaining pixels in blocks of 4.
Signed-off-by: Anton Khirnov <anton@khirnov.net>
This commit is contained in:
parent
51a34e8525
commit
a6724285fd
@ -53,6 +53,9 @@ cglobal hscale8to15_%1, 7, 9, 16, pos0, dst, w, srcmem, filter, fltpos, fltsize,
|
|||||||
mova m14, [four]
|
mova m14, [four]
|
||||||
shr fltsized, 2
|
shr fltsized, 2
|
||||||
%endif
|
%endif
|
||||||
|
cmp wq, 0x10
|
||||||
|
jl .tail_loop
|
||||||
|
sub wq, 0x10
|
||||||
.loop:
|
.loop:
|
||||||
movu m1, [fltposq]
|
movu m1, [fltposq]
|
||||||
movu m2, [fltposq+32]
|
movu m2, [fltposq+32]
|
||||||
@ -101,7 +104,46 @@ cglobal hscale8to15_%1, 7, 9, 16, pos0, dst, w, srcmem, filter, fltpos, fltsize,
|
|||||||
add fltposq, 0x40
|
add fltposq, 0x40
|
||||||
add countq, 0x10
|
add countq, 0x10
|
||||||
cmp countq, wq
|
cmp countq, wq
|
||||||
jl .loop
|
jle .loop
|
||||||
|
|
||||||
|
add wq, 0x10
|
||||||
|
cmp countq, wq
|
||||||
|
jge .end
|
||||||
|
|
||||||
|
.tail_loop:
|
||||||
|
movu xm1, [fltposq]
|
||||||
|
%ifidn %1, X4
|
||||||
|
pxor xm9, xm9
|
||||||
|
pxor xm10, xm10
|
||||||
|
xor innerq, innerq
|
||||||
|
.tail_innerloop:
|
||||||
|
%endif
|
||||||
|
vpcmpeqd xm13, xm13
|
||||||
|
vpgatherdd xm3,[srcmemq + xm1], xm13
|
||||||
|
vpunpcklbw xm5, xm3, xm0
|
||||||
|
vpunpckhbw xm6, xm3, xm0
|
||||||
|
vpmaddwd xm5, xm5, [filterq]
|
||||||
|
vpmaddwd xm6, xm6, [filterq + 0x10]
|
||||||
|
add filterq, 0x20
|
||||||
|
%ifidn %1, X4
|
||||||
|
paddd xm9, xm5
|
||||||
|
paddd xm10, xm6
|
||||||
|
paddd xm1, xm14
|
||||||
|
add innerq, 1
|
||||||
|
cmp innerq, fltsizeq
|
||||||
|
jl .tail_innerloop
|
||||||
|
vphaddd xm5, xm9, xm10
|
||||||
|
%else
|
||||||
|
vphaddd xm5, xm5, xm6
|
||||||
|
%endif
|
||||||
|
vpsrad xm5, 7
|
||||||
|
vpackssdw xm5, xm5, xm5
|
||||||
|
vmovq [dstq + countq * 2], xm5
|
||||||
|
add fltposq, 0x10
|
||||||
|
add countq, 0x4
|
||||||
|
cmp countq, wq
|
||||||
|
jl .tail_loop
|
||||||
|
.end:
|
||||||
REP_RET
|
REP_RET
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user