mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-11-21 10:55:51 +02:00
x86/vf_stereo3d: optimize register usage
Reviewed-by: Paul B Mahol <onemda@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
parent
4020787b5b
commit
6e243d17e9
@ -37,125 +37,133 @@ ex_b: db 2,-1,-1,-1,5,-1,-1,-1,8,-1,-1,-1,11,-1,-1,-1
|
||||
SECTION .text
|
||||
|
||||
INIT_XMM sse4
|
||||
cglobal anaglyph, 11, 13, 16, 2*6*mmsize, dst, lsrc, rsrc, dst_linesize, l_linesize, r_linesize, width, height, ana_matrix_r, ana_matrix_g, ana_matrix_b
|
||||
movu m13, [ana_matrix_rq+ 0]
|
||||
movq m15, [ana_matrix_rq+16]
|
||||
pshufd m10, m13, q0000
|
||||
pshufd m11, m13, q1111
|
||||
pshufd m12, m13, q2222
|
||||
pshufd m13, m13, q3333
|
||||
pshufd m14, m15, q0000
|
||||
pshufd m15, m15, q1111
|
||||
mova [rsp+mmsize*0], m10
|
||||
mova [rsp+mmsize*1], m11
|
||||
mova [rsp+mmsize*2], m12
|
||||
mova [rsp+mmsize*3], m13
|
||||
mova [rsp+mmsize*4], m14
|
||||
mova [rsp+mmsize*5], m15
|
||||
cglobal anaglyph, 6, 10, 14, 2*6*mmsize, dst, lsrc, rsrc, dst_linesize, l_linesize, r_linesize, width, height, o, cnt
|
||||
%define ana_matrix_rq r6q
|
||||
%define ana_matrix_gq r7q
|
||||
%define ana_matrix_bq r8q
|
||||
mov ana_matrix_rq, r8m
|
||||
mov ana_matrix_gq, r9m
|
||||
mov ana_matrix_bq, r10m
|
||||
movu m3, [ana_matrix_rq+ 0]
|
||||
movq m5, [ana_matrix_rq+16]
|
||||
pshufd m0, m3, q0000
|
||||
pshufd m1, m3, q1111
|
||||
pshufd m2, m3, q2222
|
||||
pshufd m3, m3, q3333
|
||||
pshufd m4, m5, q0000
|
||||
pshufd m5, m5, q1111
|
||||
mova [rsp+mmsize*0], m0
|
||||
mova [rsp+mmsize*1], m1
|
||||
mova [rsp+mmsize*2], m2
|
||||
mova [rsp+mmsize*3], m3
|
||||
mova [rsp+mmsize*4], m4
|
||||
mova [rsp+mmsize*5], m5
|
||||
|
||||
movu m13, [ana_matrix_gq+ 0]
|
||||
movq m15, [ana_matrix_gq+16]
|
||||
pshufd m10, m13, q0000
|
||||
pshufd m11, m13, q1111
|
||||
pshufd m12, m13, q2222
|
||||
pshufd m13, m13, q3333
|
||||
pshufd m14, m15, q0000
|
||||
pshufd m15, m15, q1111
|
||||
mova [rsp+mmsize*6 ], m10
|
||||
mova [rsp+mmsize*7 ], m11
|
||||
mova [rsp+mmsize*8 ], m12
|
||||
mova [rsp+mmsize*9 ], m13
|
||||
mova [rsp+mmsize*10], m14
|
||||
mova [rsp+mmsize*11], m15
|
||||
movu m3, [ana_matrix_gq+ 0]
|
||||
movq m5, [ana_matrix_gq+16]
|
||||
pshufd m0, m3, q0000
|
||||
pshufd m1, m3, q1111
|
||||
pshufd m2, m3, q2222
|
||||
pshufd m3, m3, q3333
|
||||
pshufd m4, m5, q0000
|
||||
pshufd m5, m5, q1111
|
||||
mova [rsp+mmsize*6 ], m0
|
||||
mova [rsp+mmsize*7 ], m1
|
||||
mova [rsp+mmsize*8 ], m2
|
||||
mova [rsp+mmsize*9 ], m3
|
||||
mova [rsp+mmsize*10], m4
|
||||
mova [rsp+mmsize*11], m5
|
||||
|
||||
movu m11, [ana_matrix_bq+ 0]
|
||||
movq m13, [ana_matrix_bq+16]
|
||||
pshufd m8, m11, q0000
|
||||
pshufd m9, m11, q1111
|
||||
pshufd m10, m11, q2222
|
||||
pshufd m11, m11, q3333
|
||||
pshufd m12, m13, q0000
|
||||
pshufd m13, m13, q1111
|
||||
mov widthd, dword widthm
|
||||
mov heightd, dword heightm
|
||||
|
||||
movu m13, [ana_matrix_bq+ 0]
|
||||
movq m15, [ana_matrix_bq+16]
|
||||
pshufd m10, m13, q0000
|
||||
pshufd m11, m13, q1111
|
||||
pshufd m12, m13, q2222
|
||||
pshufd m13, m13, q3333
|
||||
pshufd m14, m15, q0000
|
||||
pshufd m15, m15, q1111
|
||||
.nextrow:
|
||||
mov r11q, widthq
|
||||
mov r12q, 0
|
||||
%define o r12q
|
||||
mov od, widthd
|
||||
xor cntd, cntd
|
||||
|
||||
.loop:
|
||||
movu m0, [lsrcq+o+0]
|
||||
movu m0, [lsrcq+cntq]
|
||||
pshufb m1, m0, [ex_r]
|
||||
pshufb m2, m0, [ex_g]
|
||||
pshufb m3, m0, [ex_b]
|
||||
movu m0, [rsrcq+o+0]
|
||||
movu m0, [rsrcq+cntq]
|
||||
pshufb m4, m0, [ex_r]
|
||||
pshufb m5, m0, [ex_g]
|
||||
pshufb m6, m0, [ex_b]
|
||||
pshufb m0, [ex_b]
|
||||
pmulld m1, [rsp+mmsize*0]
|
||||
pmulld m2, [rsp+mmsize*1]
|
||||
pmulld m3, [rsp+mmsize*2]
|
||||
pmulld m4, [rsp+mmsize*3]
|
||||
pmulld m5, [rsp+mmsize*4]
|
||||
pmulld m6, [rsp+mmsize*5]
|
||||
pmulld m0, [rsp+mmsize*5]
|
||||
paddd m1, m2
|
||||
paddd m3, m4
|
||||
paddd m5, m6
|
||||
paddd m5, m0
|
||||
paddd m1, m3
|
||||
paddd m1, m5
|
||||
|
||||
movu m0, [lsrcq+o+0]
|
||||
movu m0, [lsrcq+cntq]
|
||||
pshufb m7, m0, [ex_r]
|
||||
pshufb m2, m0, [ex_g]
|
||||
pshufb m3, m0, [ex_b]
|
||||
movu m0, [rsrcq+o+0]
|
||||
movu m0, [rsrcq+cntq]
|
||||
pshufb m4, m0, [ex_r]
|
||||
pshufb m5, m0, [ex_g]
|
||||
pshufb m6, m0, [ex_b]
|
||||
pshufb m0, [ex_b]
|
||||
pmulld m7, [rsp+mmsize*6]
|
||||
pmulld m2, [rsp+mmsize*7]
|
||||
pmulld m3, [rsp+mmsize*8]
|
||||
pmulld m4, [rsp+mmsize*9]
|
||||
pmulld m5, [rsp+mmsize*10]
|
||||
pmulld m6, [rsp+mmsize*11]
|
||||
pmulld m0, [rsp+mmsize*11]
|
||||
paddd m7, m2
|
||||
paddd m3, m4
|
||||
paddd m5, m6
|
||||
paddd m5, m0
|
||||
paddd m7, m3
|
||||
paddd m7, m5
|
||||
|
||||
movu m0, [lsrcq+o+0]
|
||||
pshufb m8, m0, [ex_r]
|
||||
pshufb m2, m0, [ex_g]
|
||||
pshufb m3, m0, [ex_b]
|
||||
movu m0, [rsrcq+o+0]
|
||||
pshufb m4, m0, [ex_r]
|
||||
pshufb m5, m0, [ex_g]
|
||||
pshufb m6, m0, [ex_b]
|
||||
pmulld m8, m10
|
||||
pmulld m2, m11
|
||||
pmulld m3, m12
|
||||
pmulld m4, m13
|
||||
pmulld m5, m14
|
||||
pmulld m6, m15
|
||||
paddd m8, m2
|
||||
paddd m3, m4
|
||||
paddd m5, m6
|
||||
paddd m8, m3
|
||||
paddd m8, m5
|
||||
movu m0, [lsrcq+cntq]
|
||||
pshufb m2, m0, [ex_r]
|
||||
pshufb m3, m0, [ex_g]
|
||||
pshufb m4, m0, [ex_b]
|
||||
movu m0, [rsrcq+cntq]
|
||||
pshufb m5, m0, [ex_r]
|
||||
pshufb m6, m0, [ex_g]
|
||||
pshufb m0, [ex_b]
|
||||
pmulld m2, m8
|
||||
pmulld m3, m9
|
||||
pmulld m4, m10
|
||||
pmulld m5, m11
|
||||
pmulld m6, m12
|
||||
pmulld m0, m13
|
||||
paddd m2, m3
|
||||
paddd m4, m5
|
||||
paddd m6, m0
|
||||
paddd m2, m4
|
||||
paddd m2, m6
|
||||
|
||||
psrld m1, 16
|
||||
psrld m7, 16
|
||||
psrld m8, 16
|
||||
psrld m2, 16
|
||||
|
||||
packusdw m1, m7
|
||||
packusdw m8, m8
|
||||
packuswb m1, m8
|
||||
packusdw m2, m2
|
||||
packuswb m1, m2
|
||||
pshufb m1, [shuf]
|
||||
|
||||
movq [dstq+o+0], m1
|
||||
movq [dstq+cntq+0], m1
|
||||
psrldq m1, 8
|
||||
movd [dstq+o+8], m1
|
||||
add r12d, 12
|
||||
sub r11d, 4
|
||||
movd [dstq+cntq+8], m1
|
||||
add cntd, 12
|
||||
sub od, 4
|
||||
jg .loop
|
||||
|
||||
add dstq, dst_linesizeq
|
||||
|
Loading…
Reference in New Issue
Block a user