1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-01-13 21:28:01 +02:00

x86/lossless_videodsp: use common macro for add and diff int16 loop.

This commit is contained in:
Clément Bœsch 2014-01-25 11:59:39 +01:00
parent cddbfd2a95
commit 5267e85056

View File

@ -31,7 +31,7 @@ pb_zzzzzzzz67676767: db -1,-1,-1,-1,-1,-1,-1,-1, 6, 7, 6, 7, 6, 7, 6, 7
SECTION_TEXT
%macro ADD_INT16_LOOP 1 ; %1 = a/u (aligned/unaligned)
%macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub
movd m4, maskd
SPLATW m4, m4
add wq, wq
@ -39,24 +39,41 @@ SECTION_TEXT
jz %%.tomainloop
%%.wordloop:
sub wq, 2
%ifidn %2, add
mov ax, [srcq+wq]
add ax, [dstq+wq]
%else
mov ax, [src1q+wq]
sub ax, [src2q+wq]
%endif
and ax, maskw
mov [dstq+wq], ax
test wq, 2*mmsize - 1
jnz %%.wordloop
%%.tomainloop:
%ifidn %2, add
add srcq, wq
%else
add src1q, wq
add src2q, wq
%endif
add dstq, wq
neg wq
jz %%.end
%%.loop:
%ifidn %2, add
mov%1 m0, [srcq+wq]
mov%1 m1, [dstq+wq]
mov%1 m2, [srcq+wq+mmsize]
mov%1 m3, [dstq+wq+mmsize]
paddw m0, m1
paddw m2, m3
%else
mov%1 m0, [src1q+wq]
mov%1 m1, [src2q+wq]
mov%1 m2, [src1q+wq+mmsize]
mov%1 m3, [src2q+wq+mmsize]
%endif
p%2w m0, m1
p%2w m2, m3
pand m0, m4
pand m2, m4
mov%1 [dstq+wq] , m0
@ -69,7 +86,7 @@ SECTION_TEXT
INIT_MMX mmx
cglobal add_int16, 4,4,5, dst, src, mask, w
ADD_INT16_LOOP a
INT16_LOOP a, add
INIT_XMM sse2
cglobal add_int16, 4,4,5, dst, src, mask, w
@ -77,50 +94,13 @@ cglobal add_int16, 4,4,5, dst, src, mask, w
jnz .unaligned
test dstq, mmsize-1
jnz .unaligned
ADD_INT16_LOOP a
INT16_LOOP a, add
.unaligned:
ADD_INT16_LOOP u
%macro DIFF_INT16_LOOP 1 ; %1 = a/u (aligned/unaligned)
movd m4, maskd
SPLATW m4, m4
add wq, wq
test wq, 2*mmsize - 1
jz %%.tomainloop
%%.wordloop:
sub wq, 2
mov ax, [src1q+wq]
sub ax, [src2q+wq]
and ax, maskw
mov [dstq+wq], ax
test wq, 2*mmsize - 1
jnz %%.wordloop
%%.tomainloop:
add src1q, wq
add src2q, wq
add dstq, wq
neg wq
jz %%.end
%%.loop:
mov%1 m0, [src1q+wq]
mov%1 m1, [src2q+wq]
mov%1 m2, [src1q+wq+mmsize]
mov%1 m3, [src2q+wq+mmsize]
psubw m0, m1
psubw m2, m3
pand m0, m4
pand m2, m4
mov%1 [dstq+wq] , m0
mov%1 [dstq+wq+mmsize], m2
add wq, 2*mmsize
jl %%.loop
%%.end:
RET
%endmacro
INT16_LOOP u, add
INIT_MMX mmx
cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w
DIFF_INT16_LOOP a
INT16_LOOP a, sub
INIT_XMM sse2
cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w
@ -130,9 +110,9 @@ cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w
jnz .unaligned
test dstq, mmsize-1
jnz .unaligned
DIFF_INT16_LOOP a
INT16_LOOP a, sub
.unaligned:
DIFF_INT16_LOOP u
INT16_LOOP u, sub
%macro ADD_HFYU_LEFT_LOOP_INT16 2 ; %1 = dst alignment (a/u), %2 = src alignment (a/u)