mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-11-21 10:55:51 +02:00
x86/lossless_videodsp: simplify and explicit aligned/unaligned flags
This commit is contained in:
parent
78a9f185eb
commit
cddbfd2a95
@ -31,7 +31,7 @@ pb_zzzzzzzz67676767: db -1,-1,-1,-1,-1,-1,-1,-1, 6, 7, 6, 7, 6, 7, 6, 7
|
||||
|
||||
SECTION_TEXT
|
||||
|
||||
%macro ADD_INT16_LOOP 1 ; %1 = is_aligned
|
||||
%macro ADD_INT16_LOOP 1 ; %1 = a/u (aligned/unaligned)
|
||||
movd m4, maskd
|
||||
SPLATW m4, m4
|
||||
add wq, wq
|
||||
@ -51,28 +51,16 @@ SECTION_TEXT
|
||||
neg wq
|
||||
jz %%.end
|
||||
%%.loop:
|
||||
%if %1
|
||||
mova m0, [srcq+wq]
|
||||
mova m1, [dstq+wq]
|
||||
mova m2, [srcq+wq+mmsize]
|
||||
mova m3, [dstq+wq+mmsize]
|
||||
%else
|
||||
movu m0, [srcq+wq]
|
||||
movu m1, [dstq+wq]
|
||||
movu m2, [srcq+wq+mmsize]
|
||||
movu m3, [dstq+wq+mmsize]
|
||||
%endif
|
||||
mov%1 m0, [srcq+wq]
|
||||
mov%1 m1, [dstq+wq]
|
||||
mov%1 m2, [srcq+wq+mmsize]
|
||||
mov%1 m3, [dstq+wq+mmsize]
|
||||
paddw m0, m1
|
||||
paddw m2, m3
|
||||
pand m0, m4
|
||||
pand m2, m4
|
||||
%if %1
|
||||
mova [dstq+wq] , m0
|
||||
mova [dstq+wq+mmsize], m2
|
||||
%else
|
||||
movu [dstq+wq] , m0
|
||||
movu [dstq+wq+mmsize], m2
|
||||
%endif
|
||||
mov%1 [dstq+wq] , m0
|
||||
mov%1 [dstq+wq+mmsize], m2
|
||||
add wq, 2*mmsize
|
||||
jl %%.loop
|
||||
%%.end:
|
||||
@ -81,7 +69,7 @@ SECTION_TEXT
|
||||
|
||||
INIT_MMX mmx
|
||||
cglobal add_int16, 4,4,5, dst, src, mask, w
|
||||
ADD_INT16_LOOP 1
|
||||
ADD_INT16_LOOP a
|
||||
|
||||
INIT_XMM sse2
|
||||
cglobal add_int16, 4,4,5, dst, src, mask, w
|
||||
@ -89,11 +77,11 @@ cglobal add_int16, 4,4,5, dst, src, mask, w
|
||||
jnz .unaligned
|
||||
test dstq, mmsize-1
|
||||
jnz .unaligned
|
||||
ADD_INT16_LOOP 1
|
||||
ADD_INT16_LOOP a
|
||||
.unaligned:
|
||||
ADD_INT16_LOOP 0
|
||||
ADD_INT16_LOOP u
|
||||
|
||||
%macro DIFF_INT16_LOOP 1 ; %1 = is_aligned
|
||||
%macro DIFF_INT16_LOOP 1 ; %1 = a/u (aligned/unaligned)
|
||||
movd m4, maskd
|
||||
SPLATW m4, m4
|
||||
add wq, wq
|
||||
@ -114,28 +102,16 @@ cglobal add_int16, 4,4,5, dst, src, mask, w
|
||||
neg wq
|
||||
jz %%.end
|
||||
%%.loop:
|
||||
%if %1
|
||||
mova m0, [src1q+wq]
|
||||
mova m1, [src2q+wq]
|
||||
mova m2, [src1q+wq+mmsize]
|
||||
mova m3, [src2q+wq+mmsize]
|
||||
%else
|
||||
movu m0, [src1q+wq]
|
||||
movu m1, [src2q+wq]
|
||||
movu m2, [src1q+wq+mmsize]
|
||||
movu m3, [src2q+wq+mmsize]
|
||||
%endif
|
||||
mov%1 m0, [src1q+wq]
|
||||
mov%1 m1, [src2q+wq]
|
||||
mov%1 m2, [src1q+wq+mmsize]
|
||||
mov%1 m3, [src2q+wq+mmsize]
|
||||
psubw m0, m1
|
||||
psubw m2, m3
|
||||
pand m0, m4
|
||||
pand m2, m4
|
||||
%if %1
|
||||
mova [dstq+wq] , m0
|
||||
mova [dstq+wq+mmsize], m2
|
||||
%else
|
||||
movu [dstq+wq] , m0
|
||||
movu [dstq+wq+mmsize], m2
|
||||
%endif
|
||||
mov%1 [dstq+wq] , m0
|
||||
mov%1 [dstq+wq+mmsize], m2
|
||||
add wq, 2*mmsize
|
||||
jl %%.loop
|
||||
%%.end:
|
||||
@ -144,7 +120,7 @@ cglobal add_int16, 4,4,5, dst, src, mask, w
|
||||
|
||||
INIT_MMX mmx
|
||||
cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w
|
||||
DIFF_INT16_LOOP 1
|
||||
DIFF_INT16_LOOP a
|
||||
|
||||
INIT_XMM sse2
|
||||
cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w
|
||||
@ -154,22 +130,18 @@ cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w
|
||||
jnz .unaligned
|
||||
test dstq, mmsize-1
|
||||
jnz .unaligned
|
||||
DIFF_INT16_LOOP 1
|
||||
DIFF_INT16_LOOP a
|
||||
.unaligned:
|
||||
DIFF_INT16_LOOP 0
|
||||
DIFF_INT16_LOOP u
|
||||
|
||||
|
||||
%macro ADD_HFYU_LEFT_LOOP_INT16 2 ; %1 = dst_is_aligned, %2 = src_is_aligned
|
||||
%macro ADD_HFYU_LEFT_LOOP_INT16 2 ; %1 = dst alignment (a/u), %2 = src alignment (a/u)
|
||||
add wq, wq
|
||||
add srcq, wq
|
||||
add dstq, wq
|
||||
neg wq
|
||||
%%.loop:
|
||||
%if %2
|
||||
mova m1, [srcq+wq]
|
||||
%else
|
||||
movu m1, [srcq+wq]
|
||||
%endif
|
||||
mov%2 m1, [srcq+wq]
|
||||
mova m2, m1
|
||||
pslld m1, 16
|
||||
paddw m1, m2
|
||||
@ -185,7 +157,7 @@ cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w
|
||||
%endif
|
||||
paddw m0, m1
|
||||
pand m0, m7
|
||||
%if %1
|
||||
%ifidn %1, a
|
||||
mova [dstq+wq], m0
|
||||
%else
|
||||
movq [dstq+wq], m0
|
||||
@ -214,7 +186,7 @@ cglobal add_hfyu_left_prediction_int16, 4,4,8, dst, src, mask, w, left
|
||||
psllq m0, 48
|
||||
movd m7, maskm
|
||||
SPLATW m7 ,m7
|
||||
ADD_HFYU_LEFT_LOOP_INT16 1, 1
|
||||
ADD_HFYU_LEFT_LOOP_INT16 a, a
|
||||
|
||||
INIT_XMM sse4
|
||||
cglobal add_hfyu_left_prediction_int16, 4,4,8, dst, src, mask, w, left
|
||||
@ -229,11 +201,11 @@ cglobal add_hfyu_left_prediction_int16, 4,4,8, dst, src, mask, w, left
|
||||
jnz .src_unaligned
|
||||
test dstq, 15
|
||||
jnz .dst_unaligned
|
||||
ADD_HFYU_LEFT_LOOP_INT16 1, 1
|
||||
ADD_HFYU_LEFT_LOOP_INT16 a, a
|
||||
.dst_unaligned:
|
||||
ADD_HFYU_LEFT_LOOP_INT16 0, 1
|
||||
ADD_HFYU_LEFT_LOOP_INT16 u, a
|
||||
.src_unaligned:
|
||||
ADD_HFYU_LEFT_LOOP_INT16 0, 0
|
||||
ADD_HFYU_LEFT_LOOP_INT16 u, u
|
||||
|
||||
; void add_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int mask, int w, int *left, int *left_top)
|
||||
INIT_MMX mmxext
|
||||
|
Loading…
Reference in New Issue
Block a user