mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-04-14 00:58:38 +02:00
swscale/x86/range_convert: reduce number of xmm regs clobbered in luma functions
Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
parent
f6859cade3
commit
8a4c9d6bd3
@ -52,21 +52,21 @@ SECTION .text
|
|||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
|
|
||||||
%macro LUMCONVERTRANGE 4
|
%macro LUMCONVERTRANGE 4
|
||||||
cglobal %1, 2, 2, 7, dst, width
|
cglobal %1, 2, 2, 5, dst, width
|
||||||
shl widthd, 1
|
shl widthd, 1
|
||||||
VBROADCASTI128 m4, [%2]
|
VBROADCASTI128 m2, [%2]
|
||||||
VBROADCASTI128 m5, [%3]
|
VBROADCASTI128 m3, [%3]
|
||||||
pxor m6, m6
|
pxor m4, m4
|
||||||
add dstq, widthq
|
add dstq, widthq
|
||||||
neg widthq
|
neg widthq
|
||||||
.loop:
|
.loop:
|
||||||
movu m0, [dstq+widthq]
|
movu m0, [dstq+widthq]
|
||||||
punpckhwd m1, m0, m6
|
punpckhwd m1, m0, m4
|
||||||
punpcklwd m0, m6
|
punpcklwd m0, m4
|
||||||
pmaddwd m0, m4
|
pmaddwd m0, m2
|
||||||
pmaddwd m1, m4
|
pmaddwd m1, m2
|
||||||
paddd m0, m5
|
paddd m0, m3
|
||||||
paddd m1, m5
|
paddd m1, m3
|
||||||
psrad m0, %4
|
psrad m0, %4
|
||||||
psrad m1, %4
|
psrad m1, %4
|
||||||
packssdw m0, m1
|
packssdw m0, m1
|
||||||
|
Loading…
x
Reference in New Issue
Block a user