1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-01-13 21:28:01 +02:00

x86: fmtconvert: port to cpuflags

This commit is contained in:
Diego Biurrun 2012-07-15 15:42:17 +02:00
parent 588fafe7f3
commit be923ed659

View File

@ -26,11 +26,11 @@ SECTION_TEXT
;---------------------------------------------------------------------------------
; void int32_to_float_fmul_scalar(float *dst, const int *src, float mul, int len);
;
; NOTE(review): this region reads like a diff hunk whose +/- markers were
; stripped: each changed line appears twice (old form first, new form second),
; and every "@ -a,b +c,d @" line marks a stretch of source that is not shown.
; It will not assemble as-is; the comments below describe only visible lines.
;---------------------------------------------------------------------------------
; Old header: %1 = name suffix, %2 = fourth cglobal argument.
; New header: %1 = fourth cglobal argument; the name carries no explicit suffix.
; (Presumably that cglobal argument is the XMM register count -- confirm in x86inc.)
%macro INT32_TO_FLOAT_FMUL_SCALAR 2
%macro INT32_TO_FLOAT_FMUL_SCALAR 1
; UNIX64 names only dst/src/len (3 args, 3 regs); every other target also
; names mul (4 args, 4 regs). Presumably mul arrives in xmm0 on UNIX64 and so
; needs no general-purpose register there -- confirm.
%if UNIX64
cglobal int32_to_float_fmul_scalar_%1, 3,3,%2, dst, src, len
cglobal int32_to_float_fmul_scalar, 3, 3, %1, dst, src, len
%else
cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len
cglobal int32_to_float_fmul_scalar, 4, 4, %1, dst, src, mul, len
%endif
; WIN64 only: exchange register names 0 and 2. Presumably because the third
; argument (mul) is passed in xmm2 under the Microsoft x64 convention -- confirm.
%if WIN64
SWAP 0, 2
@ -43,7 +43,7 @@ cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len
; dst is moved forward by len and len is negated, so the loop addresses memory
; as [base+lenq] with a negative offset (the matching src adjustment and the
; loop tail are in the elided lines).
add dstq, lenq
neg lenq
.loop:
; Old test: textual compare of the suffix argument against "sse2".
; New test: cpuflag(sse2) query on the instruction set selected by INIT_XMM.
%ifidn %1, sse2
%if cpuflag(sse2)
; SSE2 path: convert two 16-byte groups of int32 to float straight from memory.
cvtdq2ps m1, [srcq+lenq ]
cvtdq2ps m2, [srcq+lenq+16]
%else
@ -63,27 +63,26 @@ cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len
REP_RET
%endmacro
; Instantiations. SPLATD is pointed at a per-instruction-set helper before each
; expansion and undefined afterwards. In the old form the instruction set is
; passed as the first macro argument; in the new form it is given to INIT_XMM.
INIT_XMM
INIT_XMM sse
%define SPLATD SPLATD_SSE
; movdqa is aliased to movaps only around the old-form sse expansion.
%define movdqa movaps
INT32_TO_FLOAT_FMUL_SCALAR sse, 5
%undef movdqa
INT32_TO_FLOAT_FMUL_SCALAR 5
INIT_XMM sse2
%define SPLATD SPLATD_SSE2
INT32_TO_FLOAT_FMUL_SCALAR sse2, 3
INT32_TO_FLOAT_FMUL_SCALAR 3
%undef SPLATD
;------------------------------------------------------------------------------
; void ff_float_to_int16(int16_t *dst, const float *src, long len);
;
; NOTE(review): this region reads like a diff hunk whose +/- markers were
; stripped: each changed line appears twice (old form first, new form second),
; and the "@ -a,b +c,d @" line marks a stretch of source that is not shown.
; It will not assemble as-is; the comments below describe only visible lines.
;------------------------------------------------------------------------------
; Old header: %1 = name suffix, %2 = fourth cglobal argument.
; New header: %1 = fourth cglobal argument only.
%macro FLOAT_TO_INT16 2
cglobal float_to_int16_%1, 3,3,%2, dst, src, len
%macro FLOAT_TO_INT16 1
cglobal float_to_int16, 3, 3, %1, dst, src, len
; len is doubled to the output size in bytes (2 bytes per int16 sample).
add lenq, lenq
; src is advanced by 2*lenq = 4 bytes per sample, i.e. to the end of the input.
lea srcq, [srcq+2*lenq]
; dst is advanced to the end of the output.
add dstq, lenq
; From here lenq is a negative byte offset that counts up towards zero.
neg lenq
.loop:
; Old test: textual compare of the suffix argument; new test: cpuflag query.
%ifidn %1, sse2
%if cpuflag(sse2)
; SSE2 path: convert 2 x 4 floats to int32, then pack with signed saturation
; into 8 int16 values.
cvtps2dq m0, [srcq+2*lenq ]
cvtps2dq m1, [srcq+2*lenq+16]
packssdw m0, m1
@ -100,31 +99,32 @@ cglobal float_to_int16_%1, 3,3,%2, dst, src, len
%endif
; 16 output bytes (8 samples) per iteration; loop while the offset is negative.
add lenq, 16
js .loop
; emms is emitted for the non-SSE2 builds. The old guard tests the suffix
; argument, the new guard tests mmsize == 8 (presumably "MMX registers in use"
; as set up by INIT_MMX -- confirm in x86inc).
%ifnidn %1, sse2
%if mmsize == 8
emms
%endif
REP_RET
%endmacro
; Instantiations: sse2 on XMM registers, sse and 3dnow on MMX registers.
INIT_XMM
FLOAT_TO_INT16 sse2, 2
INIT_MMX
FLOAT_TO_INT16 sse, 0
INIT_XMM sse2
FLOAT_TO_INT16 2
INIT_MMX sse
FLOAT_TO_INT16 0
; The 3dnow expansion reuses the macro body with cvtps2pi replaced by pf2id.
%define cvtps2pi pf2id
FLOAT_TO_INT16 3dnow, 0
INIT_MMX 3dnow
FLOAT_TO_INT16 0
%undef cvtps2pi
;------------------------------------------------------------------------------
; void ff_float_to_int16_step(int16_t *dst, const float *src, long len, long step);
;
; NOTE(review): this region reads like a diff hunk whose +/- markers were
; stripped: each changed line appears twice (old form first, new form second),
; and the "@ -a,b +c,d @" line marks a stretch of source that is not shown
; (including all stores to dst). It will not assemble as-is; the comments
; below describe only visible lines.
;------------------------------------------------------------------------------
; Old header: %1 = name suffix, %2 = fourth cglobal argument.
; New header: %1 = fourth cglobal argument only.
; Four arguments are named (dst, src, len, step); step3, v1 and v2 are three
; further registers requested through cglobal's register count of 7.
%macro FLOAT_TO_INT16_STEP 2
cglobal float_to_int16_step_%1, 4,7,%2, dst, src, len, step, step3, v1, v2
%macro FLOAT_TO_INT16_STEP 1
cglobal float_to_int16_step, 4, 7, %1, dst, src, len, step, step3, v1, v2
; len is doubled, then src is advanced by 2*lenq = 4 bytes per sample, i.e. to
; the end of the input.
add lenq, lenq
lea srcq, [srcq+2*lenq]
; step3 = 3 * step, precomputed once outside the loop.
lea step3q, [stepq*3]
; From here lenq is a negative offset that counts up towards zero.
neg lenq
.loop:
; Old test: textual compare of the suffix argument; new test: cpuflag query.
%ifidn %1, sse2
%if cpuflag(sse2)
; SSE2 path: convert 2 x 4 floats to int32, then pack with signed saturation
; into 8 int16 values.
cvtps2dq m0, [srcq+2*lenq ]
cvtps2dq m1, [srcq+2*lenq+16]
packssdw m0, m1
@ -179,25 +179,26 @@ cglobal float_to_int16_step_%1, 4,7,%2, dst, src, len, step, step3, v1, v2
%endif
; lenq advances by 16 per iteration; loop while it is still negative.
add lenq, 16
js .loop
; emms is emitted for the non-SSE2 builds. The old guard tests the suffix
; argument, the new guard tests mmsize == 8 (presumably "MMX registers in use"
; -- confirm in x86inc).
%ifnidn %1, sse2
%if mmsize == 8
emms
%endif
REP_RET
%endmacro
; Instantiations: sse2 on XMM registers, sse and 3dnow on MMX registers.
INIT_XMM
FLOAT_TO_INT16_STEP sse2, 2
INIT_MMX
FLOAT_TO_INT16_STEP sse, 0
INIT_XMM sse2
FLOAT_TO_INT16_STEP 2
INIT_MMX sse
FLOAT_TO_INT16_STEP 0
; The 3dnow expansion reuses the macro body with cvtps2pi replaced by pf2id.
%define cvtps2pi pf2id
FLOAT_TO_INT16_STEP 3dnow, 0
INIT_MMX 3dnow
FLOAT_TO_INT16_STEP 0
%undef cvtps2pi
;-------------------------------------------------------------------------------
; void ff_float_to_int16_interleave2(int16_t *dst, const float **src, long len);
;
; NOTE(review): this region reads like a diff hunk whose +/- markers were
; stripped: each changed line appears twice (old form first, new form second),
; and every "@ -a,b +c,d @" line marks a stretch of source that is not shown.
; It will not assemble as-is; the comments below describe only visible lines.
;-------------------------------------------------------------------------------
; Old header: %1 = name suffix. New header: no macro arguments.
; cglobal names four registers for three C arguments: the third register is
; called src1 and the fourth len, so the C "len" argument starts out in r2.
%macro FLOAT_TO_INT16_INTERLEAVE2 1
cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len
%macro FLOAT_TO_INT16_INTERLEAVE2 0
cglobal float_to_int16_interleave2, 3, 4, 2, dst, src0, src1, len
; lenq = 4 * (third argument). This must happen before src1q is written,
; because src1q is the same register as r2q.
lea lenq, [4*r2q]
; Fetch the two channel pointers from the src pointer array; src0q is
; overwritten last because it is also the array base.
mov src1q, [src0q+gprsize]
mov src0q, [src0q]
@ -206,7 +207,7 @@ cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len
; src1 is advanced by lenq and lenq negated, so the loop addresses the input
; as [base+lenq] with a negative offset (the other pointer adjustments are in
; the elided lines).
add src1q, lenq
neg lenq
.loop:
; Old test: textual compare of the suffix argument; new test: cpuflag query.
%ifidn %1, sse2
%if cpuflag(sse2)
; SSE2 path: convert 4 floats from each channel to int32, then pack both
; with signed saturation into 8 int16 values (channel 0 low, channel 1 high).
cvtps2dq m0, [src0q+lenq]
cvtps2dq m1, [src1q+lenq]
packssdw m0, m1
@ -228,21 +229,20 @@ cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len
%endif
; lenq advances by 16 bytes (4 floats per channel); loop while negative.
add lenq, 16
js .loop
; emms is emitted for the non-SSE2 builds. The old guard tests the suffix
; argument, the new guard tests mmsize == 8 (presumably "MMX registers in use"
; -- confirm in x86inc).
%ifnidn %1, sse2
%if mmsize == 8
emms
%endif
REP_RET
%endmacro
; Instantiations: 3dnow and sse on MMX registers, sse2 on XMM registers.
INIT_MMX
INIT_MMX 3dnow
; The 3dnow expansion reuses the macro body with cvtps2pi replaced by pf2id.
%define cvtps2pi pf2id
FLOAT_TO_INT16_INTERLEAVE2 3dnow
FLOAT_TO_INT16_INTERLEAVE2
%undef cvtps2pi
; movdqa is aliased to movaps only around the old-form sse expansion.
%define movdqa movaps
FLOAT_TO_INT16_INTERLEAVE2 sse
%undef movdqa
INIT_XMM
FLOAT_TO_INT16_INTERLEAVE2 sse2
INIT_MMX sse
FLOAT_TO_INT16_INTERLEAVE2
INIT_XMM sse2
FLOAT_TO_INT16_INTERLEAVE2
; Two-argument helper macro; only its header is visible here.
; NOTE(review): the "@ ... @" line below marks elided source, so the
; punpckldq/%endmacro that follow it may be the tail of a different macro
; than PSWAPD_SSE -- confirm against the full file before relying on this.
%macro PSWAPD_SSE 2
@ -254,9 +254,9 @@ FLOAT_TO_INT16_INTERLEAVE2 sse2
punpckldq %1, %2
%endmacro
; NOTE(review): this region reads like a diff hunk whose +/- markers were
; stripped: each changed line appears twice (old form first, new form second),
; and the "@ -a,b +c,d @" line marks a stretch of source that is not shown
; (the whole conversion loop). It will not assemble as-is; the comments below
; describe only visible lines.
; Old header: %1 = name suffix. New header: no macro arguments.
%macro FLOAT_TO_INT16_INTERLEAVE6 1
%macro FLOAT_TO_INT16_INTERLEAVE6 0
; void float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len)
; Only dst and src are named as arguments (count 2); src1..src5 and len are
; six further registers requested through cglobal's register count of 8.
cglobal float_to_int16_interleave6_%1, 2,8,0, dst, src, src1, src2, src3, src4, src5, len
cglobal float_to_int16_interleave6, 2, 8, 0, dst, src, src1, src2, src3, src4, src5, len
; On x86-64 the length is copied from r2d (third argument register) into the
; register named len; the non-64-bit branch is in the elided lines.
%if ARCH_X86_64
mov lend, r2d
%else
@ -302,21 +302,24 @@ cglobal float_to_int16_interleave6_%1, 2,8,0, dst, src, src1, src2, src3, src4,
RET
%endmacro ; FLOAT_TO_INT16_INTERLEAVE6
; Instantiations, all on MMX registers. pswapd and cvtps2pi are redirected per
; instruction set through %define before each expansion.
INIT_MMX sse
%define pswapd PSWAPD_SSE
FLOAT_TO_INT16_INTERLEAVE6 sse
FLOAT_TO_INT16_INTERLEAVE6
INIT_MMX 3dnow
%define cvtps2pi pf2id
%define pswapd PSWAPD_3DNOW
FLOAT_TO_INT16_INTERLEAVE6 3dnow
FLOAT_TO_INT16_INTERLEAVE6
; pswapd is undefined before the 3dnowext expansion, so that build uses the
; plain pswapd mnemonic (presumably the native instruction -- confirm), while
; cvtps2pi stays mapped to pf2id until after it.
%undef pswapd
FLOAT_TO_INT16_INTERLEAVE6 3dnowext
INIT_MMX 3dnowext
FLOAT_TO_INT16_INTERLEAVE6
%undef cvtps2pi
;-----------------------------------------------------------------------------
; void ff_float_interleave6(float *dst, const float **src, unsigned int len);
;
; NOTE(review): this region reads like a diff hunk whose +/- markers were
; stripped: each changed line appears twice (old form first, new form second),
; and every "@ -a,b +c,d @" line marks a stretch of source that is not shown.
; It will not assemble as-is; the comments below describe only visible lines.
;-----------------------------------------------------------------------------
; Old header: %1 = name suffix, %2 = fourth cglobal argument.
; New header: %1 = fourth cglobal argument only.
; Only dst and src are named as arguments (count 2); src1..src5 and len are
; six further registers requested through cglobal's register count of 8.
%macro FLOAT_INTERLEAVE6 2
cglobal float_interleave6_%1, 2,8,%2, dst, src, src1, src2, src3, src4, src5, len
%macro FLOAT_INTERLEAVE6 1
cglobal float_interleave6, 2, 8, %1, dst, src, src1, src2, src3, src4, src5, len
; On x86-64 the length is copied from r2d (third argument register) into the
; register named len; the non-64-bit branch is in the elided lines.
%if ARCH_X86_64
mov lend, r2d
%else
@ -334,7 +337,7 @@ cglobal float_interleave6_%1, 2,8,%2, dst, src, src1, src2, src3, src4, src5, le
; Channel pointers are turned into offsets relative to srcq, so the loop can
; address every channel as [srcq+srcNq].
sub src4q, srcq
sub src5q, srcq
.loop:
; Old test: textual compare of the suffix argument; new test: cpuflag query.
%ifidn %1, sse
%if cpuflag(sse)
; SSE path: aligned 16-byte load from each channel.
movaps m0, [srcq]
movaps m1, [srcq+src1q]
movaps m2, [srcq+src2q]
@ -383,62 +386,60 @@ cglobal float_interleave6_%1, 2,8,%2, dst, src, src1, src2, src3, src4, src5, le
; Each iteration writes 6*mmsize bytes and consumes mmsize/4 floats from the
; counter; loop while the counter is still positive.
add dstq, mmsize*6
sub lend, mmsize/4
jg .loop
; emms is emitted for the MMX build. The old guard tests the suffix argument,
; the new guard tests mmsize == 8 (presumably "MMX registers in use" --
; confirm in x86inc).
%ifidn %1, mmx
%if mmsize == 8
emms
%endif
REP_RET
%endmacro
; Instantiations: mmx on MMX registers, sse on XMM registers.
INIT_MMX
FLOAT_INTERLEAVE6 mmx, 0
INIT_XMM
FLOAT_INTERLEAVE6 sse, 7
INIT_MMX mmx
FLOAT_INTERLEAVE6 0
INIT_XMM sse
FLOAT_INTERLEAVE6 7
;-----------------------------------------------------------------------------
; void ff_float_interleave2(float *dst, const float **src, unsigned int len);
;
; Interleave two planar float channels into one buffer:
;   dst = { src[0][0], src[1][0], src[0][1], src[1][1], ... }
;
; Macro argument:
;   %1 - forwarded as the fourth cglobal argument (presumably the number of
;        XMM registers used -- confirm in x86inc).
;
; The instruction set is chosen by the INIT_MMX/INIT_XMM line in front of each
; expansion, not by a macro argument; the exported symbol suffix is presumably
; derived from it by cglobal -- confirm in x86inc.
;
; Register roles:
;   dstq  - output pointer, advanced by 4*mmsize bytes per iteration
;   srcq  - channel 0 pointer, advanced by 2*mmsize bytes per iteration
;   src1q - byte offset of channel 1 relative to channel 0
;   lend  - floats per channel still to process
;
; The loop tests its counter only after the body, so it always runs at least
; once, and each pass consumes mmsize/2 floats per channel: len is expected to
; be positive and a multiple of mmsize/2. mova is x86inc's aligned-move alias,
; so for the XMM build the buffers are presumably required to be 16-byte
; aligned -- confirm against the callers.
;-----------------------------------------------------------------------------
%macro FLOAT_INTERLEAVE2 1
cglobal float_interleave2, 3, 4, %1, dst, src, len, src1
    ; Fetch both channel pointers; srcq is overwritten last because it is
    ; also the base of the pointer array.
    mov     src1q, [srcq+gprsize]
    mov      srcq, [srcq        ]
    ; Keep channel 1 as an offset so only srcq has to advance in the loop.
    sub     src1q, srcq
.loop:
    ; Load two registers' worth of samples from each channel.
    mova       m0, [srcq             ]
    mova       m1, [srcq+src1q       ]
    mova       m3, [srcq      +mmsize]
    mova       m4, [srcq+src1q+mmsize]

    ; First register pair: m0 = interleaved low halves, m2 = high halves.
    mova       m2, m0
    PUNPCKLDQ  m0, m1
    PUNPCKHDQ  m2, m1

    ; Second register pair: m3 = interleaved low halves, m1 = high halves.
    mova       m1, m3
    PUNPCKLDQ  m3, m4
    PUNPCKHDQ  m1, m4

    mova [dstq         ], m0
    mova [dstq+1*mmsize], m2
    mova [dstq+2*mmsize], m3
    mova [dstq+3*mmsize], m1

    add      srcq, mmsize*2
    add      dstq, mmsize*4
    ; 2*mmsize bytes per channel = mmsize/2 floats consumed per iteration.
    sub      lend, mmsize/2
    jg .loop
    ; 8-byte registers means the MMX build: leave MMX state before returning.
%if mmsize == 8
    emms
%endif
    REP_RET
%endmacro

; MMX build: integer dword unpacks do the interleave.
INIT_MMX mmx
%define PUNPCKLDQ punpckldq
%define PUNPCKHDQ punpckhdq
FLOAT_INTERLEAVE2 0
; SSE build: single-precision unpacks do the interleave.
INIT_XMM sse
%define PUNPCKLDQ unpcklps
%define PUNPCKHDQ unpckhps
FLOAT_INTERLEAVE2 5