From be923ed659016350592acb9b3346f706f8170ac5 Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Sun, 15 Jul 2012 15:42:17 +0200
Subject: [PATCH] x86: fmtconvert: port to cpuflags

---
 libavcodec/x86/fmtconvert.asm | 141 +++++++++++++++++-----------------
 1 file changed, 71 insertions(+), 70 deletions(-)

diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm
index 2951b1672a..969f9ab87d 100644
--- a/libavcodec/x86/fmtconvert.asm
+++ b/libavcodec/x86/fmtconvert.asm
@@ -26,11 +26,11 @@ SECTION_TEXT
 ;---------------------------------------------------------------------------------
 ; void int32_to_float_fmul_scalar(float *dst, const int *src, float mul, int len);
 ;---------------------------------------------------------------------------------
-%macro INT32_TO_FLOAT_FMUL_SCALAR 2
+%macro INT32_TO_FLOAT_FMUL_SCALAR 1
 %if UNIX64
-cglobal int32_to_float_fmul_scalar_%1, 3,3,%2, dst, src, len
+cglobal int32_to_float_fmul_scalar, 3, 3, %1, dst, src, len
 %else
-cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len
+cglobal int32_to_float_fmul_scalar, 4, 4, %1, dst, src, mul, len
 %endif
 %if WIN64
     SWAP 0, 2
@@ -43,7 +43,7 @@ cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len
     add     dstq, lenq
     neg     lenq
 .loop:
-%ifidn %1, sse2
+%if cpuflag(sse2)
     cvtdq2ps  m1, [srcq+lenq   ]
     cvtdq2ps  m2, [srcq+lenq+16]
 %else
@@ -63,27 +63,26 @@ cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len
     REP_RET
 %endmacro
 
-INIT_XMM
+INIT_XMM sse                    ; SSE build of the macro (replaces the old "sse" name argument)
 %define SPLATD SPLATD_SSE
-%define movdqa movaps
-INT32_TO_FLOAT_FMUL_SCALAR sse, 5
-%undef movdqa
+INT32_TO_FLOAT_FMUL_SCALAR 5    ; %1 = 5, forwarded to cglobal as its third numeric argument
+INIT_XMM sse2                   ; SSE2 build: the macro's cpuflag(sse2) test guards the cvtdq2ps path
 %define SPLATD SPLATD_SSE2
-INT32_TO_FLOAT_FMUL_SCALAR sse2, 3
+INT32_TO_FLOAT_FMUL_SCALAR 3    ; %1 = 3, forwarded to cglobal as its third numeric argument
 %undef SPLATD
 
 
 ;------------------------------------------------------------------------------
 ; void ff_float_to_int16(int16_t *dst, const float *src, long len);
 ;------------------------------------------------------------------------------
-%macro FLOAT_TO_INT16 2
-cglobal float_to_int16_%1, 3,3,%2, dst, src, len
+%macro FLOAT_TO_INT16 1
+cglobal float_to_int16, 3, 3, %1, dst, src, len
     add       lenq, lenq
     lea       srcq, [srcq+2*lenq]
     add       dstq, lenq
     neg       lenq
 .loop:
-%ifidn %1, sse2
+%if cpuflag(sse2)
     cvtps2dq    m0, [srcq+2*lenq   ]
     cvtps2dq    m1, [srcq+2*lenq+16]
     packssdw    m0, m1
@@ -100,31 +99,32 @@ cglobal float_to_int16_%1, 3,3,%2, dst, src, len
 %endif
     add       lenq, 16
     js .loop
-%ifnidn %1, sse2
+%if mmsize == 8
     emms
 %endif
     REP_RET
 %endmacro
 
-INIT_XMM
-FLOAT_TO_INT16 sse2, 2
-INIT_MMX
-FLOAT_TO_INT16 sse, 0
+INIT_XMM sse2       ; SSE2 build: the macro's cpuflag(sse2) test guards the cvtps2dq/packssdw path
+FLOAT_TO_INT16 2    ; %1 = 2, forwarded to cglobal as its third numeric argument
+INIT_MMX sse        ; SSE build on MMX registers; the macro emits emms when mmsize == 8
+FLOAT_TO_INT16 0    ; %1 = 0
 %define cvtps2pi pf2id
-FLOAT_TO_INT16 3dnow, 0
+INIT_MMX 3dnow      ; 3DNow! build: cvtps2pi is remapped to pf2id by the define above
+FLOAT_TO_INT16 0    ; %1 = 0
 %undef cvtps2pi
 
 ;------------------------------------------------------------------------------
 ; void ff_float_to_int16_step(int16_t *dst, const float *src, long len, long step);
 ;------------------------------------------------------------------------------
-%macro FLOAT_TO_INT16_STEP 2
-cglobal float_to_int16_step_%1, 4,7,%2, dst, src, len, step, step3, v1, v2
+%macro FLOAT_TO_INT16_STEP 1
+cglobal float_to_int16_step, 4, 7, %1, dst, src, len, step, step3, v1, v2
     add       lenq, lenq
     lea       srcq, [srcq+2*lenq]
     lea     step3q, [stepq*3]
     neg       lenq
 .loop:
-%ifidn %1, sse2
+%if cpuflag(sse2)
     cvtps2dq    m0, [srcq+2*lenq   ]
     cvtps2dq    m1, [srcq+2*lenq+16]
     packssdw    m0, m1
@@ -179,25 +179,26 @@ cglobal float_to_int16_step_%1, 4,7,%2, dst, src, len, step, step3, v1, v2
 %endif
     add       lenq, 16
     js .loop
-%ifnidn %1, sse2
+%if mmsize == 8
     emms
 %endif
     REP_RET
 %endmacro
 
-INIT_XMM
-FLOAT_TO_INT16_STEP sse2, 2
-INIT_MMX
-FLOAT_TO_INT16_STEP sse, 0
+INIT_XMM sse2            ; SSE2 build: the macro's cpuflag(sse2) test guards the cvtps2dq/packssdw path
+FLOAT_TO_INT16_STEP 2    ; %1 = 2, forwarded to cglobal as its third numeric argument
+INIT_MMX sse             ; SSE build on MMX registers; the macro emits emms when mmsize == 8
+FLOAT_TO_INT16_STEP 0    ; %1 = 0
 %define cvtps2pi pf2id
-FLOAT_TO_INT16_STEP 3dnow, 0
+INIT_MMX 3dnow           ; 3DNow! build: cvtps2pi is remapped to pf2id by the define above
+FLOAT_TO_INT16_STEP 0    ; %1 = 0
 %undef cvtps2pi
 
 ;-------------------------------------------------------------------------------
 ; void ff_float_to_int16_interleave2(int16_t *dst, const float **src, long len);
 ;-------------------------------------------------------------------------------
-%macro FLOAT_TO_INT16_INTERLEAVE2 1
-cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len
+%macro FLOAT_TO_INT16_INTERLEAVE2 0
+cglobal float_to_int16_interleave2, 3, 4, 2, dst, src0, src1, len
     lea      lenq, [4*r2q]
     mov     src1q, [src0q+gprsize]
     mov     src0q, [src0q]
@@ -206,7 +207,7 @@ cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len
     add     src1q, lenq
     neg      lenq
 .loop:
-%ifidn %1, sse2
+%if cpuflag(sse2)
     cvtps2dq   m0, [src0q+lenq]
     cvtps2dq   m1, [src1q+lenq]
     packssdw   m0, m1
@@ -228,21 +229,20 @@ cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len
 %endif
     add      lenq, 16
     js .loop
-%ifnidn %1, sse2
+%if mmsize == 8
     emms
 %endif
     REP_RET
 %endmacro
 
-INIT_MMX
+INIT_MMX 3dnow                ; 3DNow! build; the define below remaps cvtps2pi to pf2id for it
 %define cvtps2pi pf2id
-FLOAT_TO_INT16_INTERLEAVE2 3dnow
+FLOAT_TO_INT16_INTERLEAVE2    ; macro now takes no arguments
 %undef cvtps2pi
-%define movdqa movaps
-FLOAT_TO_INT16_INTERLEAVE2 sse
-%undef movdqa
-INIT_XMM
-FLOAT_TO_INT16_INTERLEAVE2 sse2
+INIT_MMX sse                  ; SSE build on MMX registers; the movdqa->movaps define is dropped here
+FLOAT_TO_INT16_INTERLEAVE2    ; assembled with cvtps2pi undefined again, i.e. the real instruction
+INIT_XMM sse2                 ; SSE2 build: the macro's cpuflag(sse2) test guards the cvtps2dq path
+FLOAT_TO_INT16_INTERLEAVE2    ; mmsize != 8 for this build, so the macro's emms block is skipped
 
 
 %macro PSWAPD_SSE 2
@@ -254,9 +254,9 @@ FLOAT_TO_INT16_INTERLEAVE2 sse2
     punpckldq %1, %2
 %endmacro
 
-%macro FLOAT_TO_INT16_INTERLEAVE6 1
+%macro FLOAT_TO_INT16_INTERLEAVE6 0
 ; void float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len)
-cglobal float_to_int16_interleave6_%1, 2,8,0, dst, src, src1, src2, src3, src4, src5, len
+cglobal float_to_int16_interleave6, 2, 8, 0, dst, src, src1, src2, src3, src4, src5, len
 %if ARCH_X86_64
     mov     lend, r2d
 %else
@@ -302,21 +302,24 @@ cglobal float_to_int16_interleave6_%1, 2,8,0, dst, src, src1, src2, src3, src4,
     RET
 %endmacro ; FLOAT_TO_INT16_INTERLEAVE6
 
+INIT_MMX sse                  ; SSE build on MMX registers; pswapd is mapped to PSWAPD_SSE below
 %define pswapd PSWAPD_SSE
-FLOAT_TO_INT16_INTERLEAVE6 sse
+FLOAT_TO_INT16_INTERLEAVE6    ; macro now takes no arguments
+INIT_MMX 3dnow                ; 3DNow! build: cvtps2pi -> pf2id and pswapd -> PSWAPD_3DNOW below
 %define cvtps2pi pf2id
 %define pswapd PSWAPD_3DNOW
-FLOAT_TO_INT16_INTERLEAVE6 3dnow
+FLOAT_TO_INT16_INTERLEAVE6    ; assembled with both remaps above in effect
 %undef pswapd
-FLOAT_TO_INT16_INTERLEAVE6 3dnowext
+INIT_MMX 3dnowext             ; 3DNowExt build: pswapd is undefined again, cvtps2pi is still pf2id
+FLOAT_TO_INT16_INTERLEAVE6    ; assembled with only the cvtps2pi remap in effect
 %undef cvtps2pi
 
 ;-----------------------------------------------------------------------------
 ; void ff_float_interleave6(float *dst, const float **src, unsigned int len);
 ;-----------------------------------------------------------------------------
 
-%macro FLOAT_INTERLEAVE6 2
-cglobal float_interleave6_%1, 2,8,%2, dst, src, src1, src2, src3, src4, src5, len
+%macro FLOAT_INTERLEAVE6 1
+cglobal float_interleave6, 2, 8, %1, dst, src, src1, src2, src3, src4, src5, len
 %if ARCH_X86_64
     mov     lend, r2d
 %else
@@ -334,7 +337,7 @@ cglobal float_interleave6_%1, 2,8,%2, dst, src, src1, src2, src3, src4, src5, le
     sub    src4q, srcq
     sub    src5q, srcq
 .loop:
-%ifidn %1, sse
+%if cpuflag(sse)
     movaps    m0, [srcq]
     movaps    m1, [srcq+src1q]
     movaps    m2, [srcq+src2q]
@@ -383,62 +386,60 @@ cglobal float_interleave6_%1, 2,8,%2, dst, src, src1, src2, src3, src4, src5, le
     add      dstq, mmsize*6
     sub      lend, mmsize/4
     jg .loop
-%ifidn %1, mmx
+%if mmsize == 8
     emms
 %endif
     REP_RET
 %endmacro
 
-INIT_MMX
-FLOAT_INTERLEAVE6 mmx, 0
-INIT_XMM
-FLOAT_INTERLEAVE6 sse, 7
+INIT_MMX mmx           ; MMX build; the macro emits emms when mmsize == 8
+FLOAT_INTERLEAVE6 0    ; %1 = 0, forwarded to cglobal as its third numeric argument
+INIT_XMM sse           ; SSE build: the macro's cpuflag(sse) test guards the movaps path
+FLOAT_INTERLEAVE6 7    ; %1 = 7, forwarded to cglobal as its third numeric argument
 
 ;-----------------------------------------------------------------------------
 ; void ff_float_interleave2(float *dst, const float **src, unsigned int len);
 ;-----------------------------------------------------------------------------
 
-%macro FLOAT_INTERLEAVE2 2
-cglobal float_interleave2_%1, 3,4,%2, dst, src, len, src1
+%macro FLOAT_INTERLEAVE2 1 ; %1 is forwarded to cglobal as its third numeric argument
+cglobal float_interleave2, 3, 4, %1, dst, src, len, src1 ; name no longer carries an explicit _%1 suffix
     mov     src1q, [srcq+gprsize]
     mov      srcq, [srcq        ]
     sub     src1q, srcq
 .loop:
-    MOVPS      m0, [srcq             ]
-    MOVPS      m1, [srcq+src1q       ]
-    MOVPS      m3, [srcq      +mmsize]
-    MOVPS      m4, [srcq+src1q+mmsize]
+    mova       m0, [srcq             ] ; m0 = data from src[0] at the current position
+    mova       m1, [srcq+src1q       ] ; m1 = data from src[1] (src1q holds src[1]-src[0])
+    mova       m3, [srcq      +mmsize] ; m3 = following mmsize bytes of src[0]
+    mova       m4, [srcq+src1q+mmsize] ; m4 = following mmsize bytes of src[1]
 
-    MOVPS      m2, m0
+    mova       m2, m0 ; copy m0 first: PUNPCKLDQ below overwrites it
     PUNPCKLDQ  m0, m1
     PUNPCKHDQ  m2, m1
 
-    MOVPS      m1, m3
+    mova       m1, m3 ; copy m3 first: PUNPCKLDQ below overwrites it (old m1 is no longer needed)
     PUNPCKLDQ  m3, m4
     PUNPCKHDQ  m1, m4
 
-    MOVPS [dstq         ], m0
-    MOVPS [dstq+1*mmsize], m2
-    MOVPS [dstq+2*mmsize], m3
-    MOVPS [dstq+3*mmsize], m1
+    mova  [dstq         ], m0 ; low-half unpack of the first register pair
+    mova  [dstq+1*mmsize], m2 ; high-half unpack of the first register pair
+    mova  [dstq+2*mmsize], m3 ; low-half unpack of the second register pair
+    mova  [dstq+3*mmsize], m1 ; high-half unpack of the second register pair
 
     add      srcq, mmsize*2
     add      dstq, mmsize*4
     sub      lend, mmsize/2
     jg .loop
-%ifidn %1, mmx
+%if mmsize == 8 ; register-size test replaces the old "%ifidn %1, mmx" name comparison
     emms
 %endif
     REP_RET
 %endmacro
 
-INIT_MMX
-%define MOVPS     movq
+INIT_MMX mmx           ; MMX build; the MOVPS define is dropped because the macro now uses mova
 %define PUNPCKLDQ punpckldq
 %define PUNPCKHDQ punpckhdq
-FLOAT_INTERLEAVE2 mmx, 0
-INIT_XMM
-%define MOVPS     movaps
+FLOAT_INTERLEAVE2 0    ; %1 = 0, forwarded to cglobal as its third numeric argument
+INIT_XMM sse           ; SSE build; PUNPCKLDQ/PUNPCKHDQ are remapped to unpcklps/unpckhps below
 %define PUNPCKLDQ unpcklps
 %define PUNPCKHDQ unpckhps
-FLOAT_INTERLEAVE2 sse, 5
+FLOAT_INTERLEAVE2 5    ; %1 = 5, forwarded to cglobal as its third numeric argument