1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2024-12-23 12:43:46 +02:00

x86: dct32: port to cpuflags

This commit is contained in:
Diego Biurrun 2012-08-01 19:28:08 +02:00
parent 239fdf1b4a
commit 0c3ff1982c

View File

@ -42,39 +42,24 @@ ps_cos_vec: dd 0.500603, 0.505471, 0.515447, 0.531043
align 32 align 32
ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000 ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000
%macro BUTTERFLY_SSE 4 %macro BUTTERFLY 4
movaps %4, %1 subps %4, %1, %2
subps %1, %2 addps %2, %2, %1
addps %2, %4 mulps %1, %4, %3
mulps %1, %3
%endmacro %endmacro
%macro BUTTERFLY_AVX 4 %macro BUTTERFLY0 5
vsubps %4, %1, %2 %if cpuflag(sse2) && notcpuflag(avx)
vaddps %2, %2, %1
vmulps %1, %4, %3
%endmacro
%macro BUTTERFLY0_SSE 5
movaps %4, %1
shufps %1, %1, %5
xorps %4, %2
addps %1, %4
mulps %1, %3
%endmacro
%macro BUTTERFLY0_SSE2 5
pshufd %4, %1, %5 pshufd %4, %1, %5
xorps %1, %2 xorps %1, %2
addps %1, %4 addps %1, %4
mulps %1, %3 mulps %1, %3
%endmacro %else
shufps %4, %1, %1, %5
%macro BUTTERFLY0_AVX 5 xorps %1, %1, %2
vshufps %4, %1, %1, %5 addps %4, %4, %1
vxorps %1, %1, %2 mulps %1, %4, %3
vaddps %4, %4, %1 %endif
vmulps %1, %4, %3
%endmacro %endmacro
%macro BUTTERFLY2 4 %macro BUTTERFLY2 4
@ -206,14 +191,11 @@ ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000
movss [outq+116], m6 movss [outq+116], m6
%endmacro %endmacro
%define BUTTERFLY BUTTERFLY_AVX INIT_YMM avx
%define BUTTERFLY0 BUTTERFLY0_AVX
INIT_YMM
SECTION_TEXT SECTION_TEXT
%if HAVE_AVX %if HAVE_AVX
; void ff_dct32_float_avx(FFTSample *out, const FFTSample *in) ; void ff_dct32_float_avx(FFTSample *out, const FFTSample *in)
cglobal dct32_float_avx, 2,3,8, out, in, tmp cglobal dct32_float, 2,3,8, out, in, tmp
; pass 1 ; pass 1
vmovaps m4, [inq+0] vmovaps m4, [inq+0]
vinsertf128 m5, m5, [inq+96], 1 vinsertf128 m5, m5, [inq+96], 1
@ -286,9 +268,6 @@ INIT_XMM
RET RET
%endif %endif
%define BUTTERFLY BUTTERFLY_SSE
%define BUTTERFLY0 BUTTERFLY0_SSE
%if ARCH_X86_64 %if ARCH_X86_64
%define SPILL SWAP %define SPILL SWAP
%define UNSPILL SWAP %define UNSPILL SWAP
@ -411,10 +390,9 @@ INIT_XMM
%endif %endif
INIT_XMM
%macro DCT32_FUNC 1
; void ff_dct32_float_sse(FFTSample *out, const FFTSample *in) ; void ff_dct32_float_sse(FFTSample *out, const FFTSample *in)
cglobal dct32_float_%1, 2,3,16, out, in, tmp %macro DCT32_FUNC 0
cglobal dct32_float, 2, 3, 16, out, in, tmp
; pass 1 ; pass 1
movaps m0, [inq+0] movaps m0, [inq+0]
@ -498,18 +476,16 @@ cglobal dct32_float_%1, 2,3,16, out, in, tmp
RET RET
%endmacro %endmacro
%macro LOAD_INV_SSE 2 %macro LOAD_INV 2
%if cpuflag(sse2)
pshufd %1, %2, 0x1b
%elif cpuflag(sse)
movaps %1, %2 movaps %1, %2
shufps %1, %1, 0x1b shufps %1, %1, 0x1b
%endif
%endmacro %endmacro
%define LOAD_INV LOAD_INV_SSE INIT_XMM sse
DCT32_FUNC sse DCT32_FUNC
INIT_XMM sse2
%macro LOAD_INV_SSE2 2 DCT32_FUNC
pshufd %1, %2, 0x1b
%endmacro
%define LOAD_INV LOAD_INV_SSE2
%define BUTTERFLY0 BUTTERFLY0_SSE2
DCT32_FUNC sse2