mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
Merge commit '9ce02e14f01de50fcc6f7f459544b140be66d615'
* commit '9ce02e14f01de50fcc6f7f459544b140be66d615':
  x86: ac3dsp: port to cpuflags
  x86util: Add cpuflags_mmxext alias for cpuflags_mmx2
  x86inc: Only define program_name if the macro is unset

Conflicts:
  libavcodec/x86/ac3dsp.asm

Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
commit
1dad486714
@ -41,8 +41,8 @@ SECTION .text
|
|||||||
; void ff_ac3_exponent_min(uint8_t *exp, int num_reuse_blocks, int nb_coefs)
|
; void ff_ac3_exponent_min(uint8_t *exp, int num_reuse_blocks, int nb_coefs)
|
||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
|
|
||||||
%macro AC3_EXPONENT_MIN 1
|
%macro AC3_EXPONENT_MIN 0
|
||||||
cglobal ac3_exponent_min_%1, 3,4,2, exp, reuse_blks, expn, offset
|
cglobal ac3_exponent_min, 3, 4, 2, exp, reuse_blks, expn, offset
|
||||||
shl reuse_blksq, 8
|
shl reuse_blksq, 8
|
||||||
jz .end
|
jz .end
|
||||||
LOOP_ALIGN
|
LOOP_ALIGN
|
||||||
@ -65,16 +65,17 @@ cglobal ac3_exponent_min_%1, 3,4,2, exp, reuse_blks, expn, offset
|
|||||||
|
|
||||||
%define PMINUB PMINUB_MMX
|
%define PMINUB PMINUB_MMX
|
||||||
%define LOOP_ALIGN
|
%define LOOP_ALIGN
|
||||||
INIT_MMX
|
INIT_MMX mmx
|
||||||
AC3_EXPONENT_MIN mmx
|
AC3_EXPONENT_MIN
|
||||||
%if HAVE_MMXEXT_EXTERNAL
|
%if HAVE_MMXEXT_EXTERNAL
|
||||||
%define PMINUB PMINUB_MMXEXT
|
%define PMINUB PMINUB_MMXEXT
|
||||||
%define LOOP_ALIGN ALIGN 16
|
%define LOOP_ALIGN ALIGN 16
|
||||||
AC3_EXPONENT_MIN mmxext
|
INIT_MMX mmxext
|
||||||
|
AC3_EXPONENT_MIN
|
||||||
%endif
|
%endif
|
||||||
%if HAVE_SSE2_EXTERNAL
|
%if HAVE_SSE2_EXTERNAL
|
||||||
INIT_XMM
|
INIT_XMM sse2
|
||||||
AC3_EXPONENT_MIN sse2
|
AC3_EXPONENT_MIN
|
||||||
%endif
|
%endif
|
||||||
%undef PMINUB
|
%undef PMINUB
|
||||||
%undef LOOP_ALIGN
|
%undef LOOP_ALIGN
|
||||||
@ -168,8 +169,8 @@ AC3_MAX_MSB_ABS_INT16 or_abs
|
|||||||
; macro used for ff_ac3_lshift_int16() and ff_ac3_rshift_int32()
|
; macro used for ff_ac3_lshift_int16() and ff_ac3_rshift_int32()
|
||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
|
|
||||||
%macro AC3_SHIFT 4 ; l/r, 16/32, shift instruction, instruction set
|
%macro AC3_SHIFT 3 ; l/r, 16/32, shift instruction
|
||||||
cglobal ac3_%1shift_int%2_%4, 3,3,5, src, len, shift
|
cglobal ac3_%1shift_int%2, 3, 3, 5, src, len, shift
|
||||||
movd m0, shiftd
|
movd m0, shiftd
|
||||||
.loop:
|
.loop:
|
||||||
mova m1, [srcq ]
|
mova m1, [srcq ]
|
||||||
@ -195,19 +196,19 @@ cglobal ac3_%1shift_int%2_%4, 3,3,5, src, len, shift
|
|||||||
; void ff_ac3_lshift_int16(int16_t *src, unsigned int len, unsigned int shift)
|
; void ff_ac3_lshift_int16(int16_t *src, unsigned int len, unsigned int shift)
|
||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
|
|
||||||
INIT_MMX
|
INIT_MMX mmx
|
||||||
AC3_SHIFT l, 16, psllw, mmx
|
AC3_SHIFT l, 16, psllw
|
||||||
INIT_XMM
|
INIT_XMM sse2
|
||||||
AC3_SHIFT l, 16, psllw, sse2
|
AC3_SHIFT l, 16, psllw
|
||||||
|
|
||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
; void ff_ac3_rshift_int32(int32_t *src, unsigned int len, unsigned int shift)
|
; void ff_ac3_rshift_int32(int32_t *src, unsigned int len, unsigned int shift)
|
||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
|
|
||||||
INIT_MMX
|
INIT_MMX mmx
|
||||||
AC3_SHIFT r, 32, psrad, mmx
|
AC3_SHIFT r, 32, psrad
|
||||||
INIT_XMM
|
INIT_XMM sse2
|
||||||
AC3_SHIFT r, 32, psrad, sse2
|
AC3_SHIFT r, 32, psrad
|
||||||
|
|
||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
; void ff_float_to_fixed24(int32_t *dst, const float *src, unsigned int len)
|
; void ff_float_to_fixed24(int32_t *dst, const float *src, unsigned int len)
|
||||||
@ -215,8 +216,8 @@ AC3_SHIFT r, 32, psrad, sse2
|
|||||||
|
|
||||||
; The 3DNow! version is not bit-identical because pf2id uses truncation rather
|
; The 3DNow! version is not bit-identical because pf2id uses truncation rather
|
||||||
; than round-to-nearest.
|
; than round-to-nearest.
|
||||||
INIT_MMX
|
INIT_MMX 3dnow
|
||||||
cglobal float_to_fixed24_3dnow, 3,3,0, dst, src, len
|
cglobal float_to_fixed24, 3, 3, 0, dst, src, len
|
||||||
movq m0, [pf_1_24]
|
movq m0, [pf_1_24]
|
||||||
.loop:
|
.loop:
|
||||||
movq m1, [srcq ]
|
movq m1, [srcq ]
|
||||||
@ -242,8 +243,8 @@ cglobal float_to_fixed24_3dnow, 3,3,0, dst, src, len
|
|||||||
femms
|
femms
|
||||||
RET
|
RET
|
||||||
|
|
||||||
INIT_XMM
|
INIT_XMM sse
|
||||||
cglobal float_to_fixed24_sse, 3,3,3, dst, src, len
|
cglobal float_to_fixed24, 3, 3, 3, dst, src, len
|
||||||
movaps m0, [pf_1_24]
|
movaps m0, [pf_1_24]
|
||||||
.loop:
|
.loop:
|
||||||
movaps m1, [srcq ]
|
movaps m1, [srcq ]
|
||||||
@ -267,8 +268,8 @@ cglobal float_to_fixed24_sse, 3,3,3, dst, src, len
|
|||||||
emms
|
emms
|
||||||
RET
|
RET
|
||||||
|
|
||||||
INIT_XMM
|
INIT_XMM sse2
|
||||||
cglobal float_to_fixed24_sse2, 3,3,9, dst, src, len
|
cglobal float_to_fixed24, 3, 3, 9, dst, src, len
|
||||||
movaps m0, [pf_1_24]
|
movaps m0, [pf_1_24]
|
||||||
.loop:
|
.loop:
|
||||||
movaps m1, [srcq ]
|
movaps m1, [srcq ]
|
||||||
@ -332,8 +333,8 @@ cglobal float_to_fixed24_sse2, 3,3,9, dst, src, len
|
|||||||
paddd %1, %2
|
paddd %1, %2
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
INIT_XMM
|
INIT_XMM sse2
|
||||||
cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum
|
cglobal ac3_compute_mantissa_size, 1, 2, 4, mant_cnt, sum
|
||||||
movdqa m0, [mant_cntq ]
|
movdqa m0, [mant_cntq ]
|
||||||
movdqa m1, [mant_cntq+ 1*16]
|
movdqa m1, [mant_cntq+ 1*16]
|
||||||
paddw m0, [mant_cntq+ 2*16]
|
paddw m0, [mant_cntq+ 2*16]
|
||||||
@ -373,20 +374,20 @@ cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum
|
|||||||
; void ff_ac3_extract_exponents(uint8_t *exp, int32_t *coef, int nb_coefs)
|
; void ff_ac3_extract_exponents(uint8_t *exp, int32_t *coef, int nb_coefs)
|
||||||
;------------------------------------------------------------------------------
|
;------------------------------------------------------------------------------
|
||||||
|
|
||||||
%macro PABSD_MMX 2 ; src/dst, tmp
|
%macro PABSD 1-2 ; src/dst, tmp (tmp is only needed without SSSE3)
|
||||||
|
%if cpuflag(ssse3)
|
||||||
|
pabsd %1, %1
|
||||||
|
%else ; src/dst, tmp
|
||||||
pxor %2, %2
|
pxor %2, %2
|
||||||
pcmpgtd %2, %1
|
pcmpgtd %2, %1
|
||||||
pxor %1, %2
|
pxor %1, %2
|
||||||
psubd %1, %2
|
psubd %1, %2
|
||||||
%endmacro
|
%endif
|
||||||
|
|
||||||
%macro PABSD_SSSE3 1-2 ; src/dst, unused
|
|
||||||
pabsd %1, %1
|
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
%if HAVE_AMD3DNOW_EXTERNAL
|
%if HAVE_AMD3DNOW_EXTERNAL
|
||||||
INIT_MMX
|
INIT_MMX 3dnow
|
||||||
cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len
|
cglobal ac3_extract_exponents, 3, 3, 0, exp, coef, len
|
||||||
add expq, lenq
|
add expq, lenq
|
||||||
lea coefq, [coefq+4*lenq]
|
lea coefq, [coefq+4*lenq]
|
||||||
neg lenq
|
neg lenq
|
||||||
@ -395,8 +396,8 @@ cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len
|
|||||||
.loop:
|
.loop:
|
||||||
movq m0, [coefq+4*lenq ]
|
movq m0, [coefq+4*lenq ]
|
||||||
movq m1, [coefq+4*lenq+8]
|
movq m1, [coefq+4*lenq+8]
|
||||||
PABSD_MMX m0, m2
|
PABSD m0, m2
|
||||||
PABSD_MMX m1, m2
|
PABSD m1, m2
|
||||||
pslld m0, 1
|
pslld m0, 1
|
||||||
por m0, m3
|
por m0, m3
|
||||||
pi2fd m2, m0
|
pi2fd m2, m0
|
||||||
@ -420,8 +421,8 @@ cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len
|
|||||||
REP_RET
|
REP_RET
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
%macro AC3_EXTRACT_EXPONENTS 1
|
%macro AC3_EXTRACT_EXPONENTS 0
|
||||||
cglobal ac3_extract_exponents_%1, 3,3,4, exp, coef, len
|
cglobal ac3_extract_exponents, 3, 3, 4, exp, coef, len
|
||||||
add expq, lenq
|
add expq, lenq
|
||||||
lea coefq, [coefq+4*lenq]
|
lea coefq, [coefq+4*lenq]
|
||||||
neg lenq
|
neg lenq
|
||||||
@ -453,11 +454,10 @@ cglobal ac3_extract_exponents_%1, 3,3,4, exp, coef, len
|
|||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
%if HAVE_SSE2_EXTERNAL
|
%if HAVE_SSE2_EXTERNAL
|
||||||
INIT_XMM
|
INIT_XMM sse2
|
||||||
%define PABSD PABSD_MMX
|
AC3_EXTRACT_EXPONENTS
|
||||||
AC3_EXTRACT_EXPONENTS sse2
|
%endif
|
||||||
%if HAVE_SSSE3_EXTERNAL
|
%if HAVE_SSSE3_EXTERNAL
|
||||||
%define PABSD PABSD_SSSE3
|
INIT_XMM ssse3
|
||||||
AC3_EXTRACT_EXPONENTS ssse3
|
AC3_EXTRACT_EXPONENTS
|
||||||
%endif
|
|
||||||
%endif
|
%endif
|
||||||
|
@ -34,7 +34,9 @@
|
|||||||
; as this feature might be useful for others as well. Send patches or ideas
|
; as this feature might be useful for others as well. Send patches or ideas
|
||||||
; to x264-devel@videolan.org .
|
; to x264-devel@videolan.org .
|
||||||
|
|
||||||
%define program_name ff
|
%ifndef program_name
|
||||||
|
%define program_name ff
|
||||||
|
%endif
|
||||||
|
|
||||||
%define WIN64 0
|
%define WIN64 0
|
||||||
%define UNIX64 0
|
%define UNIX64 0
|
||||||
|
@ -23,6 +23,8 @@
|
|||||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
;******************************************************************************
|
;******************************************************************************
|
||||||
|
|
||||||
|
%define cpuflags_mmxext cpuflags_mmx2
|
||||||
|
|
||||||
%include "libavutil/x86/x86inc.asm"
|
%include "libavutil/x86/x86inc.asm"
|
||||||
|
|
||||||
%macro SBUTTERFLY 4
|
%macro SBUTTERFLY 4
|
||||||
|
Loading…
Reference in New Issue
Block a user