You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-07-16 22:42:38 +02:00
x86: move XOP emulation code back to x86inc
Only two functions that use XOP multiply-accumulate instructions with the first operand equal to the fourth (i.e. accumulating in place) actually took advantage of the emulation macros; those two call sites now carry their own non-XOP fallback. This further reduces differences with x264's x86inc.

Reviewed-by: Ronald S. Bultje <rsbultje@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
@ -25,6 +25,15 @@
|
|||||||
|
|
||||||
SECTION .text
|
SECTION .text
|
||||||
|
|
||||||
|
;-----------------------------------------------------------------------
; PMACSDQL acc, src1, src2, (unused), (unused)
;
; In-place multiply-accumulate: acc += src1 * src2.
;   XOP build    : one pmacsdql with acc as both destination and addend.
;   non-XOP build: pmuldq leaves the product in src1, then paddq folds
;                  it into acc.
;
; Clobbers: src1 (second argument) on the non-XOP path.
; Arguments four and five are accepted so existing 5-operand call sites
; keep assembling; the body never references them.
;-----------------------------------------------------------------------
%macro PMACSDQL 5
%if cpuflag(xop)
    pmacsdql    %1, %2, %3, %1
%else
    pmuldq      %2, %3              ; product overwrites the second argument
    paddq       %1, %2
%endif
%endmacro
|
||||||
|
|
||||||
%macro LPC_32 1
|
%macro LPC_32 1
|
||||||
INIT_XMM %1
|
INIT_XMM %1
|
||||||
cglobal flac_lpc_32, 5,6,5, decoded, coeffs, pred_order, qlevel, len, j
|
cglobal flac_lpc_32, 5,6,5, decoded, coeffs, pred_order, qlevel, len, j
|
||||||
|
@ -1427,6 +1427,22 @@ AVX_INSTR pfmul, 3dnow, 1, 0, 1
|
|||||||
%undef i
|
%undef i
|
||||||
%undef j
|
%undef j
|
||||||
|
|
||||||
|
;-----------------------------------------------------------------------
; FMA_INSTR name, mul_insn, add_insn
;
; Defines a macro called <name> that takes four operands
;     <name> dst, src1, src2, src3
; plus three trailing parameters whose defaults are the arguments given
; to FMA_INSTR (the instruction name, the multiply and the add).
;
;   XOP build    : emits the native instruction, v<name> dst, src1, src2, src3.
;   non-XOP build: emits  mul_insn dst, src1, src2
;                         add_insn dst, src3
;
; The fallback writes dst before src3 is read, so it cannot be used when
; dst and src3 are the same operand: the accumulator would be overwritten
; by the product before it is added. That case is rejected at assembly
; time instead of silently producing a wrong result; callers that need
; in-place accumulation without XOP must supply their own sequence using
; a scratch register.
;-----------------------------------------------------------------------
%macro FMA_INSTR 3
    %macro %1 4-7 %1, %2, %3
        %if cpuflag(xop)
            v%5 %1, %2, %3, %4
        %elifnidn %1, %4
            %6 %1, %2, %3
            %7 %1, %4
        %else
            %error non-xop emulation of ``%5 %1, %2, %3, %4'' is not supported
        %endif
    %endmacro
%endmacro

FMA_INSTR  pmacsww,  pmullw, paddw
FMA_INSTR  pmacsdd,  pmulld, paddd ; sse4 emulation
FMA_INSTR pmacsdql,  pmuldq, paddq ; sse4 emulation
FMA_INSTR pmadcswd, pmaddwd, paddd
|
||||||
|
|
||||||
; tzcnt is equivalent to "rep bsf" and is backwards-compatible with bsf.
|
; tzcnt is equivalent to "rep bsf" and is backwards-compatible with bsf.
|
||||||
; This lets us use tzcnt without bumping the yasm version requirement yet.
|
; This lets us use tzcnt without bumping the yasm version requirement yet.
|
||||||
%define tzcnt rep bsf
|
%define tzcnt rep bsf
|
||||||
|
@ -765,25 +765,6 @@
|
|||||||
%endif
|
%endif
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
;-----------------------------------------------------------------------
; PMA_EMU NAME, xop_insn, mul_insn, add_insn
;
; Defines a macro called NAME that takes five operands
;     NAME dst, src1, src2, acc, tmp
; plus three trailing parameters whose defaults are xop_insn, mul_insn
; and add_insn.
;
;   XOP build             : v<xop_insn> dst, src1, src2, acc
;   non-XOP, dst same as acc : the product is formed in tmp, then
;                              add_insn dst, acc, tmp
;   non-XOP, dst differs  : the product is formed in dst, then
;                              add_insn dst, acc
;
; tmp is only written on the "dst same as acc" path.
;-----------------------------------------------------------------------
%macro PMA_EMU 4
    %macro %1 5-8 %2, %3, %4
        %if cpuflag(xop)
            v%6 %1, %2, %3, %4
        %elifidn %1, %4
            %7 %5, %2, %3
            %8 %1, %4, %5
        %else
            %7 %1, %2, %3
            %8 %1, %4
        %endif
    %endmacro
%endmacro

PMA_EMU  PMACSWW,  pmacsww,  pmullw, paddw
PMA_EMU  PMACSDD,  pmacsdd,  pmulld, paddd ; sse4 emulation
PMA_EMU PMACSDQL, pmacsdql,  pmuldq, paddq ; sse4 emulation
PMA_EMU PMADCSWD, pmadcswd, pmaddwd, paddd
|
|
||||||
|
|
||||||
; Wrapper for non-FMA version of fmaddps
|
; Wrapper for non-FMA version of fmaddps
|
||||||
%macro FMULADD_PS 5
|
%macro FMULADD_PS 5
|
||||||
%if cpuflag(fma3) || cpuflag(fma4)
|
%if cpuflag(fma3) || cpuflag(fma4)
|
||||||
|
@ -176,7 +176,12 @@ cglobal resample_common_%1, 1, 7, 2, ctx, phase_shift, dst, frac, \
|
|||||||
.inner_loop:
|
.inner_loop:
|
||||||
movu m1, [srcq+min_filter_count_x4q*1]
|
movu m1, [srcq+min_filter_count_x4q*1]
|
||||||
%ifidn %1, int16
|
%ifidn %1, int16
|
||||||
PMADCSWD m0, m1, [filterq+min_filter_count_x4q*1], m0, m1
|
%if cpuflag(xop)
|
||||||
|
vpmadcswd m0, m1, [filterq+min_filter_count_x4q*1], m0
|
||||||
|
%else
|
||||||
|
pmaddwd m1, [filterq+min_filter_count_x4q*1]
|
||||||
|
paddd m0, m1
|
||||||
|
%endif
|
||||||
%else ; float/double
|
%else ; float/double
|
||||||
%if cpuflag(fma4) || cpuflag(fma3)
|
%if cpuflag(fma4) || cpuflag(fma3)
|
||||||
fmaddp%4 m0, m1, [filterq+min_filter_count_x4q*1], m0
|
fmaddp%4 m0, m1, [filterq+min_filter_count_x4q*1], m0
|
||||||
|
Reference in New Issue
Block a user