1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2024-12-23 12:43:46 +02:00

x86util: import MOVHL macro

Originally committed to x264 in 1637239a by Henrik Gramner who has
agreed to re-license it as LGPL.  Original commit message follows.

    x86: Avoid some bypass delays and false dependencies

    A bypass delay of 1-3 clock cycles may occur on some CPUs when transitioning
    between int and float domains, so try to avoid that if possible.
This commit is contained in:
James Darnley 2017-02-11 13:25:09 +01:00
parent e18bc2114f
commit 7627df15d4

View File

@@ -876,3 +876,15 @@
psrlq %1, 8*(%2)
%endif
%endmacro
%macro MOVHL 2 ; dst, src
%ifidn %1, %2
punpckhqdq %1, %2
%elif cpuflag(avx)
punpckhqdq %1, %2, %2
%elif cpuflag(sse4)
pshufd %1, %2, q3232 ; pshufd is slow on some older CPUs, so only use it on more modern ones
%else
movhlps %1, %2 ; may cause an int/float domain transition and has a dependency on dst
%endif
%endmacro