mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
float_dsp: add vector_dmul_scalar() to multiply a vector of doubles
Include x86-optimized versions for SSE2 and AVX.
This commit is contained in:
parent
da025d115a
commit
ac7eb4cb20
@ -44,11 +44,20 @@ static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
|
||||
dst[i] = src[i] * mul;
|
||||
}
|
||||
|
||||
/**
 * Plain C implementation of vector_dmul_scalar: scale a vector of doubles.
 *
 * Stores src[k] * mul into dst[k] for every k in [0, len). Nothing is
 * written when len is zero or negative.
 *
 * @param dst  output vector
 * @param src  input vector
 * @param mul  scalar factor applied to every element
 * @param len  number of elements to process
 */
static void vector_dmul_scalar_c(double *dst, const double *src, double mul,
                                 int len)
{
    int k = 0;

    while (k < len) {
        dst[k] = src[k] * mul;
        k++;
    }
}
|
||||
|
||||
void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
|
||||
{
|
||||
fdsp->vector_fmul = vector_fmul_c;
|
||||
fdsp->vector_fmac_scalar = vector_fmac_scalar_c;
|
||||
fdsp->vector_fmul_scalar = vector_fmul_scalar_c;
|
||||
fdsp->vector_dmul_scalar = vector_dmul_scalar_c;
|
||||
|
||||
#if ARCH_ARM
|
||||
ff_float_dsp_init_arm(fdsp);
|
||||
|
@ -66,6 +66,21 @@ typedef struct AVFloatDSPContext {
|
||||
*/
|
||||
void (*vector_fmul_scalar)(float *dst, const float *src, float mul,
|
||||
int len);
|
||||
|
||||
/**
|
||||
* Multiply a vector of double by a scalar double. Source and
|
||||
* destination vectors must overlap exactly or not at all.
|
||||
*
|
||||
* @param dst result vector
|
||||
* constraints: 32-byte aligned
|
||||
* @param src input vector
|
||||
* constraints: 32-byte aligned
|
||||
* @param mul scalar value
|
||||
* @param len length of vector
|
||||
* constraints: multiple of 8
|
||||
*/
|
||||
void (*vector_dmul_scalar)(double *dst, const double *src, double mul,
|
||||
int len);
|
||||
} AVFloatDSPContext;
|
||||
|
||||
/**
|
||||
|
@ -114,3 +114,48 @@ cglobal vector_fmul_scalar, 4,4,3, dst, src, mul, len
|
||||
|
||||
INIT_XMM sse
|
||||
VECTOR_FMUL_SCALAR
|
||||
|
||||
;------------------------------------------------------------------------------
|
||||
; void ff_vector_dmul_scalar(double *dst, const double *src, double mul,
|
||||
; int len)
|
||||
;------------------------------------------------------------------------------
|
||||
|
||||
; Emit vector_dmul_scalar for the instruction set selected by the preceding
; INIT_XMM/INIT_YMM:  dst[i] = src[i] * mul  for i in [0, len).
; Each loop iteration handles 2*mmsize bytes, walking from the end of the
; vectors towards the start until the byte offset in lenq goes negative.
%macro VECTOR_DMUL_SCALAR 0
%if ARCH_X86_32
; All arguments are on the stack and the 8-byte double takes up two 4-byte
; slots (the 3rd and 4th), so the int len sits in the 5th slot. Name that
; slot explicitly and load it by hand instead of letting cglobal read the
; upper half of mul as len.
cglobal vector_dmul_scalar, 3,4,3, dst, src, mul, len, lenaddr
    mov          lenq, lenaddrm
%elif UNIX64
; mul is passed in xmm0, so len is the third integer argument.
cglobal vector_dmul_scalar, 3,3,3, dst, src, len
%else
; WIN64: mul is passed in xmm2, len is the fourth argument.
cglobal vector_dmul_scalar, 4,4,3, dst, src, mul, len
%endif
%if ARCH_X86_32
    ; Broadcast into m0 (xmm0 or ymm0 depending on mmsize) so that the AVX
    ; version fills the whole 256-bit register used by the loop below.
    VBROADCASTSD   m0, mulm
%else
%if WIN64
    ; Duplicate the scalar across the register it arrived in, then rename
    ; it to m0 for the loop.
    movlhps      xmm2, xmm2
%if cpuflag(avx)
    vinsertf128  ymm2, ymm2, xmm2, 1
%endif
    SWAP 0, 2
%else
    movlhps      xmm0, xmm0
%if cpuflag(avx)
    vinsertf128  ymm0, ymm0, xmm0, 1
%endif
%endif
%endif
    ; lenq = byte offset of the last 2*mmsize-sized chunk
    lea          lenq, [lend*8-2*mmsize]
.loop:
    mulpd          m1, m0, [srcq+lenq       ]
    mulpd          m2, m0, [srcq+lenq+mmsize]
    mova   [dstq+lenq       ], m1
    mova   [dstq+lenq+mmsize], m2
    sub          lenq, 2*mmsize
    jge .loop
    REP_RET
%endmacro
|
||||
|
||||
INIT_XMM sse2
|
||||
VECTOR_DMUL_SCALAR
|
||||
%if HAVE_AVX_EXTERNAL
|
||||
INIT_YMM avx
|
||||
VECTOR_DMUL_SCALAR
|
||||
%endif
|
||||
|
@ -35,6 +35,11 @@ extern void ff_vector_fmac_scalar_avx(float *dst, const float *src, float mul,
|
||||
extern void ff_vector_fmul_scalar_sse(float *dst, const float *src, float mul,
|
||||
int len);
|
||||
|
||||
extern void ff_vector_dmul_scalar_sse2(double *dst, const double *src,
|
||||
double mul, int len);
|
||||
extern void ff_vector_dmul_scalar_avx(double *dst, const double *src,
|
||||
double mul, int len);
|
||||
|
||||
void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
|
||||
{
|
||||
int mm_flags = av_get_cpu_flags();
|
||||
@ -44,8 +49,12 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
|
||||
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_sse;
|
||||
fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_sse;
|
||||
}
|
||||
if (EXTERNAL_SSE2(mm_flags)) {
|
||||
fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2;
|
||||
}
|
||||
if (EXTERNAL_AVX(mm_flags)) {
|
||||
fdsp->vector_fmul = ff_vector_fmul_avx;
|
||||
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_avx;
|
||||
fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_avx;
|
||||
}
|
||||
}
|
||||
|
@ -631,6 +631,17 @@
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
; Replicate one double loaded from memory into every 64-bit lane of a
; vector register, using the best instruction the current cpuflags allow.
;   %1: destination xmm/ymm register
;   %2: 64-bit memory source
%macro VBROADCASTSD 2 ; dst xmm/ymm, src m64
%if mmsize == 32 && cpuflag(avx)
    ; 256-bit registers with AVX: one instruction fills the whole register
    vbroadcastsd %1, %2
%elif cpuflag(sse3)
    ; SSE3: load and duplicate in a single instruction
    movddup      %1, %2
%else
    ; SSE2: load into the low half, then copy the low half to the high half
    movsd        %1, %2
    movlhps      %1, %1
%endif
%endmacro
|
||||
|
||||
%macro SHUFFLE_MASK_W 8
|
||||
%rep 8
|
||||
%if %1>=0x80
|
||||
|
Loading…
Reference in New Issue
Block a user