diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm index 10cb2983aa..1a725978a3 100644 --- a/libavcodec/x86/dcadsp.asm +++ b/libavcodec/x86/dcadsp.asm @@ -292,7 +292,7 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \ %define scale m0 %if ARCH_X86_32 || WIN64 %if cpuflag(sse2) && notcpuflag(avx) - movd m0, scalem + movd scale, scalem SPLATD m0 %else VBROADCASTSS m0, scalem @@ -311,7 +311,7 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \ sub r5q, offmp and r5q, -64 shl r5q, 2 -%if ARCH_X86_32 || mmsize < 32 +%if ARCH_X86_32 || notcpuflag(avx) mov OFFQ, r5q %define i r5q mov i, 16 * 4 - (ARCH_X86_64 + 1) * mmsize ; main loop counter @@ -337,7 +337,7 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \ %define j r3q mov win, windowm mov ptr1, synth_bufm -%if ARCH_X86_32 || mmsize < 32 +%if ARCH_X86_32 || notcpuflag(avx) add win, i add ptr1, i %endif @@ -356,7 +356,7 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \ mov ptr2, synth_bufmp ; prepare the inner loop counter mov j, OFFQ -%if ARCH_X86_32 || mmsize < 32 +%if ARCH_X86_32 || notcpuflag(avx) sub ptr2, i %endif .loop1: @@ -403,7 +403,7 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \ mova [outq + i + 0 * 4 + mmsize], m7 mova [outq + i + 16 * 4 + mmsize], m8 %endif -%if ARCH_X86_32 || mmsize < 32 +%if ARCH_X86_32 || notcpuflag(avx) sub i, (ARCH_X86_64 + 1) * mmsize jge .mainloop %endif diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c index 48880d628a..bc94b44376 100644 --- a/libavcodec/x86/dcadsp_init.c +++ b/libavcodec/x86/dcadsp_init.c @@ -62,9 +62,9 @@ void ff_synth_filter_inner_##opt(float *synth_buf_ptr, float synth_buf2[32], \ const float window[512], \ float out[32], intptr_t offset, float scale); \ static void synth_filter_##opt(FFTContext *imdct, \ - float *synth_buf_ptr, int *synth_buf_offset, \ - float synth_buf2[32], const float window[512], \ - float out[32], const float in[32], float scale) \ + float *synth_buf_ptr, int *synth_buf_offset, \ + float synth_buf2[32], const float window[512], \ + float out[32], const float in[32], float scale) \ { \ float *synth_buf= synth_buf_ptr + *synth_buf_offset; \ \ @@ -82,7 +82,9 @@ SYNTH_FILTER_FUNC(sse) #endif SYNTH_FILTER_FUNC(sse2) SYNTH_FILTER_FUNC(avx) +#if HAVE_FMA3_EXTERNAL SYNTH_FILTER_FUNC(fma3) +#endif #endif /* HAVE_YASM */ av_cold void ff_synth_filter_init_x86(SynthFilterContext *s)