swr: add int32_to_float_sse2

Could be done for sse/3dnow too if someone wants.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
2025-10-30 23:18:11 +02:00 · 2012-04-28 17:04:42 +02:00
parent 95057b1972
commit 832c3b10d2
2 changed files with 51 additions and 0 deletions
--- a/libswresample/x86/audio_convert.asm
+++ b/libswresample/x86/audio_convert.asm
@@ -21,6 +21,10 @@
 %include "libavutil/x86/x86inc.asm"
 %include "libavutil/x86/x86util.asm"

+SECTION_RODATA
+
+flt2pm31: times 8 dd 4.6566129e-10      ; 8 x float32, each ~= 2^-31; multiplier used by INT32_TO_FLOAT
+
 SECTION .text

 %macro INT16_TO_INT32 1
@@ -55,6 +59,42 @@ int16_to_int32_u_int %+ SUFFIX
    REP_RET
 %endmacro

+%macro INT32_TO_FLOAT 1                 ; %1 = a (aligned loads/stores) or u (unaligned); converts int32 -> float scaled by flt2pm31
+cglobal int32_to_float_%1, 3, 3, 3, dst, src, len ; C side: (uint8_t **dst, const uint8_t **src, int len); uses m0-m2
+    mov srcq, [srcq]                    ; src is a pointer to a pointer: fetch the actual input buffer
+    mov dstq, [dstq]                    ; likewise fetch the actual output buffer
+%ifidn %1, a
+    test dstq, mmsize-1                 ; output not mmsize-aligned -> fall back to the unaligned body
+        jne int32_to_float_u_int %+ SUFFIX
+    test srcq, mmsize-1                 ; input not mmsize-aligned -> fall back to the unaligned body
+        jne int32_to_float_u_int %+ SUFFIX
+%else                                   ; u variant: emit the entry label that the a variant jumps to
+int32_to_float_u_int %+ SUFFIX
+%endif
+    add     srcq, lenq                  ; srcq = end of input; NOTE(review): len is used as a byte offset here - confirm caller units
+    add     dstq, lenq                  ; dstq = end of output (same offset: both sample types are 4 bytes)
+    neg     lenq                        ; lenq = negative offset that counts up towards 0
+    mova      m2, [flt2pm31]            ; m2 = scale factor in every lane
+.next:
+%ifidn %1, a
+    cvtdq2ps  m0, [         srcq+lenq]  ; aligned path: convert int32 -> float directly from memory
+    cvtdq2ps  m1, [mmsize + srcq+lenq]  ; second vector of this iteration
+%else
+    movu      m0, [         srcq+lenq]  ; unaligned path: load first, ...
+    movu      m1, [mmsize + srcq+lenq]
+    cvtdq2ps  m0, m0                    ; ... then convert int32 -> float in registers
+    cvtdq2ps  m1, m1
+%endif
+    mulps m0, m2                        ; apply the flt2pm31 scale
+    mulps m1, m2
+    mov%1 [         dstq+lenq], m0      ; mova or movu store, selected by the variant letter
+    mov%1 [mmsize + dstq+lenq], m1
+    add lenq, 2*mmsize                  ; two vectors consumed per iteration; sets flags for the branch below
+        jl .next                        ; loop while the offset is still negative; body always runs at least once
+    REP_RET                             ; NOTE(review): no tail handling - presumably len is a non-zero multiple of 2*mmsize; verify caller
+%endmacro
+
+
 INIT_MMX mmx
 INT16_TO_INT32 u
 INT16_TO_INT32 a
@@ -62,3 +102,7 @@ INT16_TO_INT32 a
 INIT_XMM sse
 INT16_TO_INT32 u
 INT16_TO_INT32 a
+
+INIT_XMM sse2                           ; the expansions below get the sse2 SUFFIX (C side references ff_int32_to_float_a_sse2)
+INT32_TO_FLOAT u                        ; unaligned variant; also emits the int32_to_float_u_int label
+INT32_TO_FLOAT a                        ; aligned variant; jumps to the label above when src or dst is misaligned
--- a/libswresample/x86/swresample_x86.c
+++ b/libswresample/x86/swresample_x86.c
@@ -26,6 +26,8 @@
 MULTI_CAPS_FUNC_DECL(mmx)
 MULTI_CAPS_FUNC_DECL(sse)

+void ff_int32_to_float_a_sse2(uint8_t **dst, const uint8_t **src, int len); /* asm: INT32_TO_FLOAT a in audio_convert.asm */
+
 void swri_audio_convert_init_x86(struct AudioConvert *ac,
                                 enum AVSampleFormat out_fmt,
                                 enum AVSampleFormat in_fmt,
@@ -44,4 +46,9 @@ void swri_audio_convert_init_x86(struct AudioConvert *ac,

 MULTI_CAPS_FUNC(AV_CPU_FLAG_MMX, mmx)
 MULTI_CAPS_FUNC(AV_CPU_FLAG_SSE, sse)
+
+    if(mm_flags & AV_CPU_FLAG_SSE2) {
+        if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P)
+            ac->simd_f =  ff_int32_to_float_a_sse2;
+    }
 }