From fa5daaca0d6ffcfaa9e9d19089910ee7ebf9a8b7 Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Sat, 28 Apr 2012 13:01:50 +0200 Subject: [PATCH] swr: separate functions for aligned & unaligned If someone has an idea on how to do this cleaner, it's welcome Signed-off-by: Michael Niedermayer --- libswresample/x86/audio_convert.asm | 22 ++++++++++++++++------ libswresample/x86/swresample_x86.c | 4 ++-- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/libswresample/x86/audio_convert.asm b/libswresample/x86/audio_convert.asm index c7ce8c6c53..d98c35cf5d 100644 --- a/libswresample/x86/audio_convert.asm +++ b/libswresample/x86/audio_convert.asm @@ -23,18 +23,26 @@ SECTION .text -%macro INT16_TO_INT32 0 +%macro INT16_TO_INT32 1 cglobal int16_to_int32_%1, 3, 3, 0, dst, src, len mov srcq, [srcq] mov dstq, [dstq] +%ifidn %1, a + test dstq, mmsize-1 + jne int16_to_int32_u_int %+ SUFFIX + test srcq, mmsize-1 + jne int16_to_int32_u_int %+ SUFFIX +%else +int16_to_int32_u_int %+ SUFFIX +%endif .next - movu m4, [srcq] + mov%1 m4, [srcq] pxor m0, m0 pxor m1, m1 punpcklwd m0, m4 punpckhwd m1, m4 - movu [ dstq], m0 - movu [mmsize + dstq], m1 + mov%1 [ dstq], m0 + mov%1 [mmsize + dstq], m1 add srcq, mmsize add dstq, 2*mmsize sub lenq, 2*mmsize @@ -46,7 +54,9 @@ cglobal int16_to_int32_%1, 3, 3, 0, dst, src, len %endmacro INIT_MMX mmx -INT16_TO_INT32 +INT16_TO_INT32 u +INT16_TO_INT32 a INIT_XMM sse -INT16_TO_INT32 +INT16_TO_INT32 u +INT16_TO_INT32 a diff --git a/libswresample/x86/swresample_x86.c b/libswresample/x86/swresample_x86.c index 5c8d8290cd..f2a14c1faf 100644 --- a/libswresample/x86/swresample_x86.c +++ b/libswresample/x86/swresample_x86.c @@ -22,7 +22,7 @@ #include "libswresample/audioconvert.h" #define MULTI_CAPS_FUNC_DECL(cap) \ - void ff_int16_to_int32_ ## cap(uint8_t **dst, const uint8_t **src, int len); + void ff_int16_to_int32_a_ ## cap(uint8_t **dst, const uint8_t **src, int len); MULTI_CAPS_FUNC_DECL(mmx) MULTI_CAPS_FUNC_DECL(sse) @@ -39,7 +39,7 @@ 
void swri_audio_convert_init_x86(struct AudioConvert *ac, #define MULTI_CAPS_FUNC(flag, cap) \ if (mm_flags & flag) {\ if( out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S16 || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S16P)\ - ac->simd_f = ff_int16_to_int32_ ## cap;\ + ac->simd_f = ff_int16_to_int32_a_ ## cap;\ } MULTI_CAPS_FUNC(AV_CPU_FLAG_MMX, mmx)