1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2024-12-23 12:43:46 +02:00

dsputil: use cpuflags in x86 versions of vector_clip_int32()

This commit is contained in:
Justin Ruggles 2011-11-06 15:27:55 -05:00
parent f2bd8a0786
commit b8f02f5b4e
2 changed files with 27 additions and 21 deletions

View File

@ -2419,9 +2419,9 @@ void ff_vector_clip_int32_mmx (int32_t *dst, const int32_t *src, int32_t min
int32_t max, unsigned int len); int32_t max, unsigned int len);
void ff_vector_clip_int32_sse2 (int32_t *dst, const int32_t *src, int32_t min, void ff_vector_clip_int32_sse2 (int32_t *dst, const int32_t *src, int32_t min,
int32_t max, unsigned int len); int32_t max, unsigned int len);
void ff_vector_clip_int32_sse2_int(int32_t *dst, const int32_t *src, int32_t min, void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src, int32_t min,
int32_t max, unsigned int len); int32_t max, unsigned int len);
void ff_vector_clip_int32_sse41 (int32_t *dst, const int32_t *src, int32_t min, void ff_vector_clip_int32_sse4 (int32_t *dst, const int32_t *src, int32_t min,
int32_t max, unsigned int len); int32_t max, unsigned int len);
void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
@ -2877,7 +2877,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->scalarproduct_int16 = ff_scalarproduct_int16_sse2; c->scalarproduct_int16 = ff_scalarproduct_int16_sse2;
c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2; c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2;
if (mm_flags & AV_CPU_FLAG_ATOM) { if (mm_flags & AV_CPU_FLAG_ATOM) {
c->vector_clip_int32 = ff_vector_clip_int32_sse2_int; c->vector_clip_int32 = ff_vector_clip_int32_int_sse2;
} else { } else {
c->vector_clip_int32 = ff_vector_clip_int32_sse2; c->vector_clip_int32 = ff_vector_clip_int32_sse2;
} }
@ -2909,7 +2909,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) { if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) {
#if HAVE_YASM #if HAVE_YASM
c->vector_clip_int32 = ff_vector_clip_int32_sse41; c->vector_clip_int32 = ff_vector_clip_int32_sse4;
#endif #endif
} }

View File

@ -1055,9 +1055,14 @@ emu_edge mmx
; int32_t max, unsigned int len) ; int32_t max, unsigned int len)
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
%macro VECTOR_CLIP_INT32 4 ; %1 = number of xmm registers used
cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len ; %2 = number of inline load/process/store loops per asm loop
%ifidn %1, sse2 ; %3 = process 4*mmsize (%3=0) or 8*mmsize (%3=1) bytes per loop
; %4 = CLIPD function takes min/max as float instead of int (CLIPD_SSE2)
; %5 = suffix
%macro VECTOR_CLIP_INT32 4-5
cglobal vector_clip_int32%5, 5,5,%2, dst, src, min, max, len
%if %4
cvtsi2ss m4, minm cvtsi2ss m4, minm
cvtsi2ss m5, maxm cvtsi2ss m5, maxm
%else %else
@ -1068,12 +1073,12 @@ cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len
SPLATD m5 SPLATD m5
.loop: .loop:
%assign %%i 1 %assign %%i 1
%rep %3 %rep %2
mova m0, [srcq+mmsize*0*%%i] mova m0, [srcq+mmsize*0*%%i]
mova m1, [srcq+mmsize*1*%%i] mova m1, [srcq+mmsize*1*%%i]
mova m2, [srcq+mmsize*2*%%i] mova m2, [srcq+mmsize*2*%%i]
mova m3, [srcq+mmsize*3*%%i] mova m3, [srcq+mmsize*3*%%i]
%if %4 %if %3
mova m7, [srcq+mmsize*4*%%i] mova m7, [srcq+mmsize*4*%%i]
mova m8, [srcq+mmsize*5*%%i] mova m8, [srcq+mmsize*5*%%i]
mova m9, [srcq+mmsize*6*%%i] mova m9, [srcq+mmsize*6*%%i]
@ -1083,7 +1088,7 @@ cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len
CLIPD m1, m4, m5, m6 CLIPD m1, m4, m5, m6
CLIPD m2, m4, m5, m6 CLIPD m2, m4, m5, m6
CLIPD m3, m4, m5, m6 CLIPD m3, m4, m5, m6
%if %4 %if %3
CLIPD m7, m4, m5, m6 CLIPD m7, m4, m5, m6
CLIPD m8, m4, m5, m6 CLIPD m8, m4, m5, m6
CLIPD m9, m4, m5, m6 CLIPD m9, m4, m5, m6
@ -1093,7 +1098,7 @@ cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len
mova [dstq+mmsize*1*%%i], m1 mova [dstq+mmsize*1*%%i], m1
mova [dstq+mmsize*2*%%i], m2 mova [dstq+mmsize*2*%%i], m2
mova [dstq+mmsize*3*%%i], m3 mova [dstq+mmsize*3*%%i], m3
%if %4 %if %3
mova [dstq+mmsize*4*%%i], m7 mova [dstq+mmsize*4*%%i], m7
mova [dstq+mmsize*5*%%i], m8 mova [dstq+mmsize*5*%%i], m8
mova [dstq+mmsize*6*%%i], m9 mova [dstq+mmsize*6*%%i], m9
@ -1101,25 +1106,26 @@ cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len
%endif %endif
%assign %%i %%i+1 %assign %%i %%i+1
%endrep %endrep
add srcq, mmsize*4*(%3+%4) add srcq, mmsize*4*(%2+%3)
add dstq, mmsize*4*(%3+%4) add dstq, mmsize*4*(%2+%3)
sub lend, mmsize*(%3+%4) sub lend, mmsize*(%2+%3)
jg .loop jg .loop
REP_RET REP_RET
%endmacro %endmacro
INIT_MMX INIT_MMX mmx
%define SPLATD SPLATD_MMX %define SPLATD SPLATD_MMX
%define CLIPD CLIPD_MMX %define CLIPD CLIPD_MMX
VECTOR_CLIP_INT32 mmx, 0, 1, 0 VECTOR_CLIP_INT32 0, 1, 0, 0
INIT_XMM INIT_XMM sse2
%define SPLATD SPLATD_SSE2 %define SPLATD SPLATD_SSE2
VECTOR_CLIP_INT32 sse2_int, 6, 1, 0 VECTOR_CLIP_INT32 6, 1, 0, 0, _int
%define CLIPD CLIPD_SSE2 %define CLIPD CLIPD_SSE2
VECTOR_CLIP_INT32 sse2, 6, 2, 0 VECTOR_CLIP_INT32 6, 2, 0, 1
INIT_XMM sse4
%define CLIPD CLIPD_SSE41 %define CLIPD CLIPD_SSE41
%ifdef m8 %ifdef m8
VECTOR_CLIP_INT32 sse41, 11, 1, 1 VECTOR_CLIP_INT32 11, 1, 1, 0
%else %else
VECTOR_CLIP_INT32 sse41, 6, 1, 0 VECTOR_CLIP_INT32 6, 1, 0, 0
%endif %endif