1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-01-08 13:22:53 +02:00

x86/dsputilenc: make the SUM_ABS_DCTELEM macro more readable

Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
James Almer 2014-05-24 20:30:42 -03:00 committed by Michael Niedermayer
parent 46e3883519
commit d94e255dd1

View File

@ -487,34 +487,28 @@ cglobal pix_norm1, 2, 4
movd eax, m1 movd eax, m1
RET RET
%macro DCT_SAD4 1
mova m2, [blockq+%1+0 ]
mova m3, [blockq+%1+16]
mova m4, [blockq+%1+32]
mova m5, [blockq+%1+48]
ABS1_SUM m2, m6, m0
ABS1_SUM m3, m6, m1
ABS1_SUM m4, m6, m0
ABS1_SUM m5, m6, m1
%endmacro
;----------------------------------------------- ;-----------------------------------------------
;int ff_sum_abs_dctelem(int16_t *block) ;int ff_sum_abs_dctelem(int16_t *block)
;----------------------------------------------- ;-----------------------------------------------
; %1 = number of xmm registers used ; %1 = number of xmm registers used
; %2 = number of inline loops
%macro SUM_ABS_DCTELEM 1 %macro SUM_ABS_DCTELEM 2
cglobal sum_abs_dctelem, 1, 1, %1, block cglobal sum_abs_dctelem, 1, 1, %1, block
pxor m0, m0 pxor m0, m0
pxor m1, m1 pxor m1, m1
DCT_SAD4 0 %assign %%i 0
%if mmsize == 8 %rep %2
DCT_SAD4 8 mova m2, [blockq+mmsize*(0+%%i)]
%endif mova m3, [blockq+mmsize*(1+%%i)]
DCT_SAD4 64 mova m4, [blockq+mmsize*(2+%%i)]
%if mmsize == 8 mova m5, [blockq+mmsize*(3+%%i)]
DCT_SAD4 72 ABS1_SUM m2, m6, m0
%endif ABS1_SUM m3, m6, m1
ABS1_SUM m4, m6, m0
ABS1_SUM m5, m6, m1
%assign %%i %%i+4
%endrep
paddusw m0, m1 paddusw m0, m1
HSUM m0, m1, eax HSUM m0, m1, eax
and eax, 0xFFFF and eax, 0xFFFF
@ -522,10 +516,10 @@ cglobal sum_abs_dctelem, 1, 1, %1, block
%endmacro %endmacro
INIT_MMX mmx INIT_MMX mmx
SUM_ABS_DCTELEM 0 SUM_ABS_DCTELEM 0, 4
INIT_MMX mmxext INIT_MMX mmxext
SUM_ABS_DCTELEM 0 SUM_ABS_DCTELEM 0, 4
INIT_XMM sse2 INIT_XMM sse2
SUM_ABS_DCTELEM 7 SUM_ABS_DCTELEM 7, 2
INIT_XMM ssse3 INIT_XMM ssse3
SUM_ABS_DCTELEM 6 SUM_ABS_DCTELEM 6, 2