1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-01-24 13:56:33 +02:00

x86: hpeldsp: better factorization

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
Christophe Gisquet 2014-05-26 21:59:14 +02:00 committed by Michael Niedermayer
parent bf7e9cc82a
commit 2267003981
2 changed files with 18 additions and 38 deletions

View File

@ -372,16 +372,6 @@ AVG_PIXELS8
; void ff_avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
%macro PAVGB_MMX 4
; PAVGB_MMX src, dst, tmp, mask
; Combines src and dst using only logic ops, a shift and a byte subtract:
;   (src | dst) - (((src ^ dst) & mask) >> 1)
; %1 = source operand (the invocations visible in this file pass a memory operand)
; %2 = register holding the other input; it is overwritten
; %3 = scratch register; it receives the final value before the SWAP
; %4 = mask operand. Presumably 0xFE in every byte, which would make the
;      expression the rounded-up per-byte average -- TODO confirm where the
;      mask register is initialised.
; tmp = src
movu %3, %1
; tmp = src | dst
por %3, %2
; dst = src ^ dst (the bits in which the two inputs differ)
pxor %2, %1
; mask first: psrlq shifts the whole 64-bit lane, so unmasked low bits would
; move into the neighbouring byte
pand %2, %4
psrlq %2, 1
; tmp = (src | dst) - halved difference
psubb %3, %2
; SWAP is defined outside this view; presumably it renames the registers so
; that %2 refers to the result afterwards -- confirm against its definition
SWAP %2, %3
%endmacro
%macro AVG_PIXELS8_X2 0
%if cpuflag(sse2)
cglobal avg_pixels16_x2, 4,5,4
@ -396,53 +386,35 @@ cglobal avg_pixels8_x2, 4,5
.loop:
movu m0, [r1]
movu m2, [r1+r2]
%if notcpuflag(mmxext)
PAVGB_MMX [r1+1], m0, m3, m5
PAVGB_MMX [r1+r2+1], m2, m4, m5
PAVGB_MMX [r0], m0, m3, m5
PAVGB_MMX [r0+r2], m2, m4, m5
%else
%if cpuflag(sse2)
movu m1, [r1+1]
movu m3, [r1+r2+1]
pavgb m0, m1
pavgb m2, m3
%else
PAVGB m0, [r1+1]
PAVGB m2, [r1+r2+1]
%endif
PAVGB m0, [r0]
PAVGB m2, [r0+r2]
PAVGB m0, [r1+1], m3, m5
PAVGB m2, [r1+r2+1], m4, m5
%endif
PAVGB m0, [r0], m3, m5
PAVGB m2, [r0+r2], m4, m5
add r1, r4
mova [r0], m0
mova [r0+r2], m2
movu m0, [r1]
movu m2, [r1+r2]
%if notcpuflag(mmxext)
PAVGB_MMX [r1+1], m0, m3, m5
PAVGB_MMX [r1+r2+1], m2, m4, m5
%elif cpuflag(sse2)
%if cpuflag(sse2)
movu m1, [r1+1]
movu m3, [r1+r2+1]
pavgb m0, m1
pavgb m2, m3
%else
PAVGB m0, [r1+1]
PAVGB m2, [r1+r2+1]
PAVGB m0, [r1+1], m3, m5
PAVGB m2, [r1+r2+1], m4, m5
%endif
add r0, r4
add r1, r4
%if notcpuflag(mmxext)
PAVGB_MMX [r0], m0, m3, m5
PAVGB_MMX [r0+r2], m2, m4, m5
%elif cpuflag(sse2)
pavgb m0, [r0]
pavgb m2, [r0+r2]
%else
PAVGB m0, [r0]
PAVGB m2, [r0+r2]
%endif
PAVGB m0, [r0], m3, m5
PAVGB m2, [r0+r2], m4, m5
mova [r0], m0
mova [r0+r2], m2
add r0, r4

View File

@ -340,11 +340,19 @@
%endif
%endmacro
%macro PAVGB 2
%macro PAVGB 2-4
; PAVGB dst, src [, tmp, mask]
; Emits a packed byte average of dst and src, choosing the instruction
; sequence from the active cpuflags:
;   mmxext -> pavgb
;   3dnow  -> pavgusb
;   mmx    -> logic-op fallback below (reached only when neither flag above is set)
; %1 = destination register, also the first input
; %2 = second input (the invocations visible in this file pass a memory operand)
; %3 = scratch register, read only by the plain-MMX branch
; %4 = mask operand, read only by the plain-MMX branch. Presumably 0xFE in
;      every byte -- TODO confirm where the mask register is initialised.
%if cpuflag(mmxext)
pavgb %1, %2
%elif cpuflag(3dnow)
pavgusb %1, %2
%elif cpuflag(mmx)
; Fallback computes (dst | src) - (((dst ^ src) & mask) >> 1).
; tmp = src
movu %3, %2
; tmp = dst | src
por %3, %1
; dst = dst ^ src (the bits in which the two inputs differ)
pxor %1, %2
; mask first: psrlq shifts the whole 64-bit lane, so unmasked low bits would
; move into the neighbouring byte
pand %1, %4
psrlq %1, 1
; tmp = (dst | src) - halved difference
psubb %3, %1
; SWAP is defined outside this view; presumably it renames the registers so
; that %1 refers to the result afterwards -- confirm against its definition
SWAP %1, %3
%endif
%endmacro