You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-08-15 14:13:16 +02:00
x86: hpeldsp: avg_pixels_xy2 for mmx2&3dnow
This is a port of the inline assembly of the mmx version to use the pavg(us|)b instruction.

        8     16
mmx   1498  4355
mmx2  1242  3509

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
committed by
Michael Niedermayer
parent
17ac998055
commit
c081ca851c
@@ -29,6 +29,7 @@
|
|||||||
|
|
||||||
SECTION_RODATA
|
SECTION_RODATA
|
||||||
cextern pb_1
|
cextern pb_1
|
||||||
|
cextern pw_2
|
||||||
|
|
||||||
SECTION_TEXT
|
SECTION_TEXT
|
||||||
|
|
||||||
@@ -494,3 +495,79 @@ INIT_MMX mmxext
|
|||||||
AVG_APPROX_PIXELS8_XY2
|
AVG_APPROX_PIXELS8_XY2
|
||||||
INIT_MMX 3dnow
|
INIT_MMX 3dnow
|
||||||
AVG_APPROX_PIXELS8_XY2
|
AVG_APPROX_PIXELS8_XY2
|
||||||
|
|
||||||
|
|
||||||
|
; void ff_avg_pixels16_xy2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
|
||||||
|
%macro AVG_PIXELS_XY2 0
|
||||||
|
%if cpuflag(sse2)
|
||||||
|
cglobal avg_pixels16_xy2, 4,5,8
|
||||||
|
%else
|
||||||
|
cglobal avg_pixels8_xy2, 4,5
|
||||||
|
%endif
|
||||||
|
pxor m7, m7
|
||||||
|
mova m6, [pw_2]
|
||||||
|
movu m0, [r1]
|
||||||
|
movu m4, [r1+1]
|
||||||
|
mova m1, m0
|
||||||
|
mova m5, m4
|
||||||
|
punpcklbw m0, m7
|
||||||
|
punpcklbw m4, m7
|
||||||
|
punpckhbw m1, m7
|
||||||
|
punpckhbw m5, m7
|
||||||
|
paddusw m4, m0
|
||||||
|
paddusw m5, m1
|
||||||
|
xor r4, r4
|
||||||
|
add r1, r2
|
||||||
|
.loop:
|
||||||
|
movu m0, [r1+r4]
|
||||||
|
movu m2, [r1+r4+1]
|
||||||
|
mova m1, m0
|
||||||
|
mova m3, m2
|
||||||
|
punpcklbw m0, m7
|
||||||
|
punpcklbw m2, m7
|
||||||
|
punpckhbw m1, m7
|
||||||
|
punpckhbw m3, m7
|
||||||
|
paddusw m0, m2
|
||||||
|
paddusw m1, m3
|
||||||
|
paddusw m4, m6
|
||||||
|
paddusw m5, m6
|
||||||
|
paddusw m4, m0
|
||||||
|
paddusw m5, m1
|
||||||
|
psrlw m4, 2
|
||||||
|
psrlw m5, 2
|
||||||
|
mova m3, [r0+r4]
|
||||||
|
packuswb m4, m5
|
||||||
|
PAVGB m4, m3
|
||||||
|
mova [r0+r4], m4
|
||||||
|
add r4, r2
|
||||||
|
|
||||||
|
movu m2, [r1+r4]
|
||||||
|
movu m4, [r1+r4+1]
|
||||||
|
mova m3, m2
|
||||||
|
mova m5, m4
|
||||||
|
punpcklbw m2, m7
|
||||||
|
punpcklbw m4, m7
|
||||||
|
punpckhbw m3, m7
|
||||||
|
punpckhbw m5, m7
|
||||||
|
paddusw m4, m2
|
||||||
|
paddusw m5, m3
|
||||||
|
paddusw m0, m6
|
||||||
|
paddusw m1, m6
|
||||||
|
paddusw m0, m4
|
||||||
|
paddusw m1, m5
|
||||||
|
psrlw m0, 2
|
||||||
|
psrlw m1, 2
|
||||||
|
mova m3, [r0+r4]
|
||||||
|
packuswb m0, m1
|
||||||
|
PAVGB m0, m3
|
||||||
|
mova [r0+r4], m0
|
||||||
|
add r4, r2
|
||||||
|
sub r3d, 2
|
||||||
|
jnz .loop
|
||||||
|
REP_RET
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
INIT_MMX mmxext
|
||||||
|
AVG_PIXELS_XY2
|
||||||
|
INIT_MMX 3dnow
|
||||||
|
AVG_PIXELS_XY2
|
||||||
|
@@ -74,6 +74,10 @@ void ff_avg_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
|
|||||||
ptrdiff_t line_size, int h);
|
ptrdiff_t line_size, int h);
|
||||||
void ff_avg_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
|
void ff_avg_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
|
||||||
ptrdiff_t line_size, int h);
|
ptrdiff_t line_size, int h);
|
||||||
|
void ff_avg_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels,
|
||||||
|
ptrdiff_t line_size, int h);
|
||||||
|
void ff_avg_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels,
|
||||||
|
ptrdiff_t line_size, int h);
|
||||||
void ff_avg_approx_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels,
|
void ff_avg_approx_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels,
|
||||||
ptrdiff_t line_size, int h);
|
ptrdiff_t line_size, int h);
|
||||||
void ff_avg_approx_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels,
|
void ff_avg_approx_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels,
|
||||||
@@ -156,6 +160,7 @@ CALL_2X_PIXELS_EXPORT(ff_put_pixels16_xy2_mmx, ff_put_pixels8_xy2_mmx, 8)
|
|||||||
CALL_2X_PIXELS(avg_pixels16 ## CPUEXT, ff_avg_pixels8 ## CPUEXT, 8) \
|
CALL_2X_PIXELS(avg_pixels16 ## CPUEXT, ff_avg_pixels8 ## CPUEXT, 8) \
|
||||||
CALL_2X_PIXELS(avg_pixels16_x2 ## CPUEXT, ff_avg_pixels8_x2 ## CPUEXT, 8) \
|
CALL_2X_PIXELS(avg_pixels16_x2 ## CPUEXT, ff_avg_pixels8_x2 ## CPUEXT, 8) \
|
||||||
CALL_2X_PIXELS(avg_pixels16_y2 ## CPUEXT, ff_avg_pixels8_y2 ## CPUEXT, 8) \
|
CALL_2X_PIXELS(avg_pixels16_y2 ## CPUEXT, ff_avg_pixels8_y2 ## CPUEXT, 8) \
|
||||||
|
CALL_2X_PIXELS(avg_pixels16_xy2 ## CPUEXT, ff_avg_pixels8_xy2 ## CPUEXT, 8) \
|
||||||
CALL_2X_PIXELS(avg_approx_pixels16_xy2## CPUEXT, ff_avg_approx_pixels8_xy2## CPUEXT, 8)
|
CALL_2X_PIXELS(avg_approx_pixels16_xy2## CPUEXT, ff_avg_approx_pixels8_xy2## CPUEXT, 8)
|
||||||
|
|
||||||
HPELDSP_AVG_PIXELS16(_3dnow)
|
HPELDSP_AVG_PIXELS16(_3dnow)
|
||||||
@@ -209,6 +214,7 @@ static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags, int cpu_flags)
|
|||||||
c->avg_pixels_tab[0][0] = avg_pixels16_mmxext;
|
c->avg_pixels_tab[0][0] = avg_pixels16_mmxext;
|
||||||
c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmxext;
|
c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmxext;
|
||||||
c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmxext;
|
c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmxext;
|
||||||
|
c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmxext;
|
||||||
|
|
||||||
c->put_pixels_tab[1][1] = ff_put_pixels8_x2_mmxext;
|
c->put_pixels_tab[1][1] = ff_put_pixels8_x2_mmxext;
|
||||||
c->put_pixels_tab[1][2] = ff_put_pixels8_y2_mmxext;
|
c->put_pixels_tab[1][2] = ff_put_pixels8_y2_mmxext;
|
||||||
@@ -216,6 +222,7 @@ static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags, int cpu_flags)
|
|||||||
c->avg_pixels_tab[1][0] = ff_avg_pixels8_mmxext;
|
c->avg_pixels_tab[1][0] = ff_avg_pixels8_mmxext;
|
||||||
c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_mmxext;
|
c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_mmxext;
|
||||||
c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_mmxext;
|
c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_mmxext;
|
||||||
|
c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmxext;
|
||||||
|
|
||||||
if (!(flags & CODEC_FLAG_BITEXACT)) {
|
if (!(flags & CODEC_FLAG_BITEXACT)) {
|
||||||
c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmxext;
|
c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmxext;
|
||||||
@@ -243,6 +250,7 @@ static void hpeldsp_init_3dnow(HpelDSPContext *c, int flags, int cpu_flags)
|
|||||||
c->avg_pixels_tab[0][0] = avg_pixels16_3dnow;
|
c->avg_pixels_tab[0][0] = avg_pixels16_3dnow;
|
||||||
c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow;
|
c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow;
|
||||||
c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow;
|
c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow;
|
||||||
|
c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
|
||||||
|
|
||||||
c->put_pixels_tab[1][1] = ff_put_pixels8_x2_3dnow;
|
c->put_pixels_tab[1][1] = ff_put_pixels8_x2_3dnow;
|
||||||
c->put_pixels_tab[1][2] = ff_put_pixels8_y2_3dnow;
|
c->put_pixels_tab[1][2] = ff_put_pixels8_y2_3dnow;
|
||||||
@@ -250,6 +258,7 @@ static void hpeldsp_init_3dnow(HpelDSPContext *c, int flags, int cpu_flags)
|
|||||||
c->avg_pixels_tab[1][0] = ff_avg_pixels8_3dnow;
|
c->avg_pixels_tab[1][0] = ff_avg_pixels8_3dnow;
|
||||||
c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_3dnow;
|
c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_3dnow;
|
||||||
c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_3dnow;
|
c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_3dnow;
|
||||||
|
c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_3dnow;
|
||||||
|
|
||||||
if (!(flags & CODEC_FLAG_BITEXACT)){
|
if (!(flags & CODEC_FLAG_BITEXACT)){
|
||||||
c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow;
|
c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow;
|
||||||
|
Reference in New Issue
Block a user