You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-10-06 05:47:18 +02:00
avcodec/x86/hpeldsp: Add SSE2 avg_no_rnd size 16 versions
These functions currently exist only as MMX versions. The added functions occupy 320B here. So far, they cover only the x2 and y2 directions (i.e. right and down, not down-right).

Reviewed-by: Lynne <dev@lynne.ee>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
@@ -125,12 +125,12 @@ cglobal put_no_rnd_pixels8_x2, 4,5
|
||||
RET
|
||||
|
||||
|
||||
%macro NO_RND_PIXELS_X2 0
|
||||
%macro NO_RND_PIXELS_X2 1
|
||||
%if cpuflag(sse2)
|
||||
cglobal put_no_rnd_pixels16_x2, 4,5,5
|
||||
cglobal %1_no_rnd_pixels16_x2, 4,5,5
|
||||
%else
|
||||
; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
|
||||
cglobal put_no_rnd_pixels8_x2_exact, 4,5
|
||||
cglobal %1_no_rnd_pixels8_x2_exact, 4,5
|
||||
%endif
|
||||
lea r4, [r2*3]
|
||||
pcmpeqb m4, m4
|
||||
@@ -147,6 +147,10 @@ cglobal put_no_rnd_pixels8_x2_exact, 4,5
|
||||
PAVGB m2, m3
|
||||
pxor m0, m4
|
||||
pxor m2, m4
|
||||
%ifidn %1, avg
|
||||
pavgb m0, [r0]
|
||||
pavgb m2, [r0+r2]
|
||||
%endif
|
||||
mova [r0], m0
|
||||
mova [r0+r2], m2
|
||||
movu m0, [r1+r2*2]
|
||||
@@ -161,6 +165,10 @@ cglobal put_no_rnd_pixels8_x2_exact, 4,5
|
||||
PAVGB m2, m3
|
||||
pxor m0, m4
|
||||
pxor m2, m4
|
||||
%ifidn %1, avg
|
||||
pavgb m0, [r0+r2*2]
|
||||
pavgb m2, [r0+r4]
|
||||
%endif
|
||||
mova [r0+r2*2], m0
|
||||
mova [r0+r4], m2
|
||||
lea r1, [r1+r2*4]
|
||||
@@ -171,9 +179,10 @@ cglobal put_no_rnd_pixels8_x2_exact, 4,5
|
||||
%endmacro
|
||||
|
||||
INIT_MMX mmxext
|
||||
NO_RND_PIXELS_X2
|
||||
NO_RND_PIXELS_X2 put
|
||||
INIT_XMM sse2
|
||||
NO_RND_PIXELS_X2
|
||||
NO_RND_PIXELS_X2 avg
|
||||
NO_RND_PIXELS_X2 put
|
||||
|
||||
; void ff_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
|
||||
%macro PUT_PIXELS8_Y2 0
|
||||
@@ -245,12 +254,12 @@ cglobal put_no_rnd_pixels8_y2, 4,5
|
||||
RET
|
||||
|
||||
|
||||
%macro NO_RND_PIXELS_Y2 0
|
||||
%macro NO_RND_PIXELS_Y2 1
|
||||
%if cpuflag(sse2)
|
||||
cglobal put_no_rnd_pixels16_y2, 4,5,4
|
||||
cglobal %1_no_rnd_pixels16_y2, 4,5,4
|
||||
%else
|
||||
; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
|
||||
cglobal put_no_rnd_pixels8_y2_exact, 4,5
|
||||
cglobal %1_no_rnd_pixels8_y2_exact, 4,5
|
||||
%endif
|
||||
lea r4, [r2*3]
|
||||
movu m0, [r1]
|
||||
@@ -266,6 +275,10 @@ cglobal put_no_rnd_pixels8_y2_exact, 4,5
|
||||
PAVGB m1, m2
|
||||
pxor m0, m3
|
||||
pxor m1, m3
|
||||
%ifidn %1, avg
|
||||
pavgb m0, [r0]
|
||||
pavgb m1, [r0+r2]
|
||||
%endif
|
||||
mova [r0], m0
|
||||
mova [r0+r2], m1
|
||||
movu m1, [r1+r2*2]
|
||||
@@ -276,6 +289,10 @@ cglobal put_no_rnd_pixels8_y2_exact, 4,5
|
||||
PAVGB m1, m0
|
||||
pxor m2, m3
|
||||
pxor m1, m3
|
||||
%ifidn %1, avg
|
||||
pavgb m2,[r0+r2*2]
|
||||
pavgb m1,[r0+r4]
|
||||
%endif
|
||||
mova [r0+r2*2], m2
|
||||
mova [r0+r4], m1
|
||||
lea r1, [r1+r2*4]
|
||||
@@ -286,9 +303,10 @@ cglobal put_no_rnd_pixels8_y2_exact, 4,5
|
||||
%endmacro
|
||||
|
||||
INIT_MMX mmxext
|
||||
NO_RND_PIXELS_Y2
|
||||
NO_RND_PIXELS_Y2 put
|
||||
INIT_XMM sse2
|
||||
NO_RND_PIXELS_Y2
|
||||
NO_RND_PIXELS_Y2 avg
|
||||
NO_RND_PIXELS_Y2 put
|
||||
|
||||
; void ff_avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
|
||||
%macro AVG_PIXELS8_X2 0
|
||||
|
@@ -51,6 +51,8 @@ void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block,
|
||||
ptrdiff_t line_size, int h);
|
||||
void ff_put_no_rnd_pixels16_x2_sse2(uint8_t *block, const uint8_t *pixels,
|
||||
ptrdiff_t line_size, int h);
|
||||
void ff_avg_no_rnd_pixels16_x2_sse2(uint8_t *block, const uint8_t *pixels,
|
||||
ptrdiff_t line_size, int h);
|
||||
void ff_put_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
|
||||
ptrdiff_t line_size, int h);
|
||||
void ff_put_no_rnd_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
|
||||
@@ -60,6 +62,8 @@ void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block,
|
||||
ptrdiff_t line_size, int h);
|
||||
void ff_put_no_rnd_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels,
|
||||
ptrdiff_t line_size, int h);
|
||||
void ff_avg_no_rnd_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels,
|
||||
ptrdiff_t line_size, int h);
|
||||
void ff_avg_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
|
||||
ptrdiff_t line_size, int h);
|
||||
void ff_avg_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
|
||||
@@ -385,7 +389,10 @@ static void hpeldsp_init_sse2(HpelDSPContext *c, int flags)
|
||||
c->avg_pixels_tab[0][1] = ff_avg_pixels16_x2_sse2;
|
||||
c->avg_pixels_tab[0][2] = ff_avg_pixels16_y2_sse2;
|
||||
c->avg_pixels_tab[0][3] = ff_avg_pixels16_xy2_sse2;
|
||||
|
||||
c->avg_no_rnd_pixels_tab[0] = ff_avg_pixels16_sse2;
|
||||
c->avg_no_rnd_pixels_tab[1] = ff_avg_no_rnd_pixels16_x2_sse2;
|
||||
c->avg_no_rnd_pixels_tab[2] = ff_avg_no_rnd_pixels16_y2_sse2;
|
||||
#endif /* HAVE_SSE2_EXTERNAL */
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user