1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-10-06 05:47:18 +02:00

avcodec/x86/hpeldsp: Add SSE2 avg_no_rnd size 16 versions

The size-16 avg_no_rnd functions currently only exist as MMX versions.
The added SSE2 functions occupy 320 bytes here. So far, they are only
provided for the x2 and y2 (i.e. right and down, not down-right) directions.

Reviewed-by: Lynne <dev@lynne.ee>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
Andreas Rheinhardt
2025-09-23 04:15:22 +02:00
parent 1e677e6964
commit 30c4007c65
2 changed files with 35 additions and 10 deletions

View File

@@ -125,12 +125,12 @@ cglobal put_no_rnd_pixels8_x2, 4,5
RET
%macro NO_RND_PIXELS_X2 0
%macro NO_RND_PIXELS_X2 1
%if cpuflag(sse2)
cglobal put_no_rnd_pixels16_x2, 4,5,5
cglobal %1_no_rnd_pixels16_x2, 4,5,5
%else
; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
cglobal put_no_rnd_pixels8_x2_exact, 4,5
cglobal %1_no_rnd_pixels8_x2_exact, 4,5
%endif
lea r4, [r2*3]
pcmpeqb m4, m4
@@ -147,6 +147,10 @@ cglobal put_no_rnd_pixels8_x2_exact, 4,5
PAVGB m2, m3
pxor m0, m4
pxor m2, m4
%ifidn %1, avg
pavgb m0, [r0]
pavgb m2, [r0+r2]
%endif
mova [r0], m0
mova [r0+r2], m2
movu m0, [r1+r2*2]
@@ -161,6 +165,10 @@ cglobal put_no_rnd_pixels8_x2_exact, 4,5
PAVGB m2, m3
pxor m0, m4
pxor m2, m4
%ifidn %1, avg
pavgb m0, [r0+r2*2]
pavgb m2, [r0+r4]
%endif
mova [r0+r2*2], m0
mova [r0+r4], m2
lea r1, [r1+r2*4]
@@ -171,9 +179,10 @@ cglobal put_no_rnd_pixels8_x2_exact, 4,5
%endmacro
INIT_MMX mmxext
NO_RND_PIXELS_X2
NO_RND_PIXELS_X2 put
INIT_XMM sse2
NO_RND_PIXELS_X2
NO_RND_PIXELS_X2 avg
NO_RND_PIXELS_X2 put
; void ff_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
%macro PUT_PIXELS8_Y2 0
@@ -245,12 +254,12 @@ cglobal put_no_rnd_pixels8_y2, 4,5
RET
%macro NO_RND_PIXELS_Y2 0
%macro NO_RND_PIXELS_Y2 1
%if cpuflag(sse2)
cglobal put_no_rnd_pixels16_y2, 4,5,4
cglobal %1_no_rnd_pixels16_y2, 4,5,4
%else
; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
cglobal put_no_rnd_pixels8_y2_exact, 4,5
cglobal %1_no_rnd_pixels8_y2_exact, 4,5
%endif
lea r4, [r2*3]
movu m0, [r1]
@@ -266,6 +275,10 @@ cglobal put_no_rnd_pixels8_y2_exact, 4,5
PAVGB m1, m2
pxor m0, m3
pxor m1, m3
%ifidn %1, avg
pavgb m0, [r0]
pavgb m1, [r0+r2]
%endif
mova [r0], m0
mova [r0+r2], m1
movu m1, [r1+r2*2]
@@ -276,6 +289,10 @@ cglobal put_no_rnd_pixels8_y2_exact, 4,5
PAVGB m1, m0
pxor m2, m3
pxor m1, m3
%ifidn %1, avg
pavgb m2,[r0+r2*2]
pavgb m1,[r0+r4]
%endif
mova [r0+r2*2], m2
mova [r0+r4], m1
lea r1, [r1+r2*4]
@@ -286,9 +303,10 @@ cglobal put_no_rnd_pixels8_y2_exact, 4,5
%endmacro
INIT_MMX mmxext
NO_RND_PIXELS_Y2
NO_RND_PIXELS_Y2 put
INIT_XMM sse2
NO_RND_PIXELS_Y2
NO_RND_PIXELS_Y2 avg
NO_RND_PIXELS_Y2 put
; void ff_avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
%macro AVG_PIXELS8_X2 0

View File

@@ -51,6 +51,8 @@ void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block,
ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels16_x2_sse2(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_no_rnd_pixels16_x2_sse2(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
@@ -60,6 +62,8 @@ void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block,
ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_no_rnd_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
@@ -385,7 +389,10 @@ static void hpeldsp_init_sse2(HpelDSPContext *c, int flags)
c->avg_pixels_tab[0][1] = ff_avg_pixels16_x2_sse2;
c->avg_pixels_tab[0][2] = ff_avg_pixels16_y2_sse2;
c->avg_pixels_tab[0][3] = ff_avg_pixels16_xy2_sse2;
c->avg_no_rnd_pixels_tab[0] = ff_avg_pixels16_sse2;
c->avg_no_rnd_pixels_tab[1] = ff_avg_no_rnd_pixels16_x2_sse2;
c->avg_no_rnd_pixels_tab[2] = ff_avg_no_rnd_pixels16_y2_sse2;
#endif /* HAVE_SSE2_EXTERNAL */
}