1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-10-06 05:47:18 +02:00

avcodec/x86/qpeldsp_init: Use SSE2 versions where possible

The mc00 versions (i.e. the qdsp functions with no subpixel
interpolation) are just wrappers around their fpel versions.
There are SSE2 versions of these, yet the qpel code only
uses the MMX(EXT) versions. This commit changes this and
also removes the MMX(EXT) versions.

This also allowed removing ff_avg_pixels16_mmxext and
ff_put_pixels16_mmx.

Reviewed-by: Lynne <dev@lynne.ee>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
Andreas Rheinhardt
2025-09-21 02:08:03 +02:00
parent 1f9ef6a8dc
commit 2cf9e733c6
3 changed files with 21 additions and 26 deletions

View File

@@ -67,12 +67,10 @@ cglobal %1_pixels%2, 4,5,4
INIT_MMX mmx INIT_MMX mmx
OP_PIXELS put, 8 OP_PIXELS put, 8
OP_PIXELS put, 16
INIT_MMX mmxext INIT_MMX mmxext
OP_PIXELS avg, 4 OP_PIXELS avg, 4
OP_PIXELS avg, 8 OP_PIXELS avg, 8
OP_PIXELS avg, 16
INIT_XMM sse2 INIT_XMM sse2
OP_PIXELS put, 16 OP_PIXELS put, 16

View File

@@ -26,14 +26,10 @@ void ff_avg_pixels4_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h); ptrdiff_t line_size, int h);
void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels, void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h); ptrdiff_t line_size, int h);
void ff_avg_pixels16_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h); ptrdiff_t line_size, int h);
void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h); ptrdiff_t line_size, int h);
void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h); ptrdiff_t line_size, int h);

View File

@@ -79,22 +79,10 @@ void ff_avg_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
void ff_put_no_rnd_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, void ff_put_no_rnd_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst,
const uint8_t *src, const uint8_t *src,
int dstStride, int srcStride); int dstStride, int srcStride);
#define ff_put_no_rnd_pixels16_mmxext ff_put_pixels16_mmx
#define ff_put_no_rnd_pixels8_mmxext ff_put_pixels8_mmx
#if HAVE_X86ASM #if HAVE_X86ASM
#define ff_put_pixels16_mmxext ff_put_pixels16_mmx
#define ff_put_pixels8_mmxext ff_put_pixels8_mmx
#define QPEL_OP(OPNAME, RND, MMX) \ #define QPEL_OP(OPNAME, RND, MMX) \
static void OPNAME ## qpel8_mc00_ ## MMX(uint8_t *dst, \
const uint8_t *src, \
ptrdiff_t stride) \
{ \
ff_ ## OPNAME ## pixels8_ ## MMX(dst, src, stride, 8); \
} \
\
static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, \ static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, \
const uint8_t *src, \ const uint8_t *src, \
ptrdiff_t stride) \ ptrdiff_t stride) \
@@ -291,13 +279,6 @@ static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, \
stride, 8); \ stride, 8); \
} \ } \
\ \
static void OPNAME ## qpel16_mc00_ ## MMX(uint8_t *dst, \
const uint8_t *src, \
ptrdiff_t stride) \
{ \
ff_ ## OPNAME ## pixels16_ ## MMX(dst, src, stride, 16); \
} \
\
static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, \ static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, \
const uint8_t *src, \ const uint8_t *src, \
ptrdiff_t stride) \ ptrdiff_t stride) \
@@ -504,11 +485,23 @@ QPEL_OP(put_, _, mmxext)
QPEL_OP(avg_, _, mmxext) QPEL_OP(avg_, _, mmxext)
QPEL_OP(put_no_rnd_, _no_rnd_, mmxext) QPEL_OP(put_no_rnd_, _no_rnd_, mmxext)
/*
 * MC00(OPNAME, SIZE, EXT) defines a static function named
 * OPNAME_qpelSIZE_mc00_EXT(dst, src, stride). The generated function simply
 * forwards to ff_OPNAME_pixelsSIZE_EXT(dst, src, stride, SIZE), i.e. SIZE is
 * used both to select the callee and as its last argument.
 *
 * OPNAME: operation prefix pasted into both names (put or avg below).
 * SIZE:   block size pasted into both names (8 or 16 below).
 * EXT:    instruction-set suffix pasted into both names.
 */
#define MC00(OPNAME, SIZE, EXT) \
static void OPNAME ## _qpel ## SIZE ## _mc00_ ## EXT(uint8_t *dst, \
const uint8_t *src,\
ptrdiff_t stride) \
{ \
ff_ ## OPNAME ## _pixels ## SIZE ##_ ## EXT(dst, src, stride, SIZE);\
}
/* Instantiate the wrappers: 8-wide ones use mmx (put) and mmxext (avg),
 * 16-wide ones use sse2 for both put and avg. */
MC00(put, 8, mmx)
MC00(avg, 8, mmxext)
MC00(put, 16, sse2)
MC00(avg, 16, sse2)
#endif /* HAVE_X86ASM */ #endif /* HAVE_X86ASM */
#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \ #define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \
do { \ do { \
c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \ c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \ c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \ c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
@@ -533,12 +526,20 @@ av_cold void ff_qpeldsp_init_x86(QpelDSPContext *c)
if (X86_MMXEXT(cpu_flags)) { if (X86_MMXEXT(cpu_flags)) {
#if HAVE_MMXEXT_EXTERNAL #if HAVE_MMXEXT_EXTERNAL
SET_QPEL_FUNCS(avg_qpel, 0, 16, mmxext, ); SET_QPEL_FUNCS(avg_qpel, 0, 16, mmxext, );
c->avg_qpel_pixels_tab[1][0] = avg_qpel8_mc00_mmxext;
SET_QPEL_FUNCS(avg_qpel, 1, 8, mmxext, ); SET_QPEL_FUNCS(avg_qpel, 1, 8, mmxext, );
SET_QPEL_FUNCS(put_qpel, 0, 16, mmxext, ); SET_QPEL_FUNCS(put_qpel, 0, 16, mmxext, );
c->put_no_rnd_qpel_pixels_tab[1][0] =
c->put_qpel_pixels_tab[1][0] = put_qpel8_mc00_mmx;
SET_QPEL_FUNCS(put_qpel, 1, 8, mmxext, ); SET_QPEL_FUNCS(put_qpel, 1, 8, mmxext, );
SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, ); SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, );
SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmxext, ); SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmxext, );
#endif /* HAVE_MMXEXT_EXTERNAL */ #endif /* HAVE_MMXEXT_EXTERNAL */
} }
if (EXTERNAL_SSE2(cpu_flags)) {
c->put_no_rnd_qpel_pixels_tab[0][0] =
c->put_qpel_pixels_tab[0][0] = put_qpel16_mc00_sse2;
c->avg_qpel_pixels_tab[0][0] = avg_qpel16_mc00_sse2;
}
} }