You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-10-06 05:47:18 +02:00
avcodec/x86/qpeldsp_init: Use SSE2 versions where possible
The mc00 versions (i.e. the qdsp functions with no subpixel interpolation) are just wrappers around their fpel versions. There are SSE2 versions of these, yet the qpel code only uses the MMX(EXT) versions. This commit changes this and also removes the MMX(EXT) versions. This also made it possible to remove ff_avg_pixels16_mmxext and ff_put_pixels16_mmx. Reviewed-by: Lynne <dev@lynne.ee> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
@@ -67,12 +67,10 @@ cglobal %1_pixels%2, 4,5,4
|
|||||||
|
|
||||||
INIT_MMX mmx
|
INIT_MMX mmx
|
||||||
OP_PIXELS put, 8
|
OP_PIXELS put, 8
|
||||||
OP_PIXELS put, 16
|
|
||||||
|
|
||||||
INIT_MMX mmxext
|
INIT_MMX mmxext
|
||||||
OP_PIXELS avg, 4
|
OP_PIXELS avg, 4
|
||||||
OP_PIXELS avg, 8
|
OP_PIXELS avg, 8
|
||||||
OP_PIXELS avg, 16
|
|
||||||
|
|
||||||
INIT_XMM sse2
|
INIT_XMM sse2
|
||||||
OP_PIXELS put, 16
|
OP_PIXELS put, 16
|
||||||
|
@@ -26,14 +26,10 @@ void ff_avg_pixels4_mmxext(uint8_t *block, const uint8_t *pixels,
|
|||||||
ptrdiff_t line_size, int h);
|
ptrdiff_t line_size, int h);
|
||||||
void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
|
void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
|
||||||
ptrdiff_t line_size, int h);
|
ptrdiff_t line_size, int h);
|
||||||
void ff_avg_pixels16_mmxext(uint8_t *block, const uint8_t *pixels,
|
|
||||||
ptrdiff_t line_size, int h);
|
|
||||||
void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
|
void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
|
||||||
ptrdiff_t line_size, int h);
|
ptrdiff_t line_size, int h);
|
||||||
void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
|
void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
|
||||||
ptrdiff_t line_size, int h);
|
ptrdiff_t line_size, int h);
|
||||||
void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
|
|
||||||
ptrdiff_t line_size, int h);
|
|
||||||
void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
|
void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
|
||||||
ptrdiff_t line_size, int h);
|
ptrdiff_t line_size, int h);
|
||||||
|
|
||||||
|
@@ -79,22 +79,10 @@ void ff_avg_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
|
|||||||
void ff_put_no_rnd_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst,
|
void ff_put_no_rnd_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst,
|
||||||
const uint8_t *src,
|
const uint8_t *src,
|
||||||
int dstStride, int srcStride);
|
int dstStride, int srcStride);
|
||||||
#define ff_put_no_rnd_pixels16_mmxext ff_put_pixels16_mmx
|
|
||||||
#define ff_put_no_rnd_pixels8_mmxext ff_put_pixels8_mmx
|
|
||||||
|
|
||||||
#if HAVE_X86ASM
|
#if HAVE_X86ASM
|
||||||
|
|
||||||
#define ff_put_pixels16_mmxext ff_put_pixels16_mmx
|
|
||||||
#define ff_put_pixels8_mmxext ff_put_pixels8_mmx
|
|
||||||
|
|
||||||
#define QPEL_OP(OPNAME, RND, MMX) \
|
#define QPEL_OP(OPNAME, RND, MMX) \
|
||||||
static void OPNAME ## qpel8_mc00_ ## MMX(uint8_t *dst, \
|
|
||||||
const uint8_t *src, \
|
|
||||||
ptrdiff_t stride) \
|
|
||||||
{ \
|
|
||||||
ff_ ## OPNAME ## pixels8_ ## MMX(dst, src, stride, 8); \
|
|
||||||
} \
|
|
||||||
\
|
|
||||||
static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, \
|
static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, \
|
||||||
const uint8_t *src, \
|
const uint8_t *src, \
|
||||||
ptrdiff_t stride) \
|
ptrdiff_t stride) \
|
||||||
@@ -291,13 +279,6 @@ static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, \
|
|||||||
stride, 8); \
|
stride, 8); \
|
||||||
} \
|
} \
|
||||||
\
|
\
|
||||||
static void OPNAME ## qpel16_mc00_ ## MMX(uint8_t *dst, \
|
|
||||||
const uint8_t *src, \
|
|
||||||
ptrdiff_t stride) \
|
|
||||||
{ \
|
|
||||||
ff_ ## OPNAME ## pixels16_ ## MMX(dst, src, stride, 16); \
|
|
||||||
} \
|
|
||||||
\
|
|
||||||
static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, \
|
static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, \
|
||||||
const uint8_t *src, \
|
const uint8_t *src, \
|
||||||
ptrdiff_t stride) \
|
ptrdiff_t stride) \
|
||||||
@@ -504,11 +485,23 @@ QPEL_OP(put_, _, mmxext)
|
|||||||
QPEL_OP(avg_, _, mmxext)
|
QPEL_OP(avg_, _, mmxext)
|
||||||
QPEL_OP(put_no_rnd_, _no_rnd_, mmxext)
|
QPEL_OP(put_no_rnd_, _no_rnd_, mmxext)
|
||||||
|
|
||||||
|
#define MC00(OPNAME, SIZE, EXT) \
|
||||||
|
static void OPNAME ## _qpel ## SIZE ## _mc00_ ## EXT(uint8_t *dst, \
|
||||||
|
const uint8_t *src,\
|
||||||
|
ptrdiff_t stride) \
|
||||||
|
{ \
|
||||||
|
ff_ ## OPNAME ## _pixels ## SIZE ##_ ## EXT(dst, src, stride, SIZE);\
|
||||||
|
}
|
||||||
|
|
||||||
|
MC00(put, 8, mmx)
|
||||||
|
MC00(avg, 8, mmxext)
|
||||||
|
MC00(put, 16, sse2)
|
||||||
|
MC00(avg, 16, sse2)
|
||||||
|
|
||||||
#endif /* HAVE_X86ASM */
|
#endif /* HAVE_X86ASM */
|
||||||
|
|
||||||
#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \
|
#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \
|
||||||
do { \
|
do { \
|
||||||
c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
|
|
||||||
c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
|
c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
|
||||||
c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
|
c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
|
||||||
c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
|
c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
|
||||||
@@ -533,12 +526,20 @@ av_cold void ff_qpeldsp_init_x86(QpelDSPContext *c)
|
|||||||
if (X86_MMXEXT(cpu_flags)) {
|
if (X86_MMXEXT(cpu_flags)) {
|
||||||
#if HAVE_MMXEXT_EXTERNAL
|
#if HAVE_MMXEXT_EXTERNAL
|
||||||
SET_QPEL_FUNCS(avg_qpel, 0, 16, mmxext, );
|
SET_QPEL_FUNCS(avg_qpel, 0, 16, mmxext, );
|
||||||
|
c->avg_qpel_pixels_tab[1][0] = avg_qpel8_mc00_mmxext;
|
||||||
SET_QPEL_FUNCS(avg_qpel, 1, 8, mmxext, );
|
SET_QPEL_FUNCS(avg_qpel, 1, 8, mmxext, );
|
||||||
|
|
||||||
SET_QPEL_FUNCS(put_qpel, 0, 16, mmxext, );
|
SET_QPEL_FUNCS(put_qpel, 0, 16, mmxext, );
|
||||||
|
c->put_no_rnd_qpel_pixels_tab[1][0] =
|
||||||
|
c->put_qpel_pixels_tab[1][0] = put_qpel8_mc00_mmx;
|
||||||
SET_QPEL_FUNCS(put_qpel, 1, 8, mmxext, );
|
SET_QPEL_FUNCS(put_qpel, 1, 8, mmxext, );
|
||||||
SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, );
|
SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, );
|
||||||
SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmxext, );
|
SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmxext, );
|
||||||
#endif /* HAVE_MMXEXT_EXTERNAL */
|
#endif /* HAVE_MMXEXT_EXTERNAL */
|
||||||
}
|
}
|
||||||
|
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
|
c->put_no_rnd_qpel_pixels_tab[0][0] =
|
||||||
|
c->put_qpel_pixels_tab[0][0] = put_qpel16_mc00_sse2;
|
||||||
|
c->avg_qpel_pixels_tab[0][0] = avg_qpel16_mc00_sse2;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user