Mirror of https://github.com/FFmpeg/FFmpeg.git
avcodec/x86/hpeldsp: Remove obsolete MMX/3dnow functions
x64 always has MMX, MMXEXT, SSE and SSE2, which means that on x64 some of the MMX, MMXEXT and 3dnow functions are always overridden by other functions (unless one explicitly disables SSE2, for example). Given that the only systems that benefit from these functions are truly ancient 32-bit x86s, they are removed.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
parent aa8a201992
commit a51279bbde
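To see why these variants are dead weight on x64, here is a minimal, self-contained C sketch of the dispatch pattern the commit message describes; the names and flag bits below are hypothetical illustrations, not FFmpeg's actual init code. Function-pointer tables are filled in ascending ISA order, so wherever SSE2 is available (and it always is on x86-64, where it is part of the baseline), a later SSE2 assignment replaces any earlier MMX/MMXEXT/3dnow pointer.

/* Hypothetical sketch only: illustrates the override order, not FFmpeg's API. */
#include <stdio.h>

typedef void (*pixels_fn)(void);

static void put_pixels16_x2_mmxext(void) { puts("mmxext version"); }
static void put_pixels16_x2_sse2(void)   { puts("sse2 version"); }

enum { CPU_MMXEXT = 1 << 0, CPU_SSE2 = 1 << 1 };  /* made-up flag bits */

static void hpel_init(pixels_fn *tab, int cpu_flags)
{
    if (cpu_flags & CPU_MMXEXT)
        *tab = put_pixels16_x2_mmxext;  /* assigned first ...                  */
    if (cpu_flags & CPU_SSE2)
        *tab = put_pixels16_x2_sse2;    /* ... then unconditionally overridden */
}

int main(void)
{
    pixels_fn fn = NULL;
    /* The x86-64 baseline guarantees SSE2, so both flags are always set. */
    hpel_init(&fn, CPU_MMXEXT | CPU_SSE2);
    fn();  /* prints "sse2 version"; the mmxext path is unreachable on x64 */
    return 0;
}

Only a 32-bit build on a CPU without SSE2 could ever keep the MMX/MMXEXT/3dnow pointer, which is why the removed code only ever ran on very old 32-bit x86 hardware.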
@@ -91,7 +91,6 @@ cglobal %1_pixels%2, 4,5,4

INIT_MMX mmx
OP_PIXELS put, 4
OP_PIXELS put, 8
OP_PIXELS avg, 8
OP_PIXELS put, 16
OP_PIXELS avg, 16
@@ -83,8 +83,6 @@ cglobal put_pixels8_x2, 4,5

INIT_MMX mmxext
PUT_PIXELS8_X2
INIT_MMX 3dnow
PUT_PIXELS8_X2

; void ff_put_pixels16_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)

@@ -127,15 +125,13 @@ cglobal put_pixels16_x2, 4,5

INIT_MMX mmxext
PUT_PIXELS_16
INIT_MMX 3dnow
PUT_PIXELS_16
; The 8_X2 macro can easily be used here
INIT_XMM sse2
PUT_PIXELS8_X2

; void ff_put_no_rnd_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
%macro PUT_NO_RND_PIXELS8_X2 0
INIT_MMX mmxext
cglobal put_no_rnd_pixels8_x2, 4,5
    mova m6, [pb_1]
    lea  r4, [r2*2]

@@ -167,12 +163,6 @@ cglobal put_no_rnd_pixels8_x2, 4,5
    sub r3d, 4
    jne .loop
    REP_RET
%endmacro

INIT_MMX mmxext
PUT_NO_RND_PIXELS8_X2
INIT_MMX 3dnow
PUT_NO_RND_PIXELS8_X2

; void ff_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)

@@ -209,15 +199,13 @@ cglobal put_pixels8_y2, 4,5

INIT_MMX mmxext
PUT_PIXELS8_Y2
INIT_MMX 3dnow
PUT_PIXELS8_Y2
; actually, put_pixels16_y2_sse2
INIT_XMM sse2
PUT_PIXELS8_Y2

; void ff_put_no_rnd_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
%macro PUT_NO_RND_PIXELS8_Y2 0
INIT_MMX mmxext
cglobal put_no_rnd_pixels8_y2, 4,5
    mova m6, [pb_1]
    lea  r4, [r2+r2]

@@ -245,42 +233,6 @@ cglobal put_no_rnd_pixels8_y2, 4,5
    sub r3d, 4
    jne .loop
    REP_RET
%endmacro

INIT_MMX mmxext
PUT_NO_RND_PIXELS8_Y2
INIT_MMX 3dnow
PUT_NO_RND_PIXELS8_Y2
; void ff_avg_pixels8(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
%macro AVG_PIXELS8 0
cglobal avg_pixels8, 4,5
    lea  r4, [r2*2]
.loop:
    mova m0, [r0]
    mova m1, [r0+r2]
    PAVGB m0, [r1]
    PAVGB m1, [r1+r2]
    mova [r0], m0
    mova [r0+r2], m1
    add  r1, r4
    add  r0, r4
    mova m0, [r0]
    mova m1, [r0+r2]
    PAVGB m0, [r1]
    PAVGB m1, [r1+r2]
    add  r1, r4
    mova [r0], m0
    mova [r0+r2], m1
    add  r0, r4
    sub  r3d, 4
    jne .loop
    REP_RET
%endmacro

INIT_MMX 3dnow
AVG_PIXELS8
; void ff_avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)

@@ -291,10 +243,6 @@ cglobal avg_pixels16_x2, 4,5,4
cglobal avg_pixels8_x2, 4,5
%endif
    lea r4, [r2*2]
%if notcpuflag(mmxext)
    pcmpeqd m5, m5
    paddb   m5, m5
%endif
.loop:
    movu m0, [r1]
    movu m2, [r1+r2]

@@ -335,12 +283,8 @@ cglobal avg_pixels8_x2, 4,5
    REP_RET
%endmacro

INIT_MMX mmx
AVG_PIXELS8_X2
INIT_MMX mmxext
AVG_PIXELS8_X2
INIT_MMX 3dnow
AVG_PIXELS8_X2
; actually avg_pixels16_x2
INIT_XMM sse2
AVG_PIXELS8_X2

@@ -384,8 +328,6 @@ cglobal avg_pixels8_y2, 4,5

INIT_MMX mmxext
AVG_PIXELS8_Y2
INIT_MMX 3dnow
AVG_PIXELS8_Y2
; actually avg_pixels16_y2
INIT_XMM sse2
AVG_PIXELS8_Y2

@@ -394,7 +336,7 @@ AVG_PIXELS8_Y2

; void ff_avg_pixels8_xy2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
; Note this is not correctly rounded, and is therefore used for
; not-bitexact output
%macro AVG_APPROX_PIXELS8_XY2 0
INIT_MMX mmxext
cglobal avg_approx_pixels8_xy2, 4,5
    mova m6, [pb_1]
    lea  r4, [r2*2]

@@ -429,12 +371,6 @@ cglobal avg_approx_pixels8_xy2, 4,5
    sub r3d, 4
    jne .loop
    REP_RET
%endmacro

INIT_MMX mmxext
AVG_APPROX_PIXELS8_XY2
INIT_MMX 3dnow
AVG_APPROX_PIXELS8_XY2

; void ff_avg_pixels16_xy2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)

@@ -517,8 +453,6 @@ cglobal %1_pixels8_xy2, 4,5

INIT_MMX mmxext
SET_PIXELS_XY2 avg
INIT_MMX 3dnow
SET_PIXELS_XY2 avg
INIT_XMM sse2
SET_PIXELS_XY2 put
SET_PIXELS_XY2 avg
@@ -45,8 +45,6 @@ void ff_put_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels,
                            ptrdiff_t line_size, int h);
void ff_put_pixels8_xy2_ssse3(uint8_t *block, const uint8_t *pixels,
                              ptrdiff_t line_size, int h);
void ff_put_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels,
                             ptrdiff_t line_size, int h);
void ff_put_pixels16_xy2_sse2(uint8_t *block, const uint8_t *pixels,
                              ptrdiff_t line_size, int h);
void ff_put_pixels16_xy2_ssse3(uint8_t *block, const uint8_t *pixels,
@@ -35,12 +35,8 @@

void ff_put_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
                              ptrdiff_t line_size, int h);
void ff_put_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels,
                             ptrdiff_t line_size, int h);
void ff_put_pixels16_x2_mmxext(uint8_t *block, const uint8_t *pixels,
                               ptrdiff_t line_size, int h);
void ff_put_pixels16_x2_3dnow(uint8_t *block, const uint8_t *pixels,
                              ptrdiff_t line_size, int h);
void ff_put_pixels16_x2_sse2(uint8_t *block, const uint8_t *pixels,
                             ptrdiff_t line_size, int h);
void ff_avg_pixels16_x2_sse2(uint8_t *block, const uint8_t *pixels,
@@ -51,42 +47,21 @@ void ff_avg_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels,
                             ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
                                     ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels,
                                    ptrdiff_t line_size, int h);
void ff_put_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
                              ptrdiff_t line_size, int h);
void ff_put_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
                             ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
                                     ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
                                    ptrdiff_t line_size, int h);
void ff_avg_pixels8_3dnow(uint8_t *block, const uint8_t *pixels,
                          ptrdiff_t line_size, int h);
void ff_avg_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
                              ptrdiff_t line_size, int h);
void ff_avg_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels,
                             ptrdiff_t line_size, int h);
void ff_avg_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
                              ptrdiff_t line_size, int h);
void ff_avg_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
                             ptrdiff_t line_size, int h);
void ff_avg_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels,
                              ptrdiff_t line_size, int h);
void ff_avg_approx_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels,
                                      ptrdiff_t line_size, int h);
void ff_avg_approx_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels,
                                     ptrdiff_t line_size, int h);

#define avg_pixels8_mmx         ff_avg_pixels8_mmx
#define avg_pixels8_x2_mmx      ff_avg_pixels8_x2_mmx
#define avg_pixels16_mmx        ff_avg_pixels16_mmx
#define avg_pixels8_xy2_mmx     ff_avg_pixels8_xy2_mmx
#define avg_pixels16_xy2_mmx    ff_avg_pixels16_xy2_mmx
#define put_pixels8_mmx         ff_put_pixels8_mmx
#define put_pixels16_mmx        ff_put_pixels16_mmx
#define put_pixels8_xy2_mmx     ff_put_pixels8_xy2_mmx
#define put_pixels16_xy2_mmx    ff_put_pixels16_xy2_mmx
#define avg_no_rnd_pixels16_mmx ff_avg_pixels16_mmx
#define put_no_rnd_pixels8_mmx  ff_put_pixels8_mmx
#define put_no_rnd_pixels16_mmx ff_put_pixels16_mmx
@@ -121,30 +96,19 @@ CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_mmx, put_no_rnd_pixels8_xy2_mmx, 8)

/***********************************/
/* MMX rounding */

#define DEF(x, y) x ## _ ## y ## _mmx
#define SET_RND MOVQ_WTWO
#define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX(a, b, c, d, e, f)
#define PAVGB(a, b, c, e) PAVGB_MMX(a, b, c, e)

#include "hpeldsp_rnd_template.c"

#undef DEF
#define DEF(x, y) ff_ ## x ## _ ## y ## _mmx
#define STATIC
#define NO_AVG

#include "rnd_template.c"

#undef NO_AVG
#undef DEF
#undef SET_RND
#undef PAVGBP
#undef PAVGB

#if HAVE_MMX
CALL_2X_PIXELS(avg_pixels16_y2_mmx, avg_pixels8_y2_mmx, 8)
CALL_2X_PIXELS(put_pixels16_y2_mmx, put_pixels8_y2_mmx, 8)

CALL_2X_PIXELS_EXPORT(ff_avg_pixels16_xy2_mmx, ff_avg_pixels8_xy2_mmx, 8)
CALL_2X_PIXELS_EXPORT(ff_put_pixels16_xy2_mmx, ff_put_pixels8_xy2_mmx, 8)
CALL_2X_PIXELS(put_pixels16_xy2_mmx, ff_put_pixels8_xy2_mmx, 8)
#endif

#endif /* HAVE_INLINE_ASM */
@@ -162,46 +126,42 @@ CALL_2X_PIXELS_EXPORT(ff_put_pixels16_xy2_mmx, ff_put_pixels8_xy2_mmx, 8)
CALL_2X_PIXELS(avg_pixels16_xy2 ## CPUEXT, ff_avg_pixels8_xy2 ## CPUEXT, 8) \
CALL_2X_PIXELS(avg_approx_pixels16_xy2## CPUEXT, ff_avg_approx_pixels8_xy2## CPUEXT, 8)

HPELDSP_AVG_PIXELS16(_3dnow)
HPELDSP_AVG_PIXELS16(_mmxext)

#endif /* HAVE_X86ASM */

#define SET_HPEL_FUNCS_EXT(PFX, IDX, SIZE, CPU) \
    if (HAVE_MMX_EXTERNAL) \
        c->PFX ## _pixels_tab IDX [0] = PFX ## _pixels ## SIZE ## _ ## CPU;
        c->PFX ## _pixels_tab IDX [0] = PFX ## _pixels ## SIZE ## _ ## CPU

#if HAVE_MMX_INLINE
#define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \
#define SET_HPEL_FUNCS03(PFX, IDX, SIZE, CPU) \
    do { \
        SET_HPEL_FUNCS_EXT(PFX, IDX, SIZE, CPU) \
        c->PFX ## _pixels_tab IDX [1] = PFX ## _pixels ## SIZE ## _x2_ ## CPU; \
        c->PFX ## _pixels_tab IDX [2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \
        SET_HPEL_FUNCS_EXT(PFX, IDX, SIZE, CPU); \
        c->PFX ## _pixels_tab IDX [3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU; \
    } while (0)
#define SET_HPEL_FUNCS12(PFX, IDX, SIZE, CPU) \
    do { \
        c->PFX ## _pixels_tab IDX [1] = PFX ## _pixels ## SIZE ## _x2_ ## CPU; \
        c->PFX ## _pixels_tab IDX [2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \
    } while (0)
#else
#define SET_HPEL_FUNCS03(PFX, IDX, SIZE, CPU) SET_HPEL_FUNCS_EXT(PFX, IDX, SIZE, CPU)
#define SET_HPEL_FUNCS12(PFX, IDX, SIZE, CPU) ((void)0)
#endif
#define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \
    do { \
        SET_HPEL_FUNCS_EXT(PFX, IDX, SIZE, CPU) \
        SET_HPEL_FUNCS03(PFX, IDX, SIZE, CPU); \
        SET_HPEL_FUNCS12(PFX, IDX, SIZE, CPU); \
    } while (0)
#endif

static void hpeldsp_init_mmx(HpelDSPContext *c, int flags)
{
    SET_HPEL_FUNCS(put, [0], 16, mmx);
    SET_HPEL_FUNCS03(put, [0], 16, mmx);
    SET_HPEL_FUNCS(put_no_rnd, [0], 16, mmx);
    SET_HPEL_FUNCS(avg, [0], 16, mmx);
    SET_HPEL_FUNCS(avg_no_rnd, , 16, mmx);
    SET_HPEL_FUNCS(put, [1], 8, mmx);
    SET_HPEL_FUNCS03(put, [1], 8, mmx);
    SET_HPEL_FUNCS(put_no_rnd, [1], 8, mmx);
    if (HAVE_MMX_EXTERNAL) {
        c->avg_pixels_tab[1][0] = ff_avg_pixels8_mmx;
        c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_mmx;
    }
#if HAVE_MMX_INLINE
    c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx;
    c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmx;
#endif
}

static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags)
@@ -235,37 +195,6 @@ static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags)
#endif /* HAVE_MMXEXT_EXTERNAL */
}

static void hpeldsp_init_3dnow(HpelDSPContext *c, int flags)
{
#if HAVE_AMD3DNOW_EXTERNAL
    c->put_pixels_tab[0][1] = ff_put_pixels16_x2_3dnow;
    c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;

    c->avg_pixels_tab[0][0] = avg_pixels16_3dnow;
    c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow;
    c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow;
    c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;

    c->put_pixels_tab[1][1] = ff_put_pixels8_x2_3dnow;
    c->put_pixels_tab[1][2] = ff_put_pixels8_y2_3dnow;

    c->avg_pixels_tab[1][0] = ff_avg_pixels8_3dnow;
    c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_3dnow;
    c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_3dnow;
    c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_3dnow;

    if (!(flags & AV_CODEC_FLAG_BITEXACT)) {
        c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow;
        c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow;
        c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_3dnow;
        c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_3dnow;

        c->avg_pixels_tab[0][3] = avg_approx_pixels16_xy2_3dnow;
        c->avg_pixels_tab[1][3] = ff_avg_approx_pixels8_xy2_3dnow;
    }
#endif /* HAVE_AMD3DNOW_EXTERNAL */
}

static void hpeldsp_init_sse2_fast(HpelDSPContext *c, int flags)
{
#if HAVE_SSE2_EXTERNAL
@@ -298,9 +227,6 @@ av_cold void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags)
    if (INLINE_MMX(cpu_flags))
        hpeldsp_init_mmx(c, flags);

    if (EXTERNAL_AMD3DNOW(cpu_flags))
        hpeldsp_init_3dnow(c, flags);

    if (EXTERNAL_MMXEXT(cpu_flags))
        hpeldsp_init_mmxext(c, flags);
@@ -97,6 +97,7 @@ av_unused STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixel
        :FF_REG_a, "memory");
}

#ifndef NO_AVG
// avg_pixels
// this routine is 'slightly' suboptimal but mostly unused
av_unused STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels,

@@ -173,3 +174,4 @@ av_unused STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixel
        :"D"(block), "r"((x86_reg)line_size)
        :FF_REG_a, "memory");
}
#endif