mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-28 20:53:54 +02:00
Merge commit 'ad01ba6ceaea7d71c4b9887795523438689b5a96'
* commit 'ad01ba6ceaea7d71c4b9887795523438689b5a96':
  x86: h264: Remove 3dnow QPEL code

Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
commit
66c3bac2b9
@ -55,6 +55,7 @@ static void DEF(put_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_
|
||||
:"%"REG_a, "memory");
|
||||
}
|
||||
|
||||
#ifndef SKIP_FOR_3DNOW
|
||||
static void DEF(put_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
|
||||
{
|
||||
__asm__ volatile(
|
||||
@ -332,6 +333,7 @@ static void DEF(avg_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int
|
||||
:"r"(src1Stride), "r"(dstStride)
|
||||
:"memory");*/
|
||||
}
|
||||
#endif /* SKIP_FOR_3DNOW */
|
||||
|
||||
static void DEF(put_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
||||
{
|
||||
@ -373,6 +375,7 @@ static void DEF(put_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line
|
||||
:"%"REG_a, "memory");
|
||||
}
|
||||
|
||||
#ifndef SKIP_FOR_3DNOW
|
||||
static void DEF(put_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
|
||||
{
|
||||
__asm__ volatile(
|
||||
@ -547,6 +550,7 @@ static void DEF(put_no_rnd_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *sr
|
||||
:"r"(src1Stride), "r"(dstStride)
|
||||
:"memory");*/
|
||||
}
|
||||
#endif /* SKIP_FOR_3DNOW */
|
||||
|
||||
/* GL: this function does incorrect rounding if overflow */
|
||||
static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
||||
@ -872,6 +876,7 @@ static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line
|
||||
:"%"REG_a, "memory");
|
||||
}
|
||||
|
||||
#ifndef SKIP_FOR_3DNOW
|
||||
static void DEF(avg_pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
||||
{
|
||||
do {
|
||||
@ -896,6 +901,7 @@ static void DEF(avg_pixels4)(uint8_t *block, const uint8_t *pixels, int line_siz
|
||||
h -= 4;
|
||||
} while(h > 0);
|
||||
}
|
||||
#endif /* SKIP_FOR_3DNOW */
|
||||
|
||||
//FIXME the following could be optimized too ...
|
||||
static void DEF(put_no_rnd_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
|
||||
@ -968,6 +974,7 @@ static void DEF(OPNAME ## 2tap_qpel8_l3)(uint8_t *dst, uint8_t *src, int stride,
|
||||
);\
|
||||
}
|
||||
|
||||
#ifndef SKIP_FOR_3DNOW
|
||||
#define STORE_OP(a,b) PAVGB" "#a","#b" \n\t"
|
||||
QPEL_2TAP_L3(avg_)
|
||||
#undef STORE_OP
|
||||
@ -975,3 +982,4 @@ QPEL_2TAP_L3(avg_)
|
||||
QPEL_2TAP_L3(put_)
|
||||
#undef STORE_OP
|
||||
#undef QPEL_2TAP_L3
|
||||
#endif /* SKIP_FOR_3DNOW */
|
||||
|
@ -198,12 +198,14 @@ DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 };
|
||||
#define DEF(x) x ## _3dnow
|
||||
#define PAVGB "pavgusb"
|
||||
#define OP_AVG PAVGB
|
||||
#define SKIP_FOR_3DNOW
|
||||
|
||||
#include "dsputil_avg_template.c"
|
||||
|
||||
#undef DEF
|
||||
#undef PAVGB
|
||||
#undef OP_AVG
|
||||
#undef SKIP_FOR_3DNOW
|
||||
|
||||
/***********************************/
|
||||
/* MMXEXT specific */
|
||||
@ -227,11 +229,6 @@ DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 };
|
||||
#define put_pixels4_mmxext put_pixels4_mmx
|
||||
#define put_no_rnd_pixels16_mmxext put_no_rnd_pixels16_mmx
|
||||
#define put_no_rnd_pixels8_mmxext put_no_rnd_pixels8_mmx
|
||||
#define put_pixels16_3dnow put_pixels16_mmx
|
||||
#define put_pixels8_3dnow put_pixels8_mmx
|
||||
#define put_pixels4_3dnow put_pixels4_mmx
|
||||
#define put_no_rnd_pixels16_3dnow put_no_rnd_pixels16_mmx
|
||||
#define put_no_rnd_pixels8_3dnow put_no_rnd_pixels8_mmx
|
||||
|
||||
/***********************************/
|
||||
/* standard MMX */
|
||||
@ -943,7 +940,7 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
|
||||
"packuswb %%mm5, %%mm5 \n\t" \
|
||||
OP(%%mm5, out, %%mm7, d)
|
||||
|
||||
#define QPEL_BASE(OPNAME, ROUNDER, RND, OP_MMXEXT, OP_3DNOW) \
|
||||
#define QPEL_BASE(OPNAME, ROUNDER, RND, OP_MMXEXT) \
|
||||
static void OPNAME ## mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, \
|
||||
uint8_t *src, \
|
||||
int dstStride, \
|
||||
@ -1071,73 +1068,6 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, \
|
||||
); \
|
||||
} \
|
||||
\
|
||||
static void OPNAME ## mpeg4_qpel16_h_lowpass_3dnow(uint8_t *dst, \
|
||||
uint8_t *src, \
|
||||
int dstStride, \
|
||||
int srcStride, \
|
||||
int h) \
|
||||
{ \
|
||||
int i; \
|
||||
int16_t temp[16]; \
|
||||
/* quick HACK, XXX FIXME MUST be optimized */ \
|
||||
for (i = 0; i < h; i++) { \
|
||||
temp[ 0] = (src[ 0] + src[ 1]) * 20 - (src[ 0] + src[ 2]) * 6 + \
|
||||
(src[ 1] + src[ 3]) * 3 - (src[ 2] + src[ 4]); \
|
||||
temp[ 1] = (src[ 1] + src[ 2]) * 20 - (src[ 0] + src[ 3]) * 6 + \
|
||||
(src[ 0] + src[ 4]) * 3 - (src[ 1] + src[ 5]); \
|
||||
temp[ 2] = (src[ 2] + src[ 3]) * 20 - (src[ 1] + src[ 4]) * 6 + \
|
||||
(src[ 0] + src[ 5]) * 3 - (src[ 0] + src[ 6]); \
|
||||
temp[ 3] = (src[ 3] + src[ 4]) * 20 - (src[ 2] + src[ 5]) * 6 + \
|
||||
(src[ 1] + src[ 6]) * 3 - (src[ 0] + src[ 7]); \
|
||||
temp[ 4] = (src[ 4] + src[ 5]) * 20 - (src[ 3] + src[ 6]) * 6 + \
|
||||
(src[ 2] + src[ 7]) * 3 - (src[ 1] + src[ 8]); \
|
||||
temp[ 5] = (src[ 5] + src[ 6]) * 20 - (src[ 4] + src[ 7]) * 6 + \
|
||||
(src[ 3] + src[ 8]) * 3 - (src[ 2] + src[ 9]); \
|
||||
temp[ 6] = (src[ 6] + src[ 7]) * 20 - (src[ 5] + src[ 8]) * 6 + \
|
||||
(src[ 4] + src[ 9]) * 3 - (src[ 3] + src[10]); \
|
||||
temp[ 7] = (src[ 7] + src[ 8]) * 20 - (src[ 6] + src[ 9]) * 6 + \
|
||||
(src[ 5] + src[10]) * 3 - (src[ 4] + src[11]); \
|
||||
temp[ 8] = (src[ 8] + src[ 9]) * 20 - (src[ 7] + src[10]) * 6 + \
|
||||
(src[ 6] + src[11]) * 3 - (src[ 5] + src[12]); \
|
||||
temp[ 9] = (src[ 9] + src[10]) * 20 - (src[ 8] + src[11]) * 6 + \
|
||||
(src[ 7] + src[12]) * 3 - (src[ 6] + src[13]); \
|
||||
temp[10] = (src[10] + src[11]) * 20 - (src[ 9] + src[12]) * 6 + \
|
||||
(src[ 8] + src[13]) * 3 - (src[ 7] + src[14]); \
|
||||
temp[11] = (src[11] + src[12]) * 20 - (src[10] + src[13]) * 6 + \
|
||||
(src[ 9] + src[14]) * 3 - (src[ 8] + src[15]); \
|
||||
temp[12] = (src[12] + src[13]) * 20 - (src[11] + src[14]) * 6 + \
|
||||
(src[10] + src[15]) * 3 - (src[ 9] + src[16]); \
|
||||
temp[13] = (src[13] + src[14]) * 20 - (src[12] + src[15]) * 6 + \
|
||||
(src[11] + src[16]) * 3 - (src[10] + src[16]); \
|
||||
temp[14] = (src[14] + src[15]) * 20 - (src[13] + src[16]) * 6 + \
|
||||
(src[12] + src[16]) * 3 - (src[11] + src[15]); \
|
||||
temp[15] = (src[15] + src[16]) * 20 - (src[14] + src[16]) * 6 + \
|
||||
(src[13] + src[15]) * 3 - (src[12] + src[14]); \
|
||||
__asm__ volatile ( \
|
||||
"movq (%0), %%mm0 \n\t" \
|
||||
"movq 8(%0), %%mm1 \n\t" \
|
||||
"paddw %2, %%mm0 \n\t" \
|
||||
"paddw %2, %%mm1 \n\t" \
|
||||
"psraw $5, %%mm0 \n\t" \
|
||||
"psraw $5, %%mm1 \n\t" \
|
||||
"packuswb %%mm1, %%mm0 \n\t" \
|
||||
OP_3DNOW(%%mm0, (%1), %%mm1, q) \
|
||||
"movq 16(%0), %%mm0 \n\t" \
|
||||
"movq 24(%0), %%mm1 \n\t" \
|
||||
"paddw %2, %%mm0 \n\t" \
|
||||
"paddw %2, %%mm1 \n\t" \
|
||||
"psraw $5, %%mm0 \n\t" \
|
||||
"psraw $5, %%mm1 \n\t" \
|
||||
"packuswb %%mm1, %%mm0 \n\t" \
|
||||
OP_3DNOW(%%mm0, 8(%1), %%mm1, q) \
|
||||
:: "r"(temp), "r"(dst), "m"(ROUNDER) \
|
||||
: "memory" \
|
||||
); \
|
||||
dst += dstStride; \
|
||||
src += srcStride; \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
static void OPNAME ## mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, \
|
||||
uint8_t *src, \
|
||||
int dstStride, \
|
||||
@ -1206,49 +1136,6 @@ static void OPNAME ## mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, \
|
||||
/* "m"(ff_pw_20), "m"(ff_pw_3), */ "m"(ROUNDER) \
|
||||
: "memory" \
|
||||
); \
|
||||
} \
|
||||
\
|
||||
static void OPNAME ## mpeg4_qpel8_h_lowpass_3dnow(uint8_t *dst, \
|
||||
uint8_t *src, \
|
||||
int dstStride, \
|
||||
int srcStride, \
|
||||
int h) \
|
||||
{ \
|
||||
int i; \
|
||||
int16_t temp[8]; \
|
||||
/* quick HACK, XXX FIXME MUST be optimized */ \
|
||||
for (i = 0; i < h; i++) { \
|
||||
temp[0] = (src[0] + src[1]) * 20 - (src[0] + src[2]) * 6 + \
|
||||
(src[1] + src[3]) * 3 - (src[2] + src[4]); \
|
||||
temp[1] = (src[1] + src[2]) * 20 - (src[0] + src[3]) * 6 + \
|
||||
(src[0] + src[4]) * 3 - (src[1] + src[5]); \
|
||||
temp[2] = (src[2] + src[3]) * 20 - (src[1] + src[4]) * 6 + \
|
||||
(src[0] + src[5]) * 3 - (src[0] + src[6]); \
|
||||
temp[3] = (src[3] + src[4]) * 20 - (src[2] + src[5]) * 6 + \
|
||||
(src[1] + src[6]) * 3 - (src[0] + src[7]); \
|
||||
temp[4] = (src[4] + src[5]) * 20 - (src[3] + src[6]) * 6 + \
|
||||
(src[2] + src[7]) * 3 - (src[1] + src[8]); \
|
||||
temp[5] = (src[5] + src[6]) * 20 - (src[4] + src[7]) * 6 + \
|
||||
(src[3] + src[8]) * 3 - (src[2] + src[8]); \
|
||||
temp[6] = (src[6] + src[7]) * 20 - (src[5] + src[8]) * 6 + \
|
||||
(src[4] + src[8]) * 3 - (src[3] + src[7]); \
|
||||
temp[7] = (src[7] + src[8]) * 20 - (src[6] + src[8]) * 6 + \
|
||||
(src[5] + src[7]) * 3 - (src[4] + src[6]); \
|
||||
__asm__ volatile ( \
|
||||
"movq (%0), %%mm0 \n\t" \
|
||||
"movq 8(%0), %%mm1 \n\t" \
|
||||
"paddw %2, %%mm0 \n\t" \
|
||||
"paddw %2, %%mm1 \n\t" \
|
||||
"psraw $5, %%mm0 \n\t" \
|
||||
"psraw $5, %%mm1 \n\t" \
|
||||
"packuswb %%mm1, %%mm0 \n\t" \
|
||||
OP_3DNOW(%%mm0, (%1), %%mm1, q) \
|
||||
:: "r"(temp), "r"(dst), "m"(ROUNDER) \
|
||||
: "memory" \
|
||||
); \
|
||||
dst += dstStride; \
|
||||
src += srcStride; \
|
||||
} \
|
||||
}
|
||||
|
||||
#define QPEL_OP(OPNAME, ROUNDER, RND, OP, MMX) \
|
||||
@ -1759,22 +1646,14 @@ static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, \
|
||||
#define PUT_OP(a, b, temp, size) \
|
||||
"mov"#size" "#a", "#b" \n\t"
|
||||
|
||||
#define AVG_3DNOW_OP(a, b, temp, size) \
|
||||
"mov"#size" "#b", "#temp" \n\t" \
|
||||
"pavgusb "#temp", "#a" \n\t" \
|
||||
"mov"#size" "#a", "#b" \n\t"
|
||||
|
||||
#define AVG_MMXEXT_OP(a, b, temp, size) \
|
||||
"mov"#size" "#b", "#temp" \n\t" \
|
||||
"pavgb "#temp", "#a" \n\t" \
|
||||
"mov"#size" "#a", "#b" \n\t"
|
||||
|
||||
QPEL_BASE(put_, ff_pw_16, _, PUT_OP, PUT_OP)
|
||||
QPEL_BASE(avg_, ff_pw_16, _, AVG_MMXEXT_OP, AVG_3DNOW_OP)
|
||||
QPEL_BASE(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, PUT_OP)
|
||||
QPEL_OP(put_, ff_pw_16, _, PUT_OP, 3dnow)
|
||||
QPEL_OP(avg_, ff_pw_16, _, AVG_3DNOW_OP, 3dnow)
|
||||
QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, 3dnow)
|
||||
QPEL_BASE(put_, ff_pw_16, _, PUT_OP)
|
||||
QPEL_BASE(avg_, ff_pw_16, _, AVG_MMXEXT_OP)
|
||||
QPEL_BASE(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP)
|
||||
QPEL_OP(put_, ff_pw_16, _, PUT_OP, mmxext)
|
||||
QPEL_OP(avg_, ff_pw_16, _, AVG_MMXEXT_OP, mmxext)
|
||||
QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, mmxext)
|
||||
@ -1835,10 +1714,6 @@ QPEL_2TAP(put_, 16, mmxext)
|
||||
QPEL_2TAP(avg_, 16, mmxext)
|
||||
QPEL_2TAP(put_, 8, mmxext)
|
||||
QPEL_2TAP(avg_, 8, mmxext)
|
||||
QPEL_2TAP(put_, 16, 3dnow)
|
||||
QPEL_2TAP(avg_, 16, 3dnow)
|
||||
QPEL_2TAP(put_, 8, 3dnow)
|
||||
QPEL_2TAP(avg_, 8, 3dnow)
|
||||
|
||||
void ff_put_rv40_qpel8_mc33_mmx(uint8_t *dst, uint8_t *src, int stride)
|
||||
{
|
||||
@ -2775,29 +2650,6 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx,
|
||||
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_3dnow;
|
||||
}
|
||||
|
||||
if (CONFIG_H264QPEL) {
|
||||
SET_QPEL_FUNCS(put_qpel, 0, 16, 3dnow, );
|
||||
SET_QPEL_FUNCS(put_qpel, 1, 8, 3dnow, );
|
||||
SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, 3dnow, );
|
||||
SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, 3dnow, );
|
||||
SET_QPEL_FUNCS(avg_qpel, 0, 16, 3dnow, );
|
||||
SET_QPEL_FUNCS(avg_qpel, 1, 8, 3dnow, );
|
||||
|
||||
if (!high_bit_depth) {
|
||||
SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 3dnow, );
|
||||
SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 3dnow, );
|
||||
SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow, );
|
||||
SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow, );
|
||||
SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow, );
|
||||
SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow, );
|
||||
}
|
||||
|
||||
SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow, );
|
||||
SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow, );
|
||||
SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow, );
|
||||
SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow, );
|
||||
}
|
||||
|
||||
c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
|
||||
#endif /* HAVE_INLINE_ASM */
|
||||
|
||||
|
@ -1163,9 +1163,6 @@ QPEL(put_, 16,XMM, 16)\
|
||||
QPEL(avg_, 8, XMM, 16)\
|
||||
QPEL(avg_, 16,XMM, 16)\
|
||||
|
||||
#define PAVGB "pavgusb"
|
||||
QPEL_H264(put_, PUT_OP, 3dnow)
|
||||
QPEL_H264(avg_, AVG_3DNOW_OP, 3dnow)
|
||||
#undef PAVGB
|
||||
#define PAVGB "pavgb"
|
||||
QPEL_H264(put_, PUT_OP, mmxext)
|
||||
@ -1184,7 +1181,6 @@ QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, ssse3)
|
||||
#endif
|
||||
#undef PAVGB
|
||||
|
||||
H264_MC_4816(3dnow)
|
||||
H264_MC_4816(mmxext)
|
||||
H264_MC_816(H264_MC_V, sse2)
|
||||
H264_MC_816(H264_MC_HV, sse2)
|
||||
|
Loading…
Reference in New Issue
Block a user