mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-24 13:56:33 +02:00
x86: mmx2 ---> mmxext in function names
This commit is contained in:
parent
38fdf72580
commit
d8eda37080
@ -83,7 +83,7 @@ static const struct algo fdct_tab[] = {
|
|||||||
|
|
||||||
#if HAVE_MMX_INLINE
|
#if HAVE_MMX_INLINE
|
||||||
{ "MMX", ff_fdct_mmx, NO_PERM, AV_CPU_FLAG_MMX },
|
{ "MMX", ff_fdct_mmx, NO_PERM, AV_CPU_FLAG_MMX },
|
||||||
{ "MMXEXT", ff_fdct_mmx2, NO_PERM, AV_CPU_FLAG_MMXEXT },
|
{ "MMXEXT", ff_fdct_mmxext, NO_PERM, AV_CPU_FLAG_MMXEXT },
|
||||||
{ "SSE2", ff_fdct_sse2, NO_PERM, AV_CPU_FLAG_SSE2 },
|
{ "SSE2", ff_fdct_sse2, NO_PERM, AV_CPU_FLAG_SSE2 },
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -107,7 +107,7 @@ static const struct algo idct_tab[] = {
|
|||||||
#if HAVE_MMX_INLINE
|
#if HAVE_MMX_INLINE
|
||||||
{ "SIMPLE-MMX", ff_simple_idct_mmx, MMX_SIMPLE_PERM, AV_CPU_FLAG_MMX },
|
{ "SIMPLE-MMX", ff_simple_idct_mmx, MMX_SIMPLE_PERM, AV_CPU_FLAG_MMX },
|
||||||
{ "XVID-MMX", ff_idct_xvid_mmx, NO_PERM, AV_CPU_FLAG_MMX, 1 },
|
{ "XVID-MMX", ff_idct_xvid_mmx, NO_PERM, AV_CPU_FLAG_MMX, 1 },
|
||||||
{ "XVID-MMXEXT", ff_idct_xvid_mmx2, NO_PERM, AV_CPU_FLAG_MMXEXT, 1 },
|
{ "XVID-MMXEXT", ff_idct_xvid_mmxext, NO_PERM, AV_CPU_FLAG_MMXEXT, 1 },
|
||||||
{ "XVID-SSE2", ff_idct_xvid_sse2, SSE2_PERM, AV_CPU_FLAG_SSE2, 1 },
|
{ "XVID-SSE2", ff_idct_xvid_sse2, SSE2_PERM, AV_CPU_FLAG_SSE2, 1 },
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -49,7 +49,7 @@ void ff_j_rev_dct (DCTELEM *data);
|
|||||||
void ff_wmv2_idct_c(DCTELEM *data);
|
void ff_wmv2_idct_c(DCTELEM *data);
|
||||||
|
|
||||||
void ff_fdct_mmx(DCTELEM *block);
|
void ff_fdct_mmx(DCTELEM *block);
|
||||||
void ff_fdct_mmx2(DCTELEM *block);
|
void ff_fdct_mmxext(DCTELEM *block);
|
||||||
void ff_fdct_sse2(DCTELEM *block);
|
void ff_fdct_sse2(DCTELEM *block);
|
||||||
|
|
||||||
#define H264_IDCT(depth) \
|
#define H264_IDCT(depth) \
|
||||||
|
@ -438,21 +438,22 @@ static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, ui
|
|||||||
#endif /* (HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE) */
|
#endif /* (HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE) */
|
||||||
|
|
||||||
#if HAVE_MMXEXT_INLINE
|
#if HAVE_MMXEXT_INLINE
|
||||||
QPEL_CAVS(put_, PUT_OP, mmx2)
|
QPEL_CAVS(put_, PUT_OP, mmxext)
|
||||||
QPEL_CAVS(avg_,AVG_MMXEXT_OP, mmx2)
|
QPEL_CAVS(avg_, AVG_MMXEXT_OP, mmxext)
|
||||||
|
|
||||||
CAVS_MC(put_, 8, mmx2)
|
CAVS_MC(put_, 8, mmxext)
|
||||||
CAVS_MC(put_, 16,mmx2)
|
CAVS_MC(put_, 16, mmxext)
|
||||||
CAVS_MC(avg_, 8, mmx2)
|
CAVS_MC(avg_, 8, mmxext)
|
||||||
CAVS_MC(avg_, 16,mmx2)
|
CAVS_MC(avg_, 16, mmxext)
|
||||||
|
|
||||||
static void ff_cavsdsp_init_mmx2(CAVSDSPContext* c, AVCodecContext *avctx) {
|
static void ff_cavsdsp_init_mmxext(CAVSDSPContext *c, AVCodecContext *avctx)
|
||||||
|
{
|
||||||
#define dspfunc(PFX, IDX, NUM) \
|
#define dspfunc(PFX, IDX, NUM) \
|
||||||
c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx2; \
|
c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmxext; \
|
||||||
c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_mmx2; \
|
c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_mmxext; \
|
||||||
c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_mmx2; \
|
c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_mmxext; \
|
||||||
c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_mmx2; \
|
c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_mmxext; \
|
||||||
c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_mmx2; \
|
c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_mmxext; \
|
||||||
|
|
||||||
dspfunc(put_cavs_qpel, 0, 16);
|
dspfunc(put_cavs_qpel, 0, 16);
|
||||||
dspfunc(put_cavs_qpel, 1, 8);
|
dspfunc(put_cavs_qpel, 1, 8);
|
||||||
@ -475,7 +476,7 @@ CAVS_MC(avg_, 16,3dnow)
|
|||||||
|
|
||||||
static void ff_cavsdsp_init_3dnow(CAVSDSPContext* c, AVCodecContext *avctx) {
|
static void ff_cavsdsp_init_3dnow(CAVSDSPContext* c, AVCodecContext *avctx) {
|
||||||
#define dspfunc(PFX, IDX, NUM) \
|
#define dspfunc(PFX, IDX, NUM) \
|
||||||
c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx2; \
|
c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmxext; \
|
||||||
c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_3dnow; \
|
c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_3dnow; \
|
||||||
c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_3dnow; \
|
c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_3dnow; \
|
||||||
c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_3dnow; \
|
c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_3dnow; \
|
||||||
@ -496,7 +497,7 @@ av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c, AVCodecContext *avctx)
|
|||||||
int mm_flags = av_get_cpu_flags();
|
int mm_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
#if HAVE_MMXEXT_INLINE
|
#if HAVE_MMXEXT_INLINE
|
||||||
if (mm_flags & AV_CPU_FLAG_MMXEXT) ff_cavsdsp_init_mmx2(c, avctx);
|
if (mm_flags & AV_CPU_FLAG_MMXEXT) ff_cavsdsp_init_mmxext(c, avctx);
|
||||||
#endif /* HAVE_MMXEXT_INLINE */
|
#endif /* HAVE_MMXEXT_INLINE */
|
||||||
#if HAVE_AMD3DNOW_INLINE
|
#if HAVE_AMD3DNOW_INLINE
|
||||||
if (mm_flags & AV_CPU_FLAG_3DNOW) ff_cavsdsp_init_3dnow(c, avctx);
|
if (mm_flags & AV_CPU_FLAG_3DNOW) ff_cavsdsp_init_3dnow(c, avctx);
|
||||||
|
@ -207,7 +207,7 @@ DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 };
|
|||||||
/***********************************/
|
/***********************************/
|
||||||
/* MMXEXT specific */
|
/* MMXEXT specific */
|
||||||
|
|
||||||
#define DEF(x) x ## _mmx2
|
#define DEF(x) x ## _mmxext
|
||||||
|
|
||||||
/* Introduced only in MMXEXT set */
|
/* Introduced only in MMXEXT set */
|
||||||
#define PAVGB "pavgb"
|
#define PAVGB "pavgb"
|
||||||
@ -221,11 +221,11 @@ DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 };
|
|||||||
|
|
||||||
#define put_no_rnd_pixels16_mmx put_pixels16_mmx
|
#define put_no_rnd_pixels16_mmx put_pixels16_mmx
|
||||||
#define put_no_rnd_pixels8_mmx put_pixels8_mmx
|
#define put_no_rnd_pixels8_mmx put_pixels8_mmx
|
||||||
#define put_pixels16_mmx2 put_pixels16_mmx
|
#define put_pixels16_mmxext put_pixels16_mmx
|
||||||
#define put_pixels8_mmx2 put_pixels8_mmx
|
#define put_pixels8_mmxext put_pixels8_mmx
|
||||||
#define put_pixels4_mmx2 put_pixels4_mmx
|
#define put_pixels4_mmxext put_pixels4_mmx
|
||||||
#define put_no_rnd_pixels16_mmx2 put_no_rnd_pixels16_mmx
|
#define put_no_rnd_pixels16_mmxext put_no_rnd_pixels16_mmx
|
||||||
#define put_no_rnd_pixels8_mmx2 put_no_rnd_pixels8_mmx
|
#define put_no_rnd_pixels8_mmxext put_no_rnd_pixels8_mmx
|
||||||
#define put_pixels16_3dnow put_pixels16_mmx
|
#define put_pixels16_3dnow put_pixels16_mmx
|
||||||
#define put_pixels8_3dnow put_pixels8_mmx
|
#define put_pixels8_3dnow put_pixels8_mmx
|
||||||
#define put_pixels4_3dnow put_pixels4_mmx
|
#define put_pixels4_3dnow put_pixels4_mmx
|
||||||
@ -924,11 +924,11 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
|
|||||||
OP(%%mm5, out, %%mm7, d)
|
OP(%%mm5, out, %%mm7, d)
|
||||||
|
|
||||||
#define QPEL_BASE(OPNAME, ROUNDER, RND, OP_MMXEXT, OP_3DNOW) \
|
#define QPEL_BASE(OPNAME, ROUNDER, RND, OP_MMXEXT, OP_3DNOW) \
|
||||||
static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, \
|
static void OPNAME ## mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, \
|
||||||
uint8_t *src, \
|
uint8_t *src, \
|
||||||
int dstStride, \
|
int dstStride, \
|
||||||
int srcStride, \
|
int srcStride, \
|
||||||
int h) \
|
int h) \
|
||||||
{ \
|
{ \
|
||||||
uint64_t temp; \
|
uint64_t temp; \
|
||||||
\
|
\
|
||||||
@ -1118,11 +1118,11 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_3dnow(uint8_t *dst, \
|
|||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
\
|
\
|
||||||
static void OPNAME ## mpeg4_qpel8_h_lowpass_mmx2(uint8_t *dst, \
|
static void OPNAME ## mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, \
|
||||||
uint8_t *src, \
|
uint8_t *src, \
|
||||||
int dstStride, \
|
int dstStride, \
|
||||||
int srcStride, \
|
int srcStride, \
|
||||||
int h) \
|
int h) \
|
||||||
{ \
|
{ \
|
||||||
__asm__ volatile ( \
|
__asm__ volatile ( \
|
||||||
"pxor %%mm7, %%mm7 \n\t" \
|
"pxor %%mm7, %%mm7 \n\t" \
|
||||||
@ -1755,9 +1755,9 @@ QPEL_BASE(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, PUT_OP)
|
|||||||
QPEL_OP(put_, ff_pw_16, _, PUT_OP, 3dnow)
|
QPEL_OP(put_, ff_pw_16, _, PUT_OP, 3dnow)
|
||||||
QPEL_OP(avg_, ff_pw_16, _, AVG_3DNOW_OP, 3dnow)
|
QPEL_OP(avg_, ff_pw_16, _, AVG_3DNOW_OP, 3dnow)
|
||||||
QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, 3dnow)
|
QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, 3dnow)
|
||||||
QPEL_OP(put_, ff_pw_16, _, PUT_OP, mmx2)
|
QPEL_OP(put_, ff_pw_16, _, PUT_OP, mmxext)
|
||||||
QPEL_OP(avg_, ff_pw_16, _, AVG_MMXEXT_OP, mmx2)
|
QPEL_OP(avg_, ff_pw_16, _, AVG_MMXEXT_OP, mmxext)
|
||||||
QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, mmx2)
|
QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, mmxext)
|
||||||
|
|
||||||
/***********************************/
|
/***********************************/
|
||||||
/* bilinear qpel: not compliant to any spec, only for -lavdopts fast */
|
/* bilinear qpel: not compliant to any spec, only for -lavdopts fast */
|
||||||
@ -1811,10 +1811,10 @@ QPEL_2TAP_L3(OPNAME, SIZE, MMX, 31, 1, stride, -1) \
|
|||||||
QPEL_2TAP_L3(OPNAME, SIZE, MMX, 13, stride, -stride, 1) \
|
QPEL_2TAP_L3(OPNAME, SIZE, MMX, 13, stride, -stride, 1) \
|
||||||
QPEL_2TAP_L3(OPNAME, SIZE, MMX, 33, stride + 1, -stride, -1) \
|
QPEL_2TAP_L3(OPNAME, SIZE, MMX, 33, stride + 1, -stride, -1) \
|
||||||
|
|
||||||
QPEL_2TAP(put_, 16, mmx2)
|
QPEL_2TAP(put_, 16, mmxext)
|
||||||
QPEL_2TAP(avg_, 16, mmx2)
|
QPEL_2TAP(avg_, 16, mmxext)
|
||||||
QPEL_2TAP(put_, 8, mmx2)
|
QPEL_2TAP(put_, 8, mmxext)
|
||||||
QPEL_2TAP(avg_, 8, mmx2)
|
QPEL_2TAP(avg_, 8, mmxext)
|
||||||
QPEL_2TAP(put_, 16, 3dnow)
|
QPEL_2TAP(put_, 16, 3dnow)
|
||||||
QPEL_2TAP(avg_, 16, 3dnow)
|
QPEL_2TAP(avg_, 16, 3dnow)
|
||||||
QPEL_2TAP(put_, 8, 3dnow)
|
QPEL_2TAP(put_, 8, 3dnow)
|
||||||
@ -2035,7 +2035,7 @@ static void name(void *mem, int stride, int h) \
|
|||||||
} while (--h); \
|
} while (--h); \
|
||||||
}
|
}
|
||||||
|
|
||||||
PREFETCH(prefetch_mmx2, prefetcht0)
|
PREFETCH(prefetch_mmxext, prefetcht0)
|
||||||
PREFETCH(prefetch_3dnow, prefetch)
|
PREFETCH(prefetch_3dnow, prefetch)
|
||||||
#undef PREFETCH
|
#undef PREFETCH
|
||||||
|
|
||||||
@ -2089,22 +2089,22 @@ CHROMA_MC(avg, 8, 10, avx)
|
|||||||
#if HAVE_INLINE_ASM
|
#if HAVE_INLINE_ASM
|
||||||
|
|
||||||
/* CAVS-specific */
|
/* CAVS-specific */
|
||||||
void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride)
|
void ff_put_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride)
|
||||||
{
|
{
|
||||||
put_pixels8_mmx(dst, src, stride, 8);
|
put_pixels8_mmx(dst, src, stride, 8);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride)
|
void ff_avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride)
|
||||||
{
|
{
|
||||||
avg_pixels8_mmx(dst, src, stride, 8);
|
avg_pixels8_mmx(dst, src, stride, 8);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride)
|
void ff_put_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride)
|
||||||
{
|
{
|
||||||
put_pixels16_mmx(dst, src, stride, 16);
|
put_pixels16_mmx(dst, src, stride, 16);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride)
|
void ff_avg_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride)
|
||||||
{
|
{
|
||||||
avg_pixels16_mmx(dst, src, stride, 16);
|
avg_pixels16_mmx(dst, src, stride, 16);
|
||||||
}
|
}
|
||||||
@ -2116,10 +2116,10 @@ void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src,
|
|||||||
put_pixels8_mmx(dst, src, stride, 8);
|
put_pixels8_mmx(dst, src, stride, 8);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src,
|
void ff_avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src,
|
||||||
int stride, int rnd)
|
int stride, int rnd)
|
||||||
{
|
{
|
||||||
avg_pixels8_mmx2(dst, src, stride, 8);
|
avg_pixels8_mmxext(dst, src, stride, 8);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize)
|
static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize)
|
||||||
@ -2456,74 +2456,74 @@ static void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, int mm_flags)
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx,
|
static void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx,
|
||||||
int mm_flags)
|
int mm_flags)
|
||||||
{
|
{
|
||||||
const int bit_depth = avctx->bits_per_raw_sample;
|
const int bit_depth = avctx->bits_per_raw_sample;
|
||||||
const int high_bit_depth = bit_depth > 8;
|
const int high_bit_depth = bit_depth > 8;
|
||||||
|
|
||||||
#if HAVE_INLINE_ASM
|
#if HAVE_INLINE_ASM
|
||||||
c->prefetch = prefetch_mmx2;
|
c->prefetch = prefetch_mmxext;
|
||||||
|
|
||||||
if (!high_bit_depth) {
|
if (!high_bit_depth) {
|
||||||
c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
|
c->put_pixels_tab[0][1] = put_pixels16_x2_mmxext;
|
||||||
c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
|
c->put_pixels_tab[0][2] = put_pixels16_y2_mmxext;
|
||||||
|
|
||||||
c->avg_pixels_tab[0][0] = avg_pixels16_mmx2;
|
c->avg_pixels_tab[0][0] = avg_pixels16_mmxext;
|
||||||
c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2;
|
c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmxext;
|
||||||
c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2;
|
c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmxext;
|
||||||
|
|
||||||
c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2;
|
c->put_pixels_tab[1][1] = put_pixels8_x2_mmxext;
|
||||||
c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2;
|
c->put_pixels_tab[1][2] = put_pixels8_y2_mmxext;
|
||||||
|
|
||||||
c->avg_pixels_tab[1][0] = avg_pixels8_mmx2;
|
c->avg_pixels_tab[1][0] = avg_pixels8_mmxext;
|
||||||
c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2;
|
c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmxext;
|
||||||
c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;
|
c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmxext;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
|
if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
|
||||||
if (!high_bit_depth) {
|
if (!high_bit_depth) {
|
||||||
c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
|
c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmxext;
|
||||||
c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
|
c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmxext;
|
||||||
c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
|
c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmxext;
|
||||||
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
|
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmxext;
|
||||||
|
|
||||||
c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
|
c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmxext;
|
||||||
c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
|
c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmxext;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!high_bit_depth && avctx->idct_algo == FF_IDCT_XVIDMMX) {
|
if (!high_bit_depth && avctx->idct_algo == FF_IDCT_XVIDMMX) {
|
||||||
c->idct_put = ff_idct_xvid_mmx2_put;
|
c->idct_put = ff_idct_xvid_mmxext_put;
|
||||||
c->idct_add = ff_idct_xvid_mmx2_add;
|
c->idct_add = ff_idct_xvid_mmxext_add;
|
||||||
c->idct = ff_idct_xvid_mmx2;
|
c->idct = ff_idct_xvid_mmxext;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (CONFIG_VP3_DECODER && (avctx->codec_id == AV_CODEC_ID_VP3 ||
|
if (CONFIG_VP3_DECODER && (avctx->codec_id == AV_CODEC_ID_VP3 ||
|
||||||
avctx->codec_id == AV_CODEC_ID_THEORA)) {
|
avctx->codec_id == AV_CODEC_ID_THEORA)) {
|
||||||
c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_mmx2;
|
c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_mmxext;
|
||||||
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmx2;
|
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmxext;
|
||||||
}
|
}
|
||||||
#endif /* HAVE_INLINE_ASM */
|
#endif /* HAVE_INLINE_ASM */
|
||||||
|
|
||||||
if (CONFIG_H264QPEL) {
|
if (CONFIG_H264QPEL) {
|
||||||
#if HAVE_INLINE_ASM
|
#if HAVE_INLINE_ASM
|
||||||
SET_QPEL_FUNCS(put_qpel, 0, 16, mmx2, );
|
SET_QPEL_FUNCS(put_qpel, 0, 16, mmxext, );
|
||||||
SET_QPEL_FUNCS(put_qpel, 1, 8, mmx2, );
|
SET_QPEL_FUNCS(put_qpel, 1, 8, mmxext, );
|
||||||
SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmx2, );
|
SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, );
|
||||||
SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmx2, );
|
SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmxext, );
|
||||||
SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2, );
|
SET_QPEL_FUNCS(avg_qpel, 0, 16, mmxext, );
|
||||||
SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2, );
|
SET_QPEL_FUNCS(avg_qpel, 1, 8, mmxext, );
|
||||||
#endif /* HAVE_INLINE_ASM */
|
#endif /* HAVE_INLINE_ASM */
|
||||||
|
|
||||||
if (!high_bit_depth) {
|
if (!high_bit_depth) {
|
||||||
#if HAVE_INLINE_ASM
|
#if HAVE_INLINE_ASM
|
||||||
SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2, );
|
SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, );
|
||||||
SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2, );
|
SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmxext, );
|
||||||
SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2, );
|
SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmxext, );
|
||||||
SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2, );
|
SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmxext, );
|
||||||
SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2, );
|
SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmxext, );
|
||||||
SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2, );
|
SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmxext, );
|
||||||
#endif /* HAVE_INLINE_ASM */
|
#endif /* HAVE_INLINE_ASM */
|
||||||
} else if (bit_depth == 10) {
|
} else if (bit_depth == 10) {
|
||||||
#if HAVE_YASM
|
#if HAVE_YASM
|
||||||
@ -2539,10 +2539,10 @@ static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx,
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if HAVE_INLINE_ASM
|
#if HAVE_INLINE_ASM
|
||||||
SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2, );
|
SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmxext, );
|
||||||
SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2, );
|
SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmxext, );
|
||||||
SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2, );
|
SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmxext, );
|
||||||
SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2, );
|
SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmxext, );
|
||||||
#endif /* HAVE_INLINE_ASM */
|
#endif /* HAVE_INLINE_ASM */
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2861,7 +2861,7 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
|
|||||||
dsputil_init_mmx(c, avctx, mm_flags);
|
dsputil_init_mmx(c, avctx, mm_flags);
|
||||||
|
|
||||||
if (mm_flags & AV_CPU_FLAG_MMXEXT)
|
if (mm_flags & AV_CPU_FLAG_MMXEXT)
|
||||||
dsputil_init_mmx2(c, avctx, mm_flags);
|
dsputil_init_mmxext(c, avctx, mm_flags);
|
||||||
|
|
||||||
if (mm_flags & AV_CPU_FLAG_3DNOW)
|
if (mm_flags & AV_CPU_FLAG_3DNOW)
|
||||||
dsputil_init_3dnow(c, avctx, mm_flags);
|
dsputil_init_3dnow(c, avctx, mm_flags);
|
||||||
|
@ -89,13 +89,13 @@ void ff_add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_s
|
|||||||
void ff_put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
|
void ff_put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
|
||||||
void ff_put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
|
void ff_put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
|
||||||
|
|
||||||
void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
|
void ff_put_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride);
|
||||||
void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
|
void ff_avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride);
|
||||||
void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
|
void ff_put_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride);
|
||||||
void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
|
void ff_avg_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride);
|
||||||
|
|
||||||
void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd);
|
void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd);
|
||||||
void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd);
|
void ff_avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src, int stride, int rnd);
|
||||||
|
|
||||||
void ff_put_rv40_qpel8_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size);
|
void ff_put_rv40_qpel8_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size);
|
||||||
void ff_put_rv40_qpel16_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size);
|
void ff_put_rv40_qpel16_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size);
|
||||||
|
@ -647,7 +647,9 @@ static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_si
|
|||||||
}
|
}
|
||||||
#undef SUM
|
#undef SUM
|
||||||
|
|
||||||
static int vsad_intra16_mmx2(void *v, uint8_t * pix, uint8_t * dummy, int line_size, int h) {
|
static int vsad_intra16_mmxext(void *v, uint8_t *pix, uint8_t *dummy,
|
||||||
|
int line_size, int h)
|
||||||
|
{
|
||||||
int tmp;
|
int tmp;
|
||||||
|
|
||||||
assert( (((int)pix) & 7) == 0);
|
assert( (((int)pix) & 7) == 0);
|
||||||
@ -765,7 +767,9 @@ static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in
|
|||||||
}
|
}
|
||||||
#undef SUM
|
#undef SUM
|
||||||
|
|
||||||
static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
|
static int vsad16_mmxext(void *v, uint8_t *pix1, uint8_t *pix2,
|
||||||
|
int line_size, int h)
|
||||||
|
{
|
||||||
int tmp;
|
int tmp;
|
||||||
|
|
||||||
assert( (((int)pix1) & 7) == 0);
|
assert( (((int)pix1) & 7) == 0);
|
||||||
@ -844,7 +848,10 @@ static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
|
|||||||
dst[i+0] = src1[i+0]-src2[i+0];
|
dst[i+0] = src1[i+0]-src2[i+0];
|
||||||
}
|
}
|
||||||
|
|
||||||
static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
|
static void sub_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *src1,
|
||||||
|
const uint8_t *src2, int w,
|
||||||
|
int *left, int *left_top)
|
||||||
|
{
|
||||||
x86_reg i=0;
|
x86_reg i=0;
|
||||||
uint8_t l, lt;
|
uint8_t l, lt;
|
||||||
|
|
||||||
@ -976,7 +983,7 @@ DCT_SAD_FUNC(mmx)
|
|||||||
|
|
||||||
#define HSUM(a,t,dst) HSUM_MMXEXT(a,t,dst)
|
#define HSUM(a,t,dst) HSUM_MMXEXT(a,t,dst)
|
||||||
#define MMABS(a,z) MMABS_MMXEXT(a,z)
|
#define MMABS(a,z) MMABS_MMXEXT(a,z)
|
||||||
DCT_SAD_FUNC(mmx2)
|
DCT_SAD_FUNC(mmxext)
|
||||||
#undef HSUM
|
#undef HSUM
|
||||||
#undef DCT_SAD
|
#undef DCT_SAD
|
||||||
|
|
||||||
@ -1115,7 +1122,7 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
|
|||||||
if(mm_flags & AV_CPU_FLAG_SSE2){
|
if(mm_flags & AV_CPU_FLAG_SSE2){
|
||||||
c->fdct = ff_fdct_sse2;
|
c->fdct = ff_fdct_sse2;
|
||||||
} else if (mm_flags & AV_CPU_FLAG_MMXEXT) {
|
} else if (mm_flags & AV_CPU_FLAG_MMXEXT) {
|
||||||
c->fdct = ff_fdct_mmx2;
|
c->fdct = ff_fdct_mmxext;
|
||||||
}else{
|
}else{
|
||||||
c->fdct = ff_fdct_mmx;
|
c->fdct = ff_fdct_mmx;
|
||||||
}
|
}
|
||||||
@ -1148,14 +1155,14 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
|
|||||||
c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx;
|
c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx;
|
||||||
|
|
||||||
if (mm_flags & AV_CPU_FLAG_MMXEXT) {
|
if (mm_flags & AV_CPU_FLAG_MMXEXT) {
|
||||||
c->sum_abs_dctelem= sum_abs_dctelem_mmx2;
|
c->sum_abs_dctelem = sum_abs_dctelem_mmxext;
|
||||||
c->vsad[4]= vsad_intra16_mmx2;
|
c->vsad[4] = vsad_intra16_mmxext;
|
||||||
|
|
||||||
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
|
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
|
||||||
c->vsad[0] = vsad16_mmx2;
|
c->vsad[0] = vsad16_mmxext;
|
||||||
}
|
}
|
||||||
|
|
||||||
c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_mmx2;
|
c->sub_hfyu_median_prediction = sub_hfyu_median_prediction_mmxext;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(mm_flags & AV_CPU_FLAG_SSE2){
|
if(mm_flags & AV_CPU_FLAG_SSE2){
|
||||||
|
@ -440,7 +440,8 @@ static av_always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
static av_always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const int16_t *table)
|
static av_always_inline void fdct_row_mmxext(const int16_t *in, int16_t *out,
|
||||||
|
const int16_t *table)
|
||||||
{
|
{
|
||||||
__asm__ volatile (
|
__asm__ volatile (
|
||||||
"pshufw $0x1B, 8(%0), %%mm5 \n\t"
|
"pshufw $0x1B, 8(%0), %%mm5 \n\t"
|
||||||
@ -555,7 +556,7 @@ void ff_fdct_mmx(int16_t *block)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ff_fdct_mmx2(int16_t *block)
|
void ff_fdct_mmxext(int16_t *block)
|
||||||
{
|
{
|
||||||
DECLARE_ALIGNED(8, int64_t, align_tmp)[16];
|
DECLARE_ALIGNED(8, int64_t, align_tmp)[16];
|
||||||
int16_t *block1= (int16_t*)align_tmp;
|
int16_t *block1= (int16_t*)align_tmp;
|
||||||
@ -566,7 +567,7 @@ void ff_fdct_mmx2(int16_t *block)
|
|||||||
fdct_col_mmx(block, block1, 4);
|
fdct_col_mmx(block, block1, 4);
|
||||||
|
|
||||||
for(i=8;i>0;i--) {
|
for(i=8;i>0;i--) {
|
||||||
fdct_row_mmx2(block1, block, table);
|
fdct_row_mmxext(block1, block, table);
|
||||||
block1 += 8;
|
block1 += 8;
|
||||||
table += 32;
|
table += 32;
|
||||||
block += 8;
|
block += 8;
|
||||||
|
@ -1002,36 +1002,36 @@ static void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp,
|
|||||||
OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, tmpStride, srcStride, 16);\
|
OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, tmpStride, srcStride, 16);\
|
||||||
}\
|
}\
|
||||||
|
|
||||||
#define put_pixels8_l2_sse2 put_pixels8_l2_mmx2
|
#define put_pixels8_l2_sse2 put_pixels8_l2_mmxext
|
||||||
#define avg_pixels8_l2_sse2 avg_pixels8_l2_mmx2
|
#define avg_pixels8_l2_sse2 avg_pixels8_l2_mmxext
|
||||||
#define put_pixels16_l2_sse2 put_pixels16_l2_mmx2
|
#define put_pixels16_l2_sse2 put_pixels16_l2_mmxext
|
||||||
#define avg_pixels16_l2_sse2 avg_pixels16_l2_mmx2
|
#define avg_pixels16_l2_sse2 avg_pixels16_l2_mmxext
|
||||||
#define put_pixels8_l2_ssse3 put_pixels8_l2_mmx2
|
#define put_pixels8_l2_ssse3 put_pixels8_l2_mmxext
|
||||||
#define avg_pixels8_l2_ssse3 avg_pixels8_l2_mmx2
|
#define avg_pixels8_l2_ssse3 avg_pixels8_l2_mmxext
|
||||||
#define put_pixels16_l2_ssse3 put_pixels16_l2_mmx2
|
#define put_pixels16_l2_ssse3 put_pixels16_l2_mmxext
|
||||||
#define avg_pixels16_l2_ssse3 avg_pixels16_l2_mmx2
|
#define avg_pixels16_l2_ssse3 avg_pixels16_l2_mmxext
|
||||||
|
|
||||||
#define put_pixels8_l2_shift5_sse2 put_pixels8_l2_shift5_mmx2
|
#define put_pixels8_l2_shift5_sse2 put_pixels8_l2_shift5_mmxext
|
||||||
#define avg_pixels8_l2_shift5_sse2 avg_pixels8_l2_shift5_mmx2
|
#define avg_pixels8_l2_shift5_sse2 avg_pixels8_l2_shift5_mmxext
|
||||||
#define put_pixels16_l2_shift5_sse2 put_pixels16_l2_shift5_mmx2
|
#define put_pixels16_l2_shift5_sse2 put_pixels16_l2_shift5_mmxext
|
||||||
#define avg_pixels16_l2_shift5_sse2 avg_pixels16_l2_shift5_mmx2
|
#define avg_pixels16_l2_shift5_sse2 avg_pixels16_l2_shift5_mmxext
|
||||||
#define put_pixels8_l2_shift5_ssse3 put_pixels8_l2_shift5_mmx2
|
#define put_pixels8_l2_shift5_ssse3 put_pixels8_l2_shift5_mmxext
|
||||||
#define avg_pixels8_l2_shift5_ssse3 avg_pixels8_l2_shift5_mmx2
|
#define avg_pixels8_l2_shift5_ssse3 avg_pixels8_l2_shift5_mmxext
|
||||||
#define put_pixels16_l2_shift5_ssse3 put_pixels16_l2_shift5_mmx2
|
#define put_pixels16_l2_shift5_ssse3 put_pixels16_l2_shift5_mmxext
|
||||||
#define avg_pixels16_l2_shift5_ssse3 avg_pixels16_l2_shift5_mmx2
|
#define avg_pixels16_l2_shift5_ssse3 avg_pixels16_l2_shift5_mmxext
|
||||||
|
|
||||||
#define put_h264_qpel8_h_lowpass_l2_sse2 put_h264_qpel8_h_lowpass_l2_mmx2
|
#define put_h264_qpel8_h_lowpass_l2_sse2 put_h264_qpel8_h_lowpass_l2_mmxext
|
||||||
#define avg_h264_qpel8_h_lowpass_l2_sse2 avg_h264_qpel8_h_lowpass_l2_mmx2
|
#define avg_h264_qpel8_h_lowpass_l2_sse2 avg_h264_qpel8_h_lowpass_l2_mmxext
|
||||||
#define put_h264_qpel16_h_lowpass_l2_sse2 put_h264_qpel16_h_lowpass_l2_mmx2
|
#define put_h264_qpel16_h_lowpass_l2_sse2 put_h264_qpel16_h_lowpass_l2_mmxext
|
||||||
#define avg_h264_qpel16_h_lowpass_l2_sse2 avg_h264_qpel16_h_lowpass_l2_mmx2
|
#define avg_h264_qpel16_h_lowpass_l2_sse2 avg_h264_qpel16_h_lowpass_l2_mmxext
|
||||||
|
|
||||||
#define put_h264_qpel8_v_lowpass_ssse3 put_h264_qpel8_v_lowpass_sse2
|
#define put_h264_qpel8_v_lowpass_ssse3 put_h264_qpel8_v_lowpass_sse2
|
||||||
#define avg_h264_qpel8_v_lowpass_ssse3 avg_h264_qpel8_v_lowpass_sse2
|
#define avg_h264_qpel8_v_lowpass_ssse3 avg_h264_qpel8_v_lowpass_sse2
|
||||||
#define put_h264_qpel16_v_lowpass_ssse3 put_h264_qpel16_v_lowpass_sse2
|
#define put_h264_qpel16_v_lowpass_ssse3 put_h264_qpel16_v_lowpass_sse2
|
||||||
#define avg_h264_qpel16_v_lowpass_ssse3 avg_h264_qpel16_v_lowpass_sse2
|
#define avg_h264_qpel16_v_lowpass_ssse3 avg_h264_qpel16_v_lowpass_sse2
|
||||||
|
|
||||||
#define put_h264_qpel8or16_hv2_lowpass_sse2 put_h264_qpel8or16_hv2_lowpass_mmx2
|
#define put_h264_qpel8or16_hv2_lowpass_sse2 put_h264_qpel8or16_hv2_lowpass_mmxext
|
||||||
#define avg_h264_qpel8or16_hv2_lowpass_sse2 avg_h264_qpel8or16_hv2_lowpass_mmx2
|
#define avg_h264_qpel8or16_hv2_lowpass_sse2 avg_h264_qpel8or16_hv2_lowpass_mmxext
|
||||||
|
|
||||||
#define H264_MC(OPNAME, SIZE, MMX, ALIGN) \
|
#define H264_MC(OPNAME, SIZE, MMX, ALIGN) \
|
||||||
H264_MC_C(OPNAME, SIZE, MMX, ALIGN)\
|
H264_MC_C(OPNAME, SIZE, MMX, ALIGN)\
|
||||||
@ -1045,8 +1045,8 @@ static void put_h264_qpel16_mc00_sse2 (uint8_t *dst, uint8_t *src, int stride){
|
|||||||
static void avg_h264_qpel16_mc00_sse2 (uint8_t *dst, uint8_t *src, int stride){
|
static void avg_h264_qpel16_mc00_sse2 (uint8_t *dst, uint8_t *src, int stride){
|
||||||
avg_pixels16_sse2(dst, src, stride, 16);
|
avg_pixels16_sse2(dst, src, stride, 16);
|
||||||
}
|
}
|
||||||
#define put_h264_qpel8_mc00_sse2 put_h264_qpel8_mc00_mmx2
|
#define put_h264_qpel8_mc00_sse2 put_h264_qpel8_mc00_mmxext
|
||||||
#define avg_h264_qpel8_mc00_sse2 avg_h264_qpel8_mc00_mmx2
|
#define avg_h264_qpel8_mc00_sse2 avg_h264_qpel8_mc00_mmxext
|
||||||
|
|
||||||
#define H264_MC_C(OPNAME, SIZE, MMX, ALIGN) \
|
#define H264_MC_C(OPNAME, SIZE, MMX, ALIGN) \
|
||||||
static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## MMX (uint8_t *dst, uint8_t *src, int stride){\
|
static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## MMX (uint8_t *dst, uint8_t *src, int stride){\
|
||||||
@ -1168,8 +1168,8 @@ QPEL_H264(put_, PUT_OP, 3dnow)
|
|||||||
QPEL_H264(avg_, AVG_3DNOW_OP, 3dnow)
|
QPEL_H264(avg_, AVG_3DNOW_OP, 3dnow)
|
||||||
#undef PAVGB
|
#undef PAVGB
|
||||||
#define PAVGB "pavgb"
|
#define PAVGB "pavgb"
|
||||||
QPEL_H264(put_, PUT_OP, mmx2)
|
QPEL_H264(put_, PUT_OP, mmxext)
|
||||||
QPEL_H264(avg_,AVG_MMXEXT_OP, mmx2)
|
QPEL_H264(avg_, AVG_MMXEXT_OP, mmxext)
|
||||||
QPEL_H264_V_XMM(put_, PUT_OP, sse2)
|
QPEL_H264_V_XMM(put_, PUT_OP, sse2)
|
||||||
QPEL_H264_V_XMM(avg_,AVG_MMXEXT_OP, sse2)
|
QPEL_H264_V_XMM(avg_,AVG_MMXEXT_OP, sse2)
|
||||||
QPEL_H264_HV_XMM(put_, PUT_OP, sse2)
|
QPEL_H264_HV_XMM(put_, PUT_OP, sse2)
|
||||||
@ -1185,7 +1185,7 @@ QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, ssse3)
|
|||||||
#undef PAVGB
|
#undef PAVGB
|
||||||
|
|
||||||
H264_MC_4816(3dnow)
|
H264_MC_4816(3dnow)
|
||||||
H264_MC_4816(mmx2)
|
H264_MC_4816(mmxext)
|
||||||
H264_MC_816(H264_MC_V, sse2)
|
H264_MC_816(H264_MC_V, sse2)
|
||||||
H264_MC_816(H264_MC_HV, sse2)
|
H264_MC_816(H264_MC_HV, sse2)
|
||||||
#if HAVE_SSSE3_INLINE
|
#if HAVE_SSSE3_INLINE
|
||||||
|
@ -130,18 +130,17 @@ LF_FUNCS(uint16_t, 10)
|
|||||||
|
|
||||||
#if ARCH_X86_32
|
#if ARCH_X86_32
|
||||||
LF_FUNC(v8, luma, 8, mmx2)
|
LF_FUNC(v8, luma, 8, mmx2)
|
||||||
static void ff_deblock_v_luma_8_mmx2(uint8_t *pix, int stride, int alpha,
|
static void ff_deblock_v_luma_8_mmxext(uint8_t *pix, int stride, int alpha,
|
||||||
int beta, int8_t *tc0)
|
int beta, int8_t *tc0)
|
||||||
{
|
{
|
||||||
if ((tc0[0] & tc0[1]) >= 0)
|
if ((tc0[0] & tc0[1]) >= 0)
|
||||||
ff_deblock_v8_luma_8_mmx2(pix + 0, stride, alpha, beta, tc0);
|
ff_deblock_v8_luma_8_mmx2(pix + 0, stride, alpha, beta, tc0);
|
||||||
if ((tc0[2] & tc0[3]) >= 0)
|
if ((tc0[2] & tc0[3]) >= 0)
|
||||||
ff_deblock_v8_luma_8_mmx2(pix + 8, stride, alpha, beta, tc0 + 2);
|
ff_deblock_v8_luma_8_mmx2(pix + 8, stride, alpha, beta, tc0 + 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
LF_IFUNC(v8, luma_intra, 8, mmx2)
|
LF_IFUNC(v8, luma_intra, 8, mmx2)
|
||||||
static void ff_deblock_v_luma_intra_8_mmx2(uint8_t *pix, int stride,
|
static void ff_deblock_v_luma_intra_8_mmxext(uint8_t *pix, int stride,
|
||||||
int alpha, int beta)
|
int alpha, int beta)
|
||||||
{
|
{
|
||||||
ff_deblock_v8_luma_intra_8_mmx2(pix + 0, stride, alpha, beta);
|
ff_deblock_v8_luma_intra_8_mmx2(pix + 0, stride, alpha, beta);
|
||||||
ff_deblock_v8_luma_intra_8_mmx2(pix + 8, stride, alpha, beta);
|
ff_deblock_v8_luma_intra_8_mmx2(pix + 8, stride, alpha, beta);
|
||||||
@ -246,9 +245,9 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
|
|||||||
c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_mmx2;
|
c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_mmx2;
|
||||||
}
|
}
|
||||||
#if ARCH_X86_32
|
#if ARCH_X86_32
|
||||||
c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_mmx2;
|
c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_mmxext;
|
||||||
c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_mmx2;
|
c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_mmx2;
|
||||||
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmx2;
|
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmxext;
|
||||||
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmx2;
|
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmx2;
|
||||||
#endif /* ARCH_X86_32 */
|
#endif /* ARCH_X86_32 */
|
||||||
c->weight_h264_pixels_tab[0] = ff_h264_weight_16_mmx2;
|
c->weight_h264_pixels_tab[0] = ff_h264_weight_16_mmx2;
|
||||||
|
@ -512,7 +512,8 @@ __asm__ volatile(
|
|||||||
//-----------------------------------------------------------------------------
|
//-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
void ff_idct_xvid_mmx2(short *block){
|
void ff_idct_xvid_mmxext(short *block)
|
||||||
|
{
|
||||||
__asm__ volatile(
|
__asm__ volatile(
|
||||||
//# Process each row
|
//# Process each row
|
||||||
DCT_8_INV_ROW_XMM(0*16(%0), 0*16(%0), 64*0(%2), 8*0(%1))
|
DCT_8_INV_ROW_XMM(0*16(%0), 0*16(%0), 64*0(%2), 8*0(%1))
|
||||||
@ -542,15 +543,15 @@ void ff_idct_xvid_mmx_add(uint8_t *dest, int line_size, DCTELEM *block)
|
|||||||
ff_add_pixels_clamped_mmx(block, dest, line_size);
|
ff_add_pixels_clamped_mmx(block, dest, line_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ff_idct_xvid_mmx2_put(uint8_t *dest, int line_size, DCTELEM *block)
|
void ff_idct_xvid_mmxext_put(uint8_t *dest, int line_size, DCTELEM *block)
|
||||||
{
|
{
|
||||||
ff_idct_xvid_mmx2(block);
|
ff_idct_xvid_mmxext(block);
|
||||||
ff_put_pixels_clamped_mmx(block, dest, line_size);
|
ff_put_pixels_clamped_mmx(block, dest, line_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ff_idct_xvid_mmx2_add(uint8_t *dest, int line_size, DCTELEM *block)
|
void ff_idct_xvid_mmxext_add(uint8_t *dest, int line_size, DCTELEM *block)
|
||||||
{
|
{
|
||||||
ff_idct_xvid_mmx2(block);
|
ff_idct_xvid_mmxext(block);
|
||||||
ff_add_pixels_clamped_mmx(block, dest, line_size);
|
ff_add_pixels_clamped_mmx(block, dest, line_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -34,9 +34,9 @@ void ff_idct_xvid_mmx(short *block);
|
|||||||
void ff_idct_xvid_mmx_put(uint8_t *dest, int line_size, DCTELEM *block);
|
void ff_idct_xvid_mmx_put(uint8_t *dest, int line_size, DCTELEM *block);
|
||||||
void ff_idct_xvid_mmx_add(uint8_t *dest, int line_size, DCTELEM *block);
|
void ff_idct_xvid_mmx_add(uint8_t *dest, int line_size, DCTELEM *block);
|
||||||
|
|
||||||
void ff_idct_xvid_mmx2(short *block);
|
void ff_idct_xvid_mmxext(short *block);
|
||||||
void ff_idct_xvid_mmx2_put(uint8_t *dest, int line_size, DCTELEM *block);
|
void ff_idct_xvid_mmxext_put(uint8_t *dest, int line_size, DCTELEM *block);
|
||||||
void ff_idct_xvid_mmx2_add(uint8_t *dest, int line_size, DCTELEM *block);
|
void ff_idct_xvid_mmxext_add(uint8_t *dest, int line_size, DCTELEM *block);
|
||||||
|
|
||||||
void ff_idct_xvid_sse2(short *block);
|
void ff_idct_xvid_sse2(short *block);
|
||||||
void ff_idct_xvid_sse2_put(uint8_t *dest, int line_size, short *block);
|
void ff_idct_xvid_sse2_put(uint8_t *dest, int line_size, short *block);
|
||||||
|
@ -74,7 +74,8 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
|
static inline void sad8_1_mmxext(uint8_t *blk1, uint8_t *blk2,
|
||||||
|
int stride, int h)
|
||||||
{
|
{
|
||||||
__asm__ volatile(
|
__asm__ volatile(
|
||||||
".p2align 4 \n\t"
|
".p2align 4 \n\t"
|
||||||
@ -120,7 +121,8 @@ static int sad16_sse2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void sad8_x2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
|
static inline void sad8_x2a_mmxext(uint8_t *blk1, uint8_t *blk2,
|
||||||
|
int stride, int h)
|
||||||
{
|
{
|
||||||
__asm__ volatile(
|
__asm__ volatile(
|
||||||
".p2align 4 \n\t"
|
".p2align 4 \n\t"
|
||||||
@ -142,7 +144,8 @@ static inline void sad8_x2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void sad8_y2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
|
static inline void sad8_y2a_mmxext(uint8_t *blk1, uint8_t *blk2,
|
||||||
|
int stride, int h)
|
||||||
{
|
{
|
||||||
__asm__ volatile(
|
__asm__ volatile(
|
||||||
"movq (%1), %%mm0 \n\t"
|
"movq (%1), %%mm0 \n\t"
|
||||||
@ -167,7 +170,8 @@ static inline void sad8_y2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
|
static inline void sad8_4_mmxext(uint8_t *blk1, uint8_t *blk2,
|
||||||
|
int stride, int h)
|
||||||
{
|
{
|
||||||
__asm__ volatile(
|
__asm__ volatile(
|
||||||
"movq "MANGLE(bone)", %%mm5 \n\t"
|
"movq "MANGLE(bone)", %%mm5 \n\t"
|
||||||
@ -304,7 +308,7 @@ static inline int sum_mmx(void)
|
|||||||
return ret&0xFFFF;
|
return ret&0xFFFF;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int sum_mmx2(void)
|
static inline int sum_mmxext(void)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
__asm__ volatile(
|
__asm__ volatile(
|
||||||
@ -424,7 +428,7 @@ static int sad16_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride,
|
|||||||
}\
|
}\
|
||||||
|
|
||||||
PIX_SAD(mmx)
|
PIX_SAD(mmx)
|
||||||
PIX_SAD(mmx2)
|
PIX_SAD(mmxext)
|
||||||
|
|
||||||
#endif /* HAVE_INLINE_ASM */
|
#endif /* HAVE_INLINE_ASM */
|
||||||
|
|
||||||
@ -447,19 +451,19 @@ void ff_dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx)
|
|||||||
c->sad[1]= sad8_mmx;
|
c->sad[1]= sad8_mmx;
|
||||||
}
|
}
|
||||||
if (mm_flags & AV_CPU_FLAG_MMXEXT) {
|
if (mm_flags & AV_CPU_FLAG_MMXEXT) {
|
||||||
c->pix_abs[0][0] = sad16_mmx2;
|
c->pix_abs[0][0] = sad16_mmxext;
|
||||||
c->pix_abs[1][0] = sad8_mmx2;
|
c->pix_abs[1][0] = sad8_mmxext;
|
||||||
|
|
||||||
c->sad[0]= sad16_mmx2;
|
c->sad[0] = sad16_mmxext;
|
||||||
c->sad[1]= sad8_mmx2;
|
c->sad[1] = sad8_mmxext;
|
||||||
|
|
||||||
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
|
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
|
||||||
c->pix_abs[0][1] = sad16_x2_mmx2;
|
c->pix_abs[0][1] = sad16_x2_mmxext;
|
||||||
c->pix_abs[0][2] = sad16_y2_mmx2;
|
c->pix_abs[0][2] = sad16_y2_mmxext;
|
||||||
c->pix_abs[0][3] = sad16_xy2_mmx2;
|
c->pix_abs[0][3] = sad16_xy2_mmxext;
|
||||||
c->pix_abs[1][1] = sad8_x2_mmx2;
|
c->pix_abs[1][1] = sad8_x2_mmxext;
|
||||||
c->pix_abs[1][2] = sad8_y2_mmx2;
|
c->pix_abs[1][2] = sad8_y2_mmxext;
|
||||||
c->pix_abs[1][3] = sad8_xy2_mmx2;
|
c->pix_abs[1][3] = sad8_xy2_mmxext;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ((mm_flags & AV_CPU_FLAG_SSE2) && !(mm_flags & AV_CPU_FLAG_3DNOW) && avctx->codec_id != AV_CODEC_ID_SNOW) {
|
if ((mm_flags & AV_CPU_FLAG_SSE2) && !(mm_flags & AV_CPU_FLAG_3DNOW) && avctx->codec_id != AV_CODEC_ID_SNOW) {
|
||||||
|
@ -47,8 +47,8 @@ extern uint16_t ff_inv_zigzag_direct16[64];
|
|||||||
#define COMPILE_TEMPLATE_SSSE3 0
|
#define COMPILE_TEMPLATE_SSSE3 0
|
||||||
#undef RENAME
|
#undef RENAME
|
||||||
#undef RENAMEl
|
#undef RENAMEl
|
||||||
#define RENAME(a) a ## _MMX2
|
#define RENAME(a) a ## _MMXEXT
|
||||||
#define RENAMEl(a) a ## _mmx2
|
#define RENAMEl(a) a ## _mmxext
|
||||||
#include "mpegvideoenc_template.c"
|
#include "mpegvideoenc_template.c"
|
||||||
#endif /* HAVE_MMXEXT_INLINE */
|
#endif /* HAVE_MMXEXT_INLINE */
|
||||||
|
|
||||||
@ -92,7 +92,7 @@ void ff_MPV_encode_init_x86(MpegEncContext *s)
|
|||||||
#endif
|
#endif
|
||||||
#if HAVE_MMXEXT_INLINE
|
#if HAVE_MMXEXT_INLINE
|
||||||
if (INLINE_MMXEXT(mm_flags))
|
if (INLINE_MMXEXT(mm_flags))
|
||||||
s->dct_quantize = dct_quantize_MMX2;
|
s->dct_quantize = dct_quantize_MMXEXT;
|
||||||
#endif
|
#endif
|
||||||
#if HAVE_SSE2_INLINE
|
#if HAVE_SSE2_INLINE
|
||||||
if (INLINE_SSE2(mm_flags))
|
if (INLINE_SSE2(mm_flags))
|
||||||
|
@ -467,7 +467,10 @@ VC1_MSPEL_MC(avg_)
|
|||||||
static void put_vc1_mspel_mc ## a ## b ## _mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \
|
static void put_vc1_mspel_mc ## a ## b ## _mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \
|
||||||
put_vc1_mspel_mc(dst, src, stride, a, b, rnd); \
|
put_vc1_mspel_mc(dst, src, stride, a, b, rnd); \
|
||||||
}\
|
}\
|
||||||
static void avg_vc1_mspel_mc ## a ## b ## _mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \
|
static void avg_vc1_mspel_mc ## a ## b ## _mmxext(uint8_t *dst, \
|
||||||
|
const uint8_t *src, \
|
||||||
|
int stride, int rnd) \
|
||||||
|
{ \
|
||||||
avg_vc1_mspel_mc(dst, src, stride, a, b, rnd); \
|
avg_vc1_mspel_mc(dst, src, stride, a, b, rnd); \
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -490,7 +493,8 @@ DECLARE_FUNCTION(3, 1)
|
|||||||
DECLARE_FUNCTION(3, 2)
|
DECLARE_FUNCTION(3, 2)
|
||||||
DECLARE_FUNCTION(3, 3)
|
DECLARE_FUNCTION(3, 3)
|
||||||
|
|
||||||
static void vc1_inv_trans_4x4_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *block)
|
static void vc1_inv_trans_4x4_dc_mmxext(uint8_t *dest, int linesize,
|
||||||
|
DCTELEM *block)
|
||||||
{
|
{
|
||||||
int dc = block[0];
|
int dc = block[0];
|
||||||
dc = (17 * dc + 4) >> 3;
|
dc = (17 * dc + 4) >> 3;
|
||||||
@ -528,7 +532,8 @@ static void vc1_inv_trans_4x4_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *bloc
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void vc1_inv_trans_4x8_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *block)
|
static void vc1_inv_trans_4x8_dc_mmxext(uint8_t *dest, int linesize,
|
||||||
|
DCTELEM *block)
|
||||||
{
|
{
|
||||||
int dc = block[0];
|
int dc = block[0];
|
||||||
dc = (17 * dc + 4) >> 3;
|
dc = (17 * dc + 4) >> 3;
|
||||||
@ -589,7 +594,8 @@ static void vc1_inv_trans_4x8_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *bloc
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void vc1_inv_trans_8x4_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *block)
|
static void vc1_inv_trans_8x4_dc_mmxext(uint8_t *dest, int linesize,
|
||||||
|
DCTELEM *block)
|
||||||
{
|
{
|
||||||
int dc = block[0];
|
int dc = block[0];
|
||||||
dc = ( 3 * dc + 1) >> 1;
|
dc = ( 3 * dc + 1) >> 1;
|
||||||
@ -627,7 +633,8 @@ static void vc1_inv_trans_8x4_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *bloc
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void vc1_inv_trans_8x8_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *block)
|
static void vc1_inv_trans_8x8_dc_mmxext(uint8_t *dest, int linesize,
|
||||||
|
DCTELEM *block)
|
||||||
{
|
{
|
||||||
int dc = block[0];
|
int dc = block[0];
|
||||||
dc = (3 * dc + 1) >> 1;
|
dc = (3 * dc + 1) >> 1;
|
||||||
@ -713,29 +720,29 @@ av_cold void ff_vc1dsp_init_mmx(VC1DSPContext *dsp)
|
|||||||
|
|
||||||
av_cold void ff_vc1dsp_init_mmxext(VC1DSPContext *dsp)
|
av_cold void ff_vc1dsp_init_mmxext(VC1DSPContext *dsp)
|
||||||
{
|
{
|
||||||
dsp->avg_vc1_mspel_pixels_tab[ 0] = ff_avg_vc1_mspel_mc00_mmx2;
|
dsp->avg_vc1_mspel_pixels_tab[ 0] = ff_avg_vc1_mspel_mc00_mmxext;
|
||||||
dsp->avg_vc1_mspel_pixels_tab[ 4] = avg_vc1_mspel_mc01_mmx2;
|
dsp->avg_vc1_mspel_pixels_tab[ 4] = avg_vc1_mspel_mc01_mmxext;
|
||||||
dsp->avg_vc1_mspel_pixels_tab[ 8] = avg_vc1_mspel_mc02_mmx2;
|
dsp->avg_vc1_mspel_pixels_tab[ 8] = avg_vc1_mspel_mc02_mmxext;
|
||||||
dsp->avg_vc1_mspel_pixels_tab[12] = avg_vc1_mspel_mc03_mmx2;
|
dsp->avg_vc1_mspel_pixels_tab[12] = avg_vc1_mspel_mc03_mmxext;
|
||||||
|
|
||||||
dsp->avg_vc1_mspel_pixels_tab[ 1] = avg_vc1_mspel_mc10_mmx2;
|
dsp->avg_vc1_mspel_pixels_tab[ 1] = avg_vc1_mspel_mc10_mmxext;
|
||||||
dsp->avg_vc1_mspel_pixels_tab[ 5] = avg_vc1_mspel_mc11_mmx2;
|
dsp->avg_vc1_mspel_pixels_tab[ 5] = avg_vc1_mspel_mc11_mmxext;
|
||||||
dsp->avg_vc1_mspel_pixels_tab[ 9] = avg_vc1_mspel_mc12_mmx2;
|
dsp->avg_vc1_mspel_pixels_tab[ 9] = avg_vc1_mspel_mc12_mmxext;
|
||||||
dsp->avg_vc1_mspel_pixels_tab[13] = avg_vc1_mspel_mc13_mmx2;
|
dsp->avg_vc1_mspel_pixels_tab[13] = avg_vc1_mspel_mc13_mmxext;
|
||||||
|
|
||||||
dsp->avg_vc1_mspel_pixels_tab[ 2] = avg_vc1_mspel_mc20_mmx2;
|
dsp->avg_vc1_mspel_pixels_tab[ 2] = avg_vc1_mspel_mc20_mmxext;
|
||||||
dsp->avg_vc1_mspel_pixels_tab[ 6] = avg_vc1_mspel_mc21_mmx2;
|
dsp->avg_vc1_mspel_pixels_tab[ 6] = avg_vc1_mspel_mc21_mmxext;
|
||||||
dsp->avg_vc1_mspel_pixels_tab[10] = avg_vc1_mspel_mc22_mmx2;
|
dsp->avg_vc1_mspel_pixels_tab[10] = avg_vc1_mspel_mc22_mmxext;
|
||||||
dsp->avg_vc1_mspel_pixels_tab[14] = avg_vc1_mspel_mc23_mmx2;
|
dsp->avg_vc1_mspel_pixels_tab[14] = avg_vc1_mspel_mc23_mmxext;
|
||||||
|
|
||||||
dsp->avg_vc1_mspel_pixels_tab[ 3] = avg_vc1_mspel_mc30_mmx2;
|
dsp->avg_vc1_mspel_pixels_tab[ 3] = avg_vc1_mspel_mc30_mmxext;
|
||||||
dsp->avg_vc1_mspel_pixels_tab[ 7] = avg_vc1_mspel_mc31_mmx2;
|
dsp->avg_vc1_mspel_pixels_tab[ 7] = avg_vc1_mspel_mc31_mmxext;
|
||||||
dsp->avg_vc1_mspel_pixels_tab[11] = avg_vc1_mspel_mc32_mmx2;
|
dsp->avg_vc1_mspel_pixels_tab[11] = avg_vc1_mspel_mc32_mmxext;
|
||||||
dsp->avg_vc1_mspel_pixels_tab[15] = avg_vc1_mspel_mc33_mmx2;
|
dsp->avg_vc1_mspel_pixels_tab[15] = avg_vc1_mspel_mc33_mmxext;
|
||||||
|
|
||||||
dsp->vc1_inv_trans_8x8_dc = vc1_inv_trans_8x8_dc_mmx2;
|
dsp->vc1_inv_trans_8x8_dc = vc1_inv_trans_8x8_dc_mmxext;
|
||||||
dsp->vc1_inv_trans_4x8_dc = vc1_inv_trans_4x8_dc_mmx2;
|
dsp->vc1_inv_trans_4x8_dc = vc1_inv_trans_4x8_dc_mmxext;
|
||||||
dsp->vc1_inv_trans_8x4_dc = vc1_inv_trans_8x4_dc_mmx2;
|
dsp->vc1_inv_trans_8x4_dc = vc1_inv_trans_8x4_dc_mmxext;
|
||||||
dsp->vc1_inv_trans_4x4_dc = vc1_inv_trans_4x4_dc_mmx2;
|
dsp->vc1_inv_trans_4x4_dc = vc1_inv_trans_4x4_dc_mmxext;
|
||||||
}
|
}
|
||||||
#endif /* HAVE_INLINE_ASM */
|
#endif /* HAVE_INLINE_ASM */
|
||||||
|
@ -30,7 +30,9 @@ DECLARE_ALIGNED(16, static const uint16_t, pw_7f)[8] = {0x7F,0x7F,0x7F,0x7F,0x7F
|
|||||||
DECLARE_ALIGNED(16, static const uint16_t, pw_ff)[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF};
|
DECLARE_ALIGNED(16, static const uint16_t, pw_ff)[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF};
|
||||||
|
|
||||||
#if HAVE_MMXEXT_INLINE
|
#if HAVE_MMXEXT_INLINE
|
||||||
static void gradfun_filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers)
|
static void gradfun_filter_line_mmxext(uint8_t *dst, uint8_t *src, uint16_t *dc,
|
||||||
|
int width, int thresh,
|
||||||
|
const uint16_t *dithers)
|
||||||
{
|
{
|
||||||
intptr_t x;
|
intptr_t x;
|
||||||
if (width & 3) {
|
if (width & 3) {
|
||||||
@ -175,7 +177,7 @@ av_cold void ff_gradfun_init_x86(GradFunContext *gf)
|
|||||||
|
|
||||||
#if HAVE_MMXEXT_INLINE
|
#if HAVE_MMXEXT_INLINE
|
||||||
if (cpu_flags & AV_CPU_FLAG_MMXEXT)
|
if (cpu_flags & AV_CPU_FLAG_MMXEXT)
|
||||||
gf->filter_line = gradfun_filter_line_mmx2;
|
gf->filter_line = gradfun_filter_line_mmxext;
|
||||||
#endif
|
#endif
|
||||||
#if HAVE_SSSE3_INLINE
|
#if HAVE_SSSE3_INLINE
|
||||||
if (cpu_flags & AV_CPU_FLAG_SSSE3)
|
if (cpu_flags & AV_CPU_FLAG_SSSE3)
|
||||||
|
@ -49,7 +49,7 @@ DECLARE_ASM_CONST(16, const xmm_reg, pw_1) = {0x0001000100010001ULL, 0x000100010
|
|||||||
|
|
||||||
#if HAVE_MMXEXT_INLINE
|
#if HAVE_MMXEXT_INLINE
|
||||||
#undef RENAME
|
#undef RENAME
|
||||||
#define RENAME(a) a ## _mmx2
|
#define RENAME(a) a ## _mmxext
|
||||||
#include "yadif_template.c"
|
#include "yadif_template.c"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -61,7 +61,7 @@ av_cold void ff_yadif_init_x86(YADIFContext *yadif)
|
|||||||
|
|
||||||
#if HAVE_MMXEXT_INLINE
|
#if HAVE_MMXEXT_INLINE
|
||||||
if (cpu_flags & AV_CPU_FLAG_MMXEXT)
|
if (cpu_flags & AV_CPU_FLAG_MMXEXT)
|
||||||
yadif->filter_line = yadif_filter_line_mmx2;
|
yadif->filter_line = yadif_filter_line_mmxext;
|
||||||
#endif
|
#endif
|
||||||
#if HAVE_SSE2_INLINE
|
#if HAVE_SSE2_INLINE
|
||||||
if (cpu_flags & AV_CPU_FLAG_SSE2)
|
if (cpu_flags & AV_CPU_FLAG_SSE2)
|
||||||
|
@ -600,8 +600,9 @@ fail:
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if HAVE_MMXEXT_INLINE
|
#if HAVE_MMXEXT_INLINE
|
||||||
static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode,
|
static int init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode,
|
||||||
int16_t *filter, int32_t *filterPos, int numSplits)
|
int16_t *filter, int32_t *filterPos,
|
||||||
|
int numSplits)
|
||||||
{
|
{
|
||||||
uint8_t *fragmentA;
|
uint8_t *fragmentA;
|
||||||
x86_reg imm8OfPShufW1A;
|
x86_reg imm8OfPShufW1A;
|
||||||
@ -1043,10 +1044,10 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
|
|||||||
#if HAVE_MMXEXT_INLINE
|
#if HAVE_MMXEXT_INLINE
|
||||||
// can't downscale !!!
|
// can't downscale !!!
|
||||||
if (c->canMMXEXTBeUsed && (flags & SWS_FAST_BILINEAR)) {
|
if (c->canMMXEXTBeUsed && (flags & SWS_FAST_BILINEAR)) {
|
||||||
c->lumMmxextFilterCodeSize = initMMX2HScaler(dstW, c->lumXInc, NULL,
|
c->lumMmxextFilterCodeSize = init_hscaler_mmxext(dstW, c->lumXInc, NULL,
|
||||||
NULL, NULL, 8);
|
NULL, NULL, 8);
|
||||||
c->chrMmxextFilterCodeSize = initMMX2HScaler(c->chrDstW, c->chrXInc,
|
c->chrMmxextFilterCodeSize = init_hscaler_mmxext(c->chrDstW, c->chrXInc,
|
||||||
NULL, NULL, NULL, 4);
|
NULL, NULL, NULL, 4);
|
||||||
|
|
||||||
#if USE_MMAP
|
#if USE_MMAP
|
||||||
c->lumMmxextFilterCode = mmap(NULL, c->lumMmxextFilterCodeSize,
|
c->lumMmxextFilterCode = mmap(NULL, c->lumMmxextFilterCodeSize,
|
||||||
@ -1078,10 +1079,10 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
|
|||||||
FF_ALLOCZ_OR_GOTO(c, c->hLumFilterPos, (dstW / 2 / 8 + 8) * sizeof(int32_t), fail);
|
FF_ALLOCZ_OR_GOTO(c, c->hLumFilterPos, (dstW / 2 / 8 + 8) * sizeof(int32_t), fail);
|
||||||
FF_ALLOCZ_OR_GOTO(c, c->hChrFilterPos, (c->chrDstW / 2 / 4 + 8) * sizeof(int32_t), fail);
|
FF_ALLOCZ_OR_GOTO(c, c->hChrFilterPos, (c->chrDstW / 2 / 4 + 8) * sizeof(int32_t), fail);
|
||||||
|
|
||||||
initMMX2HScaler(dstW, c->lumXInc, c->lumMmxextFilterCode,
|
init_hscaler_mmxext(dstW, c->lumXInc, c->lumMmxextFilterCode,
|
||||||
c->hLumFilter, c->hLumFilterPos, 8);
|
c->hLumFilter, c->hLumFilterPos, 8);
|
||||||
initMMX2HScaler(c->chrDstW, c->chrXInc, c->chrMmxextFilterCode,
|
init_hscaler_mmxext(c->chrDstW, c->chrXInc, c->chrMmxextFilterCode,
|
||||||
c->hChrFilter, c->hChrFilterPos, 4);
|
c->hChrFilter, c->hChrFilterPos, 4);
|
||||||
|
|
||||||
#if USE_MMAP
|
#if USE_MMAP
|
||||||
mprotect(c->lumMmxextFilterCode, c->lumMmxextFilterCodeSize, PROT_EXEC | PROT_READ);
|
mprotect(c->lumMmxextFilterCode, c->lumMmxextFilterCodeSize, PROT_EXEC | PROT_READ);
|
||||||
|
@ -99,7 +99,7 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL;
|
|||||||
#undef RENAME
|
#undef RENAME
|
||||||
#undef COMPILE_TEMPLATE_MMXEXT
|
#undef COMPILE_TEMPLATE_MMXEXT
|
||||||
#define COMPILE_TEMPLATE_MMXEXT 1
|
#define COMPILE_TEMPLATE_MMXEXT 1
|
||||||
#define RENAME(a) a ## _MMX2
|
#define RENAME(a) a ## _MMXEXT
|
||||||
#include "rgb2rgb_template.c"
|
#include "rgb2rgb_template.c"
|
||||||
|
|
||||||
//SSE2 versions
|
//SSE2 versions
|
||||||
@ -139,7 +139,7 @@ av_cold void rgb2rgb_init_x86(void)
|
|||||||
if (INLINE_AMD3DNOW(cpu_flags))
|
if (INLINE_AMD3DNOW(cpu_flags))
|
||||||
rgb2rgb_init_3DNOW();
|
rgb2rgb_init_3DNOW();
|
||||||
if (INLINE_MMXEXT(cpu_flags))
|
if (INLINE_MMXEXT(cpu_flags))
|
||||||
rgb2rgb_init_MMX2();
|
rgb2rgb_init_MMXEXT();
|
||||||
if (INLINE_SSE2(cpu_flags))
|
if (INLINE_SSE2(cpu_flags))
|
||||||
rgb2rgb_init_SSE2();
|
rgb2rgb_init_SSE2();
|
||||||
#endif /* HAVE_INLINE_ASM */
|
#endif /* HAVE_INLINE_ASM */
|
||||||
|
@ -83,7 +83,7 @@ DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
|
|||||||
#undef RENAME
|
#undef RENAME
|
||||||
#undef COMPILE_TEMPLATE_MMXEXT
|
#undef COMPILE_TEMPLATE_MMXEXT
|
||||||
#define COMPILE_TEMPLATE_MMXEXT 1
|
#define COMPILE_TEMPLATE_MMXEXT 1
|
||||||
#define RENAME(a) a ## _MMX2
|
#define RENAME(a) a ## _MMXEXT
|
||||||
#include "swscale_template.c"
|
#include "swscale_template.c"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -311,7 +311,7 @@ av_cold void ff_sws_init_swScale_mmx(SwsContext *c)
|
|||||||
sws_init_swScale_MMX(c);
|
sws_init_swScale_MMX(c);
|
||||||
#if HAVE_MMXEXT_INLINE
|
#if HAVE_MMXEXT_INLINE
|
||||||
if (cpu_flags & AV_CPU_FLAG_MMXEXT)
|
if (cpu_flags & AV_CPU_FLAG_MMXEXT)
|
||||||
sws_init_swScale_MMX2(c);
|
sws_init_swScale_MMXEXT(c);
|
||||||
#endif
|
#endif
|
||||||
#endif /* HAVE_INLINE_ASM */
|
#endif /* HAVE_INLINE_ASM */
|
||||||
|
|
||||||
|
@ -63,7 +63,7 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
|
|||||||
#undef RENAME
|
#undef RENAME
|
||||||
#undef COMPILE_TEMPLATE_MMXEXT
|
#undef COMPILE_TEMPLATE_MMXEXT
|
||||||
#define COMPILE_TEMPLATE_MMXEXT 1
|
#define COMPILE_TEMPLATE_MMXEXT 1
|
||||||
#define RENAME(a) a ## _MMX2
|
#define RENAME(a) a ## _MMXEXT
|
||||||
#include "yuv2rgb_template.c"
|
#include "yuv2rgb_template.c"
|
||||||
#endif /* HAVE_MMXEXT_INLINE */
|
#endif /* HAVE_MMXEXT_INLINE */
|
||||||
|
|
||||||
@ -81,8 +81,10 @@ av_cold SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c)
|
|||||||
#if HAVE_MMXEXT_INLINE
|
#if HAVE_MMXEXT_INLINE
|
||||||
if (cpu_flags & AV_CPU_FLAG_MMXEXT) {
|
if (cpu_flags & AV_CPU_FLAG_MMXEXT) {
|
||||||
switch (c->dstFormat) {
|
switch (c->dstFormat) {
|
||||||
case AV_PIX_FMT_RGB24: return yuv420_rgb24_MMX2;
|
case AV_PIX_FMT_RGB24:
|
||||||
case AV_PIX_FMT_BGR24: return yuv420_bgr24_MMX2;
|
return yuv420_rgb24_MMXEXT;
|
||||||
|
case AV_PIX_FMT_BGR24:
|
||||||
|
return yuv420_bgr24_MMXEXT;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user