mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-13 21:28:01 +02:00
Merge remote-tracking branch 'qatar/master'
* qatar/master:
  Consistently use "cpu_flags" as variable/parameter name for CPU flags

Conflicts:
  libavcodec/x86/dsputil_init.c
  libavcodec/x86/h264dsp_init.c
  libavcodec/x86/hpeldsp_init.c
  libavcodec/x86/motion_est.c
  libavcodec/x86/mpegvideo.c
  libavcodec/x86/proresdsp_init.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
commit
9d01bf7d66
@ -449,9 +449,7 @@ static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, ptrdi
|
|||||||
av_cold void ff_hpeldsp_init_ppc(HpelDSPContext *c, int flags)
|
av_cold void ff_hpeldsp_init_ppc(HpelDSPContext *c, int flags)
|
||||||
{
|
{
|
||||||
#if HAVE_ALTIVEC
|
#if HAVE_ALTIVEC
|
||||||
int mm_flags = av_get_cpu_flags();
|
if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
|
||||||
|
|
||||||
if (mm_flags & AV_CPU_FLAG_ALTIVEC) {
|
|
||||||
c->avg_pixels_tab[0][0] = ff_avg_pixels16_altivec;
|
c->avg_pixels_tab[0][0] = ff_avg_pixels16_altivec;
|
||||||
c->avg_pixels_tab[1][0] = avg_pixels8_altivec;
|
c->avg_pixels_tab[1][0] = avg_pixels8_altivec;
|
||||||
c->avg_pixels_tab[1][3] = avg_pixels8_xy2_altivec;
|
c->avg_pixels_tab[1][3] = avg_pixels8_xy2_altivec;
|
||||||
|
@ -185,46 +185,46 @@ static void ac3_downmix_sse(float **samples, float (*matrix)[2],
|
|||||||
|
|
||||||
av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
|
av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
|
||||||
{
|
{
|
||||||
int mm_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
if (EXTERNAL_MMX(mm_flags)) {
|
if (EXTERNAL_MMX(cpu_flags)) {
|
||||||
c->ac3_exponent_min = ff_ac3_exponent_min_mmx;
|
c->ac3_exponent_min = ff_ac3_exponent_min_mmx;
|
||||||
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx;
|
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx;
|
||||||
c->ac3_lshift_int16 = ff_ac3_lshift_int16_mmx;
|
c->ac3_lshift_int16 = ff_ac3_lshift_int16_mmx;
|
||||||
c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx;
|
c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx;
|
||||||
}
|
}
|
||||||
if (EXTERNAL_AMD3DNOW(mm_flags)) {
|
if (EXTERNAL_AMD3DNOW(cpu_flags)) {
|
||||||
if (!bit_exact) {
|
if (!bit_exact) {
|
||||||
c->float_to_fixed24 = ff_float_to_fixed24_3dnow;
|
c->float_to_fixed24 = ff_float_to_fixed24_3dnow;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (EXTERNAL_MMXEXT(mm_flags)) {
|
if (EXTERNAL_MMXEXT(cpu_flags)) {
|
||||||
c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
|
c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
|
||||||
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmxext;
|
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmxext;
|
||||||
}
|
}
|
||||||
if (EXTERNAL_SSE(mm_flags)) {
|
if (EXTERNAL_SSE(cpu_flags)) {
|
||||||
c->float_to_fixed24 = ff_float_to_fixed24_sse;
|
c->float_to_fixed24 = ff_float_to_fixed24_sse;
|
||||||
}
|
}
|
||||||
if (EXTERNAL_SSE2(mm_flags)) {
|
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
|
c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
|
||||||
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
|
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
|
||||||
c->float_to_fixed24 = ff_float_to_fixed24_sse2;
|
c->float_to_fixed24 = ff_float_to_fixed24_sse2;
|
||||||
c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
|
c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
|
||||||
c->extract_exponents = ff_ac3_extract_exponents_sse2;
|
c->extract_exponents = ff_ac3_extract_exponents_sse2;
|
||||||
if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
|
if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW)) {
|
||||||
c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
|
c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
|
||||||
c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
|
c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (EXTERNAL_SSSE3(mm_flags)) {
|
if (EXTERNAL_SSSE3(cpu_flags)) {
|
||||||
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
|
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
|
||||||
if (!(mm_flags & AV_CPU_FLAG_ATOM)) {
|
if (!(cpu_flags & AV_CPU_FLAG_ATOM)) {
|
||||||
c->extract_exponents = ff_ac3_extract_exponents_ssse3;
|
c->extract_exponents = ff_ac3_extract_exponents_ssse3;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if HAVE_SSE_INLINE && HAVE_7REGS
|
#if HAVE_SSE_INLINE && HAVE_7REGS
|
||||||
if (INLINE_SSE(mm_flags)) {
|
if (INLINE_SSE(cpu_flags)) {
|
||||||
c->downmix = ac3_downmix_sse;
|
c->downmix = ac3_downmix_sse;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -542,17 +542,17 @@ static av_cold void cavsdsp_init_3dnow(CAVSDSPContext *c,
|
|||||||
av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c, AVCodecContext *avctx)
|
av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c, AVCodecContext *avctx)
|
||||||
{
|
{
|
||||||
#if HAVE_MMX_INLINE
|
#if HAVE_MMX_INLINE
|
||||||
int mm_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
if (mm_flags & AV_CPU_FLAG_MMX)
|
if (cpu_flags & AV_CPU_FLAG_MMX)
|
||||||
cavsdsp_init_mmx(c, avctx);
|
cavsdsp_init_mmx(c, avctx);
|
||||||
#endif /* HAVE_MMX_INLINE */
|
#endif /* HAVE_MMX_INLINE */
|
||||||
#if HAVE_MMXEXT_INLINE
|
#if HAVE_MMXEXT_INLINE
|
||||||
if (mm_flags & AV_CPU_FLAG_MMXEXT)
|
if (cpu_flags & AV_CPU_FLAG_MMXEXT)
|
||||||
cavsdsp_init_mmxext(c, avctx);
|
cavsdsp_init_mmxext(c, avctx);
|
||||||
#endif /* HAVE_MMXEXT_INLINE */
|
#endif /* HAVE_MMXEXT_INLINE */
|
||||||
#if HAVE_AMD3DNOW_INLINE
|
#if HAVE_AMD3DNOW_INLINE
|
||||||
if (mm_flags & AV_CPU_FLAG_3DNOW)
|
if (cpu_flags & AV_CPU_FLAG_3DNOW)
|
||||||
cavsdsp_init_3dnow(c, avctx);
|
cavsdsp_init_3dnow(c, avctx);
|
||||||
#endif /* HAVE_AMD3DNOW_INLINE */
|
#endif /* HAVE_AMD3DNOW_INLINE */
|
||||||
}
|
}
|
||||||
|
@ -532,7 +532,7 @@ QPEL_OP(put_no_rnd_, _no_rnd_, mmxext)
|
|||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
|
static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
|
||||||
int mm_flags)
|
int cpu_flags)
|
||||||
{
|
{
|
||||||
#if HAVE_MMX_INLINE
|
#if HAVE_MMX_INLINE
|
||||||
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
|
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
|
||||||
@ -565,7 +565,7 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx,
|
static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx,
|
||||||
int mm_flags)
|
int cpu_flags)
|
||||||
{
|
{
|
||||||
#if HAVE_MMXEXT_EXTERNAL
|
#if HAVE_MMXEXT_EXTERNAL
|
||||||
SET_QPEL_FUNCS(avg_qpel, 0, 16, mmxext, );
|
SET_QPEL_FUNCS(avg_qpel, 0, 16, mmxext, );
|
||||||
@ -577,7 +577,7 @@ static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx,
|
|||||||
SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmxext, );
|
SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmxext, );
|
||||||
|
|
||||||
/* slower than cmov version on AMD */
|
/* slower than cmov version on AMD */
|
||||||
if (!(mm_flags & AV_CPU_FLAG_3DNOW))
|
if (!(cpu_flags & AV_CPU_FLAG_3DNOW))
|
||||||
c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmxext;
|
c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmxext;
|
||||||
|
|
||||||
c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext;
|
c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext;
|
||||||
@ -592,7 +592,7 @@ static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static av_cold void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx,
|
static av_cold void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx,
|
||||||
int mm_flags)
|
int cpu_flags)
|
||||||
{
|
{
|
||||||
#if HAVE_SSE_INLINE
|
#if HAVE_SSE_INLINE
|
||||||
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
|
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
|
||||||
@ -616,7 +616,7 @@ static av_cold void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
|
static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
|
||||||
int mm_flags)
|
int cpu_flags)
|
||||||
{
|
{
|
||||||
#if HAVE_SSE2_INLINE
|
#if HAVE_SSE2_INLINE
|
||||||
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
|
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
|
||||||
@ -632,14 +632,14 @@ static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
|
|||||||
#if HAVE_SSE2_EXTERNAL
|
#if HAVE_SSE2_EXTERNAL
|
||||||
c->scalarproduct_int16 = ff_scalarproduct_int16_sse2;
|
c->scalarproduct_int16 = ff_scalarproduct_int16_sse2;
|
||||||
c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2;
|
c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2;
|
||||||
if (mm_flags & AV_CPU_FLAG_ATOM) {
|
if (cpu_flags & AV_CPU_FLAG_ATOM) {
|
||||||
c->vector_clip_int32 = ff_vector_clip_int32_int_sse2;
|
c->vector_clip_int32 = ff_vector_clip_int32_int_sse2;
|
||||||
} else {
|
} else {
|
||||||
c->vector_clip_int32 = ff_vector_clip_int32_sse2;
|
c->vector_clip_int32 = ff_vector_clip_int32_sse2;
|
||||||
}
|
}
|
||||||
if (avctx->flags & CODEC_FLAG_BITEXACT) {
|
if (avctx->flags & CODEC_FLAG_BITEXACT) {
|
||||||
c->apply_window_int16 = ff_apply_window_int16_sse2;
|
c->apply_window_int16 = ff_apply_window_int16_sse2;
|
||||||
} else if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
|
} else if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW)) {
|
||||||
c->apply_window_int16 = ff_apply_window_int16_round_sse2;
|
c->apply_window_int16 = ff_apply_window_int16_round_sse2;
|
||||||
}
|
}
|
||||||
c->bswap_buf = ff_bswap32_buf_sse2;
|
c->bswap_buf = ff_bswap32_buf_sse2;
|
||||||
@ -647,25 +647,25 @@ static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static av_cold void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx,
|
static av_cold void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx,
|
||||||
int mm_flags)
|
int cpu_flags)
|
||||||
{
|
{
|
||||||
#if HAVE_SSSE3_EXTERNAL
|
#if HAVE_SSSE3_EXTERNAL
|
||||||
c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3;
|
c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3;
|
||||||
if (mm_flags & AV_CPU_FLAG_SSE4) // not really sse4, just slow on Conroe
|
if (cpu_flags & AV_CPU_FLAG_SSE4) // not really SSE4, just slow on Conroe
|
||||||
c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4;
|
c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4;
|
||||||
|
|
||||||
if (mm_flags & AV_CPU_FLAG_ATOM)
|
if (cpu_flags & AV_CPU_FLAG_ATOM)
|
||||||
c->apply_window_int16 = ff_apply_window_int16_ssse3_atom;
|
c->apply_window_int16 = ff_apply_window_int16_ssse3_atom;
|
||||||
else
|
else
|
||||||
c->apply_window_int16 = ff_apply_window_int16_ssse3;
|
c->apply_window_int16 = ff_apply_window_int16_ssse3;
|
||||||
if (!(mm_flags & (AV_CPU_FLAG_SSE42|AV_CPU_FLAG_3DNOW))) // cachesplit
|
if (!(cpu_flags & (AV_CPU_FLAG_SSE42 | AV_CPU_FLAG_3DNOW))) // cachesplit
|
||||||
c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3;
|
c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3;
|
||||||
c->bswap_buf = ff_bswap32_buf_ssse3;
|
c->bswap_buf = ff_bswap32_buf_ssse3;
|
||||||
#endif /* HAVE_SSSE3_EXTERNAL */
|
#endif /* HAVE_SSSE3_EXTERNAL */
|
||||||
}
|
}
|
||||||
|
|
||||||
static av_cold void dsputil_init_sse4(DSPContext *c, AVCodecContext *avctx,
|
static av_cold void dsputil_init_sse4(DSPContext *c, AVCodecContext *avctx,
|
||||||
int mm_flags)
|
int cpu_flags)
|
||||||
{
|
{
|
||||||
#if HAVE_SSE4_EXTERNAL
|
#if HAVE_SSE4_EXTERNAL
|
||||||
c->vector_clip_int32 = ff_vector_clip_int32_sse4;
|
c->vector_clip_int32 = ff_vector_clip_int32_sse4;
|
||||||
@ -674,14 +674,14 @@ static av_cold void dsputil_init_sse4(DSPContext *c, AVCodecContext *avctx,
|
|||||||
|
|
||||||
av_cold void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
|
av_cold void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
|
||||||
{
|
{
|
||||||
int mm_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
#if HAVE_7REGS && HAVE_INLINE_ASM
|
#if HAVE_7REGS && HAVE_INLINE_ASM
|
||||||
if (mm_flags & AV_CPU_FLAG_CMOV)
|
if (cpu_flags & AV_CPU_FLAG_CMOV)
|
||||||
c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_cmov;
|
c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_cmov;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (mm_flags & AV_CPU_FLAG_MMX) {
|
if (cpu_flags & AV_CPU_FLAG_MMX) {
|
||||||
#if HAVE_INLINE_ASM
|
#if HAVE_INLINE_ASM
|
||||||
const int idct_algo = avctx->idct_algo;
|
const int idct_algo = avctx->idct_algo;
|
||||||
|
|
||||||
@ -692,12 +692,12 @@ av_cold void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
|
|||||||
c->idct = ff_simple_idct_mmx;
|
c->idct = ff_simple_idct_mmx;
|
||||||
c->idct_permutation_type = FF_SIMPLE_IDCT_PERM;
|
c->idct_permutation_type = FF_SIMPLE_IDCT_PERM;
|
||||||
} else if (idct_algo == FF_IDCT_XVIDMMX) {
|
} else if (idct_algo == FF_IDCT_XVIDMMX) {
|
||||||
if (mm_flags & AV_CPU_FLAG_SSE2) {
|
if (cpu_flags & AV_CPU_FLAG_SSE2) {
|
||||||
c->idct_put = ff_idct_xvid_sse2_put;
|
c->idct_put = ff_idct_xvid_sse2_put;
|
||||||
c->idct_add = ff_idct_xvid_sse2_add;
|
c->idct_add = ff_idct_xvid_sse2_add;
|
||||||
c->idct = ff_idct_xvid_sse2;
|
c->idct = ff_idct_xvid_sse2;
|
||||||
c->idct_permutation_type = FF_SSE2_IDCT_PERM;
|
c->idct_permutation_type = FF_SSE2_IDCT_PERM;
|
||||||
} else if (mm_flags & AV_CPU_FLAG_MMXEXT) {
|
} else if (cpu_flags & AV_CPU_FLAG_MMXEXT) {
|
||||||
c->idct_put = ff_idct_xvid_mmxext_put;
|
c->idct_put = ff_idct_xvid_mmxext_put;
|
||||||
c->idct_add = ff_idct_xvid_mmxext_add;
|
c->idct_add = ff_idct_xvid_mmxext_add;
|
||||||
c->idct = ff_idct_xvid_mmxext;
|
c->idct = ff_idct_xvid_mmxext;
|
||||||
@ -710,23 +710,23 @@ av_cold void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
|
|||||||
}
|
}
|
||||||
#endif /* HAVE_INLINE_ASM */
|
#endif /* HAVE_INLINE_ASM */
|
||||||
|
|
||||||
dsputil_init_mmx(c, avctx, mm_flags);
|
dsputil_init_mmx(c, avctx, cpu_flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mm_flags & AV_CPU_FLAG_MMXEXT)
|
if (cpu_flags & AV_CPU_FLAG_MMXEXT)
|
||||||
dsputil_init_mmxext(c, avctx, mm_flags);
|
dsputil_init_mmxext(c, avctx, cpu_flags);
|
||||||
|
|
||||||
if (mm_flags & AV_CPU_FLAG_SSE)
|
if (cpu_flags & AV_CPU_FLAG_SSE)
|
||||||
dsputil_init_sse(c, avctx, mm_flags);
|
dsputil_init_sse(c, avctx, cpu_flags);
|
||||||
|
|
||||||
if (mm_flags & AV_CPU_FLAG_SSE2)
|
if (cpu_flags & AV_CPU_FLAG_SSE2)
|
||||||
dsputil_init_sse2(c, avctx, mm_flags);
|
dsputil_init_sse2(c, avctx, cpu_flags);
|
||||||
|
|
||||||
if (mm_flags & AV_CPU_FLAG_SSSE3)
|
if (cpu_flags & AV_CPU_FLAG_SSSE3)
|
||||||
dsputil_init_ssse3(c, avctx, mm_flags);
|
dsputil_init_ssse3(c, avctx, cpu_flags);
|
||||||
|
|
||||||
if (mm_flags & AV_CPU_FLAG_SSE4)
|
if (cpu_flags & AV_CPU_FLAG_SSE4)
|
||||||
dsputil_init_sse4(c, avctx, mm_flags);
|
dsputil_init_sse4(c, avctx, cpu_flags);
|
||||||
|
|
||||||
if (CONFIG_ENCODERS)
|
if (CONFIG_ENCODERS)
|
||||||
ff_dsputilenc_init_mmx(c, avctx);
|
ff_dsputilenc_init_mmx(c, avctx);
|
||||||
|
@ -946,12 +946,12 @@ hadamard_func(ssse3)
|
|||||||
|
|
||||||
av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx)
|
av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx)
|
||||||
{
|
{
|
||||||
int mm_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
#if HAVE_YASM
|
#if HAVE_YASM
|
||||||
int bit_depth = avctx->bits_per_raw_sample;
|
int bit_depth = avctx->bits_per_raw_sample;
|
||||||
|
|
||||||
if (EXTERNAL_MMX(mm_flags)) {
|
if (EXTERNAL_MMX(cpu_flags)) {
|
||||||
if (bit_depth <= 8)
|
if (bit_depth <= 8)
|
||||||
c->get_pixels = ff_get_pixels_mmx;
|
c->get_pixels = ff_get_pixels_mmx;
|
||||||
c->diff_pixels = ff_diff_pixels_mmx;
|
c->diff_pixels = ff_diff_pixels_mmx;
|
||||||
@ -959,19 +959,19 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx)
|
|||||||
|
|
||||||
c->pix_norm1 = ff_pix_norm1_mmx;
|
c->pix_norm1 = ff_pix_norm1_mmx;
|
||||||
}
|
}
|
||||||
if (EXTERNAL_SSE2(mm_flags))
|
if (EXTERNAL_SSE2(cpu_flags))
|
||||||
if (bit_depth <= 8)
|
if (bit_depth <= 8)
|
||||||
c->get_pixels = ff_get_pixels_sse2;
|
c->get_pixels = ff_get_pixels_sse2;
|
||||||
#endif /* HAVE_YASM */
|
#endif /* HAVE_YASM */
|
||||||
|
|
||||||
#if HAVE_INLINE_ASM
|
#if HAVE_INLINE_ASM
|
||||||
if (mm_flags & AV_CPU_FLAG_MMX) {
|
if (cpu_flags & AV_CPU_FLAG_MMX) {
|
||||||
const int dct_algo = avctx->dct_algo;
|
const int dct_algo = avctx->dct_algo;
|
||||||
if (avctx->bits_per_raw_sample <= 8 &&
|
if (avctx->bits_per_raw_sample <= 8 &&
|
||||||
(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX)) {
|
(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX)) {
|
||||||
if(mm_flags & AV_CPU_FLAG_SSE2){
|
if (cpu_flags & AV_CPU_FLAG_SSE2) {
|
||||||
c->fdct = ff_fdct_sse2;
|
c->fdct = ff_fdct_sse2;
|
||||||
} else if (mm_flags & AV_CPU_FLAG_MMXEXT) {
|
} else if (cpu_flags & AV_CPU_FLAG_MMXEXT) {
|
||||||
c->fdct = ff_fdct_mmxext;
|
c->fdct = ff_fdct_mmxext;
|
||||||
}else{
|
}else{
|
||||||
c->fdct = ff_fdct_mmx;
|
c->fdct = ff_fdct_mmx;
|
||||||
@ -999,7 +999,7 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx)
|
|||||||
|
|
||||||
c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx;
|
c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx;
|
||||||
|
|
||||||
if (mm_flags & AV_CPU_FLAG_MMXEXT) {
|
if (cpu_flags & AV_CPU_FLAG_MMXEXT) {
|
||||||
c->sum_abs_dctelem = sum_abs_dctelem_mmxext;
|
c->sum_abs_dctelem = sum_abs_dctelem_mmxext;
|
||||||
c->vsad[4] = vsad_intra16_mmxext;
|
c->vsad[4] = vsad_intra16_mmxext;
|
||||||
|
|
||||||
@ -1010,12 +1010,12 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx)
|
|||||||
c->sub_hfyu_median_prediction = sub_hfyu_median_prediction_mmxext;
|
c->sub_hfyu_median_prediction = sub_hfyu_median_prediction_mmxext;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(mm_flags & AV_CPU_FLAG_SSE2){
|
if (cpu_flags & AV_CPU_FLAG_SSE2) {
|
||||||
c->sum_abs_dctelem= sum_abs_dctelem_sse2;
|
c->sum_abs_dctelem= sum_abs_dctelem_sse2;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if HAVE_SSSE3_INLINE
|
#if HAVE_SSSE3_INLINE
|
||||||
if(mm_flags & AV_CPU_FLAG_SSSE3){
|
if (cpu_flags & AV_CPU_FLAG_SSSE3) {
|
||||||
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
|
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
|
||||||
c->try_8x8basis= try_8x8basis_ssse3;
|
c->try_8x8basis= try_8x8basis_ssse3;
|
||||||
}
|
}
|
||||||
@ -1024,7 +1024,7 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if(mm_flags & AV_CPU_FLAG_3DNOW){
|
if (cpu_flags & AV_CPU_FLAG_3DNOW) {
|
||||||
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
|
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
|
||||||
c->try_8x8basis= try_8x8basis_3dnow;
|
c->try_8x8basis= try_8x8basis_3dnow;
|
||||||
}
|
}
|
||||||
@ -1033,16 +1033,16 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx)
|
|||||||
}
|
}
|
||||||
#endif /* HAVE_INLINE_ASM */
|
#endif /* HAVE_INLINE_ASM */
|
||||||
|
|
||||||
if (EXTERNAL_MMX(mm_flags)) {
|
if (EXTERNAL_MMX(cpu_flags)) {
|
||||||
c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx;
|
c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx;
|
||||||
c->hadamard8_diff[1] = ff_hadamard8_diff_mmx;
|
c->hadamard8_diff[1] = ff_hadamard8_diff_mmx;
|
||||||
|
|
||||||
if (EXTERNAL_MMXEXT(mm_flags)) {
|
if (EXTERNAL_MMXEXT(cpu_flags)) {
|
||||||
c->hadamard8_diff[0] = ff_hadamard8_diff16_mmxext;
|
c->hadamard8_diff[0] = ff_hadamard8_diff16_mmxext;
|
||||||
c->hadamard8_diff[1] = ff_hadamard8_diff_mmxext;
|
c->hadamard8_diff[1] = ff_hadamard8_diff_mmxext;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (EXTERNAL_SSE2(mm_flags)) {
|
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
c->sse[0] = ff_sse16_sse2;
|
c->sse[0] = ff_sse16_sse2;
|
||||||
|
|
||||||
#if HAVE_ALIGNED_STACK
|
#if HAVE_ALIGNED_STACK
|
||||||
@ -1051,7 +1051,7 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
if (EXTERNAL_SSSE3(mm_flags) && HAVE_ALIGNED_STACK) {
|
if (EXTERNAL_SSSE3(cpu_flags) && HAVE_ALIGNED_STACK) {
|
||||||
c->hadamard8_diff[0] = ff_hadamard8_diff16_ssse3;
|
c->hadamard8_diff[0] = ff_hadamard8_diff16_ssse3;
|
||||||
c->hadamard8_diff[1] = ff_hadamard8_diff_ssse3;
|
c->hadamard8_diff[1] = ff_hadamard8_diff_ssse3;
|
||||||
}
|
}
|
||||||
|
@ -23,22 +23,23 @@
|
|||||||
|
|
||||||
av_cold void ff_fft_init_x86(FFTContext *s)
|
av_cold void ff_fft_init_x86(FFTContext *s)
|
||||||
{
|
{
|
||||||
int has_vectors = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
#if ARCH_X86_32
|
#if ARCH_X86_32
|
||||||
if (EXTERNAL_AMD3DNOW(has_vectors)) {
|
if (EXTERNAL_AMD3DNOW(cpu_flags)) {
|
||||||
/* 3DNow! for K6-2/3 */
|
/* 3DNow! for K6-2/3 */
|
||||||
s->imdct_calc = ff_imdct_calc_3dnow;
|
s->imdct_calc = ff_imdct_calc_3dnow;
|
||||||
s->imdct_half = ff_imdct_half_3dnow;
|
s->imdct_half = ff_imdct_half_3dnow;
|
||||||
s->fft_calc = ff_fft_calc_3dnow;
|
s->fft_calc = ff_fft_calc_3dnow;
|
||||||
}
|
}
|
||||||
if (EXTERNAL_AMD3DNOWEXT(has_vectors)) {
|
if (EXTERNAL_AMD3DNOWEXT(cpu_flags)) {
|
||||||
/* 3DNowEx for K7 */
|
/* 3DNowEx for K7 */
|
||||||
s->imdct_calc = ff_imdct_calc_3dnowext;
|
s->imdct_calc = ff_imdct_calc_3dnowext;
|
||||||
s->imdct_half = ff_imdct_half_3dnowext;
|
s->imdct_half = ff_imdct_half_3dnowext;
|
||||||
s->fft_calc = ff_fft_calc_3dnowext;
|
s->fft_calc = ff_fft_calc_3dnowext;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
if (EXTERNAL_SSE(has_vectors)) {
|
if (EXTERNAL_SSE(cpu_flags)) {
|
||||||
/* SSE for P3/P4/K8 */
|
/* SSE for P3/P4/K8 */
|
||||||
s->imdct_calc = ff_imdct_calc_sse;
|
s->imdct_calc = ff_imdct_calc_sse;
|
||||||
s->imdct_half = ff_imdct_half_sse;
|
s->imdct_half = ff_imdct_half_sse;
|
||||||
@ -46,7 +47,7 @@ av_cold void ff_fft_init_x86(FFTContext *s)
|
|||||||
s->fft_calc = ff_fft_calc_sse;
|
s->fft_calc = ff_fft_calc_sse;
|
||||||
s->fft_permutation = FF_FFT_PERM_SWAP_LSBS;
|
s->fft_permutation = FF_FFT_PERM_SWAP_LSBS;
|
||||||
}
|
}
|
||||||
if (EXTERNAL_AVX(has_vectors) && s->nbits >= 5) {
|
if (EXTERNAL_AVX(cpu_flags) && s->nbits >= 5) {
|
||||||
/* AVX for SB */
|
/* AVX for SB */
|
||||||
s->imdct_half = ff_imdct_half_avx;
|
s->imdct_half = ff_imdct_half_avx;
|
||||||
s->fft_calc = ff_fft_calc_avx;
|
s->fft_calc = ff_fft_calc_avx;
|
||||||
@ -57,12 +58,13 @@ av_cold void ff_fft_init_x86(FFTContext *s)
|
|||||||
#if CONFIG_DCT
|
#if CONFIG_DCT
|
||||||
av_cold void ff_dct_init_x86(DCTContext *s)
|
av_cold void ff_dct_init_x86(DCTContext *s)
|
||||||
{
|
{
|
||||||
int has_vectors = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
if (EXTERNAL_SSE(has_vectors))
|
|
||||||
|
if (EXTERNAL_SSE(cpu_flags))
|
||||||
s->dct32 = ff_dct32_float_sse;
|
s->dct32 = ff_dct32_float_sse;
|
||||||
if (EXTERNAL_SSE2(has_vectors))
|
if (EXTERNAL_SSE2(cpu_flags))
|
||||||
s->dct32 = ff_dct32_float_sse2;
|
s->dct32 = ff_dct32_float_sse2;
|
||||||
if (EXTERNAL_AVX(has_vectors))
|
if (EXTERNAL_AVX(cpu_flags))
|
||||||
s->dct32 = ff_dct32_float_avx;
|
s->dct32 = ff_dct32_float_avx;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -116,29 +116,29 @@ static void float_interleave_sse(float *dst, const float **src,
|
|||||||
av_cold void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
|
av_cold void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
|
||||||
{
|
{
|
||||||
#if HAVE_YASM
|
#if HAVE_YASM
|
||||||
int mm_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
if (EXTERNAL_MMX(mm_flags)) {
|
if (EXTERNAL_MMX(cpu_flags)) {
|
||||||
c->float_interleave = float_interleave_mmx;
|
c->float_interleave = float_interleave_mmx;
|
||||||
|
|
||||||
if (EXTERNAL_AMD3DNOW(mm_flags)) {
|
if (EXTERNAL_AMD3DNOW(cpu_flags)) {
|
||||||
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
|
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
|
||||||
c->float_to_int16 = ff_float_to_int16_3dnow;
|
c->float_to_int16 = ff_float_to_int16_3dnow;
|
||||||
c->float_to_int16_interleave = float_to_int16_interleave_3dnow;
|
c->float_to_int16_interleave = float_to_int16_interleave_3dnow;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (EXTERNAL_AMD3DNOWEXT(mm_flags)) {
|
if (EXTERNAL_AMD3DNOWEXT(cpu_flags)) {
|
||||||
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
|
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
|
||||||
c->float_to_int16_interleave = float_to_int16_interleave_3dnowext;
|
c->float_to_int16_interleave = float_to_int16_interleave_3dnowext;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (EXTERNAL_SSE(mm_flags)) {
|
if (EXTERNAL_SSE(cpu_flags)) {
|
||||||
c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse;
|
c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse;
|
||||||
c->float_to_int16 = ff_float_to_int16_sse;
|
c->float_to_int16 = ff_float_to_int16_sse;
|
||||||
c->float_to_int16_interleave = float_to_int16_interleave_sse;
|
c->float_to_int16_interleave = float_to_int16_interleave_sse;
|
||||||
c->float_interleave = float_interleave_sse;
|
c->float_interleave = float_interleave_sse;
|
||||||
}
|
}
|
||||||
if (EXTERNAL_SSE2(mm_flags)) {
|
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse2;
|
c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse2;
|
||||||
c->float_to_int16 = ff_float_to_int16_sse2;
|
c->float_to_int16 = ff_float_to_int16_sse2;
|
||||||
c->float_to_int16_interleave = float_to_int16_interleave_sse2;
|
c->float_to_int16_interleave = float_to_int16_interleave_sse2;
|
||||||
|
@ -185,10 +185,10 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
|
|||||||
const int bit_depth,
|
const int bit_depth,
|
||||||
const int chroma_format_idc)
|
const int chroma_format_idc)
|
||||||
{
|
{
|
||||||
int mm_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
if (bit_depth == 8) {
|
if (bit_depth == 8) {
|
||||||
if (EXTERNAL_MMX(mm_flags)) {
|
if (EXTERNAL_MMX(cpu_flags)) {
|
||||||
h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_8_mmx;
|
h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_8_mmx;
|
||||||
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_8_mmx;
|
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_8_mmx;
|
||||||
if (chroma_format_idc == 1) {
|
if (chroma_format_idc == 1) {
|
||||||
@ -203,7 +203,7 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
|
|||||||
if (chroma_format_idc == 1)
|
if (chroma_format_idc == 1)
|
||||||
h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_8_mmx;
|
h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_8_mmx;
|
||||||
if (codec_id == AV_CODEC_ID_SVQ3) {
|
if (codec_id == AV_CODEC_ID_SVQ3) {
|
||||||
if (mm_flags & AV_CPU_FLAG_CMOV)
|
if (cpu_flags & AV_CPU_FLAG_CMOV)
|
||||||
h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_8_mmx;
|
h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_8_mmx;
|
||||||
} else if (codec_id == AV_CODEC_ID_RV40) {
|
} else if (codec_id == AV_CODEC_ID_RV40) {
|
||||||
h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_8_mmx;
|
h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_8_mmx;
|
||||||
@ -213,7 +213,7 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (EXTERNAL_MMXEXT(mm_flags)) {
|
if (EXTERNAL_MMXEXT(cpu_flags)) {
|
||||||
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_8_mmxext;
|
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_8_mmxext;
|
||||||
h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_8_mmxext;
|
h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_8_mmxext;
|
||||||
if (chroma_format_idc == 1)
|
if (chroma_format_idc == 1)
|
||||||
@ -265,11 +265,11 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (EXTERNAL_SSE(mm_flags)) {
|
if (EXTERNAL_SSE(cpu_flags)) {
|
||||||
h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_8_sse;
|
h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_8_sse;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (EXTERNAL_SSE2(mm_flags)) {
|
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_8_sse2;
|
h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_8_sse2;
|
||||||
h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_8_sse2;
|
h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_8_sse2;
|
||||||
h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_8_sse2;
|
h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_8_sse2;
|
||||||
@ -292,7 +292,7 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (EXTERNAL_SSSE3(mm_flags)) {
|
if (EXTERNAL_SSSE3(cpu_flags)) {
|
||||||
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_8_ssse3;
|
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_8_ssse3;
|
||||||
h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_8_ssse3;
|
h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_8_ssse3;
|
||||||
if (chroma_format_idc == 1)
|
if (chroma_format_idc == 1)
|
||||||
@ -323,7 +323,7 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (bit_depth == 10) {
|
} else if (bit_depth == 10) {
|
||||||
if (EXTERNAL_MMXEXT(mm_flags)) {
|
if (EXTERNAL_MMXEXT(cpu_flags)) {
|
||||||
h->pred4x4[DC_PRED ] = ff_pred4x4_dc_10_mmxext;
|
h->pred4x4[DC_PRED ] = ff_pred4x4_dc_10_mmxext;
|
||||||
h->pred4x4[HOR_UP_PRED ] = ff_pred4x4_horizontal_up_10_mmxext;
|
h->pred4x4[HOR_UP_PRED ] = ff_pred4x4_horizontal_up_10_mmxext;
|
||||||
|
|
||||||
@ -339,7 +339,7 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
|
|||||||
h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_10_mmxext;
|
h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_10_mmxext;
|
||||||
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_10_mmxext;
|
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_10_mmxext;
|
||||||
}
|
}
|
||||||
if (EXTERNAL_SSE2(mm_flags)) {
|
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_sse2;
|
h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_sse2;
|
||||||
h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_sse2;
|
h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_sse2;
|
||||||
h->pred4x4[VERT_LEFT_PRED ] = ff_pred4x4_vertical_left_10_sse2;
|
h->pred4x4[VERT_LEFT_PRED ] = ff_pred4x4_vertical_left_10_sse2;
|
||||||
@ -371,7 +371,7 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
|
|||||||
h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_10_sse2;
|
h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_10_sse2;
|
||||||
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_10_sse2;
|
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_10_sse2;
|
||||||
}
|
}
|
||||||
if (EXTERNAL_SSSE3(mm_flags)) {
|
if (EXTERNAL_SSSE3(cpu_flags)) {
|
||||||
h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_ssse3;
|
h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_ssse3;
|
||||||
h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_ssse3;
|
h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_ssse3;
|
||||||
h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_ssse3;
|
h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_ssse3;
|
||||||
@ -382,7 +382,7 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
|
|||||||
h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_10_ssse3;
|
h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_10_ssse3;
|
||||||
h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_10_ssse3;
|
h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_10_ssse3;
|
||||||
}
|
}
|
||||||
if (EXTERNAL_AVX(mm_flags)) {
|
if (EXTERNAL_AVX(cpu_flags)) {
|
||||||
h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_avx;
|
h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_avx;
|
||||||
h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_avx;
|
h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_avx;
|
||||||
h->pred4x4[VERT_LEFT_PRED ] = ff_pred4x4_vertical_left_10_avx;
|
h->pred4x4[VERT_LEFT_PRED ] = ff_pred4x4_vertical_left_10_avx;
|
||||||
|
@ -542,9 +542,9 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
|
|||||||
{
|
{
|
||||||
#if HAVE_YASM
|
#if HAVE_YASM
|
||||||
int high_bit_depth = bit_depth > 8;
|
int high_bit_depth = bit_depth > 8;
|
||||||
int mm_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
if (EXTERNAL_MMXEXT(mm_flags)) {
|
if (EXTERNAL_MMXEXT(cpu_flags)) {
|
||||||
if (!high_bit_depth) {
|
if (!high_bit_depth) {
|
||||||
SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, );
|
SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, );
|
||||||
SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmxext, );
|
SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmxext, );
|
||||||
@ -564,8 +564,8 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (EXTERNAL_SSE2(mm_flags)) {
|
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW) && !high_bit_depth) {
|
if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW) && !high_bit_depth) {
|
||||||
// these functions are slower than mmx on AMD, but faster on Intel
|
// these functions are slower than mmx on AMD, but faster on Intel
|
||||||
H264_QPEL_FUNCS(0, 0, sse2);
|
H264_QPEL_FUNCS(0, 0, sse2);
|
||||||
}
|
}
|
||||||
@ -596,7 +596,7 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (EXTERNAL_SSSE3(mm_flags)) {
|
if (EXTERNAL_SSSE3(cpu_flags)) {
|
||||||
if (!high_bit_depth) {
|
if (!high_bit_depth) {
|
||||||
H264_QPEL_FUNCS(1, 0, ssse3);
|
H264_QPEL_FUNCS(1, 0, ssse3);
|
||||||
H264_QPEL_FUNCS(1, 1, ssse3);
|
H264_QPEL_FUNCS(1, 1, ssse3);
|
||||||
@ -619,7 +619,7 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (EXTERNAL_AVX(mm_flags)) {
|
if (EXTERNAL_AVX(cpu_flags)) {
|
||||||
/* AVX implies 64 byte cache lines without the need to avoid unaligned
|
/* AVX implies 64 byte cache lines without the need to avoid unaligned
|
||||||
* memory accesses that cross the boundary between two cache lines.
|
* memory accesses that cross the boundary between two cache lines.
|
||||||
* TODO: Port X264_CPU_CACHELINE_32/64 detection from x264 to avoid
|
* TODO: Port X264_CPU_CACHELINE_32/64 detection from x264 to avoid
|
||||||
|
@ -71,45 +71,45 @@ av_cold void ff_h264chroma_init_x86(H264ChromaContext *c, int bit_depth)
|
|||||||
{
|
{
|
||||||
#if HAVE_YASM
|
#if HAVE_YASM
|
||||||
int high_bit_depth = bit_depth > 8;
|
int high_bit_depth = bit_depth > 8;
|
||||||
int mm_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
if (EXTERNAL_MMX(mm_flags) && !high_bit_depth) {
|
if (EXTERNAL_MMX(cpu_flags) && !high_bit_depth) {
|
||||||
c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_mmx;
|
c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_mmx;
|
||||||
c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_mmx;
|
c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_mmx;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (EXTERNAL_AMD3DNOW(mm_flags) && !high_bit_depth) {
|
if (EXTERNAL_AMD3DNOW(cpu_flags) && !high_bit_depth) {
|
||||||
c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_3dnow;
|
c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_3dnow;
|
||||||
c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_3dnow;
|
c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_3dnow;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (EXTERNAL_MMXEXT(mm_flags) && !high_bit_depth) {
|
if (EXTERNAL_MMXEXT(cpu_flags) && !high_bit_depth) {
|
||||||
c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_mmxext;
|
c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_mmxext;
|
||||||
c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_mmxext;
|
c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_mmxext;
|
||||||
c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_mmxext;
|
c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_mmxext;
|
||||||
c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_mmxext;
|
c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_mmxext;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (EXTERNAL_MMXEXT(mm_flags) && bit_depth > 8 && bit_depth <= 10) {
|
if (EXTERNAL_MMXEXT(cpu_flags) && bit_depth > 8 && bit_depth <= 10) {
|
||||||
c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_10_mmxext;
|
c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_10_mmxext;
|
||||||
c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_10_mmxext;
|
c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_10_mmxext;
|
||||||
c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_10_mmxext;
|
c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_10_mmxext;
|
||||||
c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_10_mmxext;
|
c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_10_mmxext;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (EXTERNAL_SSE2(mm_flags) && bit_depth > 8 && bit_depth <= 10) {
|
if (EXTERNAL_SSE2(cpu_flags) && bit_depth > 8 && bit_depth <= 10) {
|
||||||
c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_sse2;
|
c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_sse2;
|
||||||
c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_sse2;
|
c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_sse2;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (EXTERNAL_SSSE3(mm_flags) && !high_bit_depth) {
|
if (EXTERNAL_SSSE3(cpu_flags) && !high_bit_depth) {
|
||||||
c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_ssse3;
|
c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_ssse3;
|
||||||
c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_ssse3;
|
c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_ssse3;
|
||||||
c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_ssse3;
|
c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_ssse3;
|
||||||
c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_ssse3;
|
c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_ssse3;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (EXTERNAL_AVX(mm_flags) && bit_depth > 8 && bit_depth <= 10) {
|
if (EXTERNAL_AVX(cpu_flags) && bit_depth > 8 && bit_depth <= 10) {
|
||||||
// AVX implies !cache64.
|
// AVX implies !cache64.
|
||||||
// TODO: Port cache(32|64) detection from x264.
|
// TODO: Port cache(32|64) detection from x264.
|
||||||
c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_avx;
|
c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_avx;
|
||||||
|
@ -212,13 +212,13 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
|
|||||||
const int chroma_format_idc)
|
const int chroma_format_idc)
|
||||||
{
|
{
|
||||||
#if HAVE_YASM
|
#if HAVE_YASM
|
||||||
int mm_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
if (chroma_format_idc == 1 && EXTERNAL_MMXEXT(mm_flags))
|
if (chroma_format_idc == 1 && EXTERNAL_MMXEXT(cpu_flags))
|
||||||
c->h264_loop_filter_strength = ff_h264_loop_filter_strength_mmxext;
|
c->h264_loop_filter_strength = ff_h264_loop_filter_strength_mmxext;
|
||||||
|
|
||||||
if (bit_depth == 8) {
|
if (bit_depth == 8) {
|
||||||
if (EXTERNAL_MMX(mm_flags)) {
|
if (EXTERNAL_MMX(cpu_flags)) {
|
||||||
c->h264_idct_dc_add =
|
c->h264_idct_dc_add =
|
||||||
c->h264_idct_add = ff_h264_idct_add_8_mmx;
|
c->h264_idct_add = ff_h264_idct_add_8_mmx;
|
||||||
c->h264_idct8_dc_add =
|
c->h264_idct8_dc_add =
|
||||||
@ -229,10 +229,10 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
|
|||||||
if (chroma_format_idc == 1)
|
if (chroma_format_idc == 1)
|
||||||
c->h264_idct_add8 = ff_h264_idct_add8_8_mmx;
|
c->h264_idct_add8 = ff_h264_idct_add8_8_mmx;
|
||||||
c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmx;
|
c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmx;
|
||||||
if (mm_flags & AV_CPU_FLAG_CMOV)
|
if (cpu_flags & AV_CPU_FLAG_CMOV)
|
||||||
c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_mmx;
|
c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_mmx;
|
||||||
|
|
||||||
if (EXTERNAL_MMXEXT(mm_flags)) {
|
if (EXTERNAL_MMXEXT(cpu_flags)) {
|
||||||
c->h264_idct_dc_add = ff_h264_idct_dc_add_8_mmxext;
|
c->h264_idct_dc_add = ff_h264_idct_dc_add_8_mmxext;
|
||||||
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmxext;
|
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmxext;
|
||||||
c->h264_idct_add16 = ff_h264_idct_add16_8_mmxext;
|
c->h264_idct_add16 = ff_h264_idct_add16_8_mmxext;
|
||||||
@ -261,7 +261,7 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
|
|||||||
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_mmxext;
|
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_mmxext;
|
||||||
c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_mmxext;
|
c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_mmxext;
|
||||||
|
|
||||||
if (EXTERNAL_SSE2(mm_flags)) {
|
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
c->h264_idct8_add = ff_h264_idct8_add_8_sse2;
|
c->h264_idct8_add = ff_h264_idct8_add_8_sse2;
|
||||||
|
|
||||||
c->h264_idct_add16 = ff_h264_idct_add16_8_sse2;
|
c->h264_idct_add16 = ff_h264_idct_add16_8_sse2;
|
||||||
@ -282,11 +282,11 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
|
|||||||
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2;
|
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2;
|
||||||
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2;
|
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2;
|
||||||
}
|
}
|
||||||
if (EXTERNAL_SSSE3(mm_flags)) {
|
if (EXTERNAL_SSSE3(cpu_flags)) {
|
||||||
c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3;
|
c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3;
|
||||||
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_ssse3;
|
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_ssse3;
|
||||||
}
|
}
|
||||||
if (EXTERNAL_AVX(mm_flags)) {
|
if (EXTERNAL_AVX(cpu_flags)) {
|
||||||
c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_avx;
|
c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_avx;
|
||||||
c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_avx;
|
c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_avx;
|
||||||
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx;
|
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx;
|
||||||
@ -295,8 +295,8 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (bit_depth == 10) {
|
} else if (bit_depth == 10) {
|
||||||
if (EXTERNAL_MMX(mm_flags)) {
|
if (EXTERNAL_MMX(cpu_flags)) {
|
||||||
if (EXTERNAL_MMXEXT(mm_flags)) {
|
if (EXTERNAL_MMXEXT(cpu_flags)) {
|
||||||
#if ARCH_X86_32
|
#if ARCH_X86_32
|
||||||
c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_10_mmxext;
|
c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_10_mmxext;
|
||||||
c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_mmxext;
|
c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_mmxext;
|
||||||
@ -306,7 +306,7 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
|
|||||||
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_mmxext;
|
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_mmxext;
|
||||||
#endif /* ARCH_X86_32 */
|
#endif /* ARCH_X86_32 */
|
||||||
c->h264_idct_dc_add = ff_h264_idct_dc_add_10_mmxext;
|
c->h264_idct_dc_add = ff_h264_idct_dc_add_10_mmxext;
|
||||||
if (EXTERNAL_SSE2(mm_flags)) {
|
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
c->h264_idct_add = ff_h264_idct_add_10_sse2;
|
c->h264_idct_add = ff_h264_idct_add_10_sse2;
|
||||||
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_sse2;
|
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_sse2;
|
||||||
|
|
||||||
@ -336,7 +336,7 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
|
|||||||
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2;
|
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2;
|
||||||
#endif /* HAVE_ALIGNED_STACK */
|
#endif /* HAVE_ALIGNED_STACK */
|
||||||
}
|
}
|
||||||
if (EXTERNAL_SSE4(mm_flags)) {
|
if (EXTERNAL_SSE4(cpu_flags)) {
|
||||||
c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse4;
|
c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse4;
|
||||||
c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse4;
|
c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse4;
|
||||||
c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse4;
|
c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse4;
|
||||||
@ -345,7 +345,7 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
|
|||||||
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse4;
|
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse4;
|
||||||
c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse4;
|
c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse4;
|
||||||
}
|
}
|
||||||
if (EXTERNAL_AVX(mm_flags)) {
|
if (EXTERNAL_AVX(cpu_flags)) {
|
||||||
c->h264_idct_dc_add =
|
c->h264_idct_dc_add =
|
||||||
c->h264_idct_add = ff_h264_idct_add_10_avx;
|
c->h264_idct_add = ff_h264_idct_add_10_avx;
|
||||||
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_avx;
|
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_avx;
|
||||||
|
@ -157,7 +157,7 @@ HPELDSP_AVG_PIXELS16(_mmxext)
|
|||||||
c->PFX ## _pixels_tab IDX [3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU; \
|
c->PFX ## _pixels_tab IDX [3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU; \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
static void hpeldsp_init_mmx(HpelDSPContext *c, int flags, int mm_flags)
|
static void hpeldsp_init_mmx(HpelDSPContext *c, int flags, int cpu_flags)
|
||||||
{
|
{
|
||||||
#if HAVE_MMX_INLINE
|
#if HAVE_MMX_INLINE
|
||||||
SET_HPEL_FUNCS(put, [0], 16, mmx);
|
SET_HPEL_FUNCS(put, [0], 16, mmx);
|
||||||
@ -170,7 +170,7 @@ static void hpeldsp_init_mmx(HpelDSPContext *c, int flags, int mm_flags)
|
|||||||
#endif /* HAVE_MMX_INLINE */
|
#endif /* HAVE_MMX_INLINE */
|
||||||
}
|
}
|
||||||
|
|
||||||
static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags, int mm_flags)
|
static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags, int cpu_flags)
|
||||||
{
|
{
|
||||||
#if HAVE_MMXEXT_EXTERNAL
|
#if HAVE_MMXEXT_EXTERNAL
|
||||||
c->put_pixels_tab[0][1] = ff_put_pixels16_x2_mmxext;
|
c->put_pixels_tab[0][1] = ff_put_pixels16_x2_mmxext;
|
||||||
@ -204,7 +204,7 @@ static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags, int mm_flags)
|
|||||||
#endif /* HAVE_MMXEXT_EXTERNAL */
|
#endif /* HAVE_MMXEXT_EXTERNAL */
|
||||||
}
|
}
|
||||||
|
|
||||||
static void hpeldsp_init_3dnow(HpelDSPContext *c, int flags, int mm_flags)
|
static void hpeldsp_init_3dnow(HpelDSPContext *c, int flags, int cpu_flags)
|
||||||
{
|
{
|
||||||
#if HAVE_AMD3DNOW_EXTERNAL
|
#if HAVE_AMD3DNOW_EXTERNAL
|
||||||
c->put_pixels_tab[0][1] = ff_put_pixels16_x2_3dnow;
|
c->put_pixels_tab[0][1] = ff_put_pixels16_x2_3dnow;
|
||||||
@ -238,10 +238,10 @@ static void hpeldsp_init_3dnow(HpelDSPContext *c, int flags, int mm_flags)
|
|||||||
#endif /* HAVE_AMD3DNOW_EXTERNAL */
|
#endif /* HAVE_AMD3DNOW_EXTERNAL */
|
||||||
}
|
}
|
||||||
|
|
||||||
static void hpeldsp_init_sse2(HpelDSPContext *c, int flags, int mm_flags)
|
static void hpeldsp_init_sse2(HpelDSPContext *c, int flags, int cpu_flags)
|
||||||
{
|
{
|
||||||
#if HAVE_SSE2_EXTERNAL
|
#if HAVE_SSE2_EXTERNAL
|
||||||
if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
|
if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW)) {
|
||||||
// these functions are slower than mmx on AMD, but faster on Intel
|
// these functions are slower than mmx on AMD, but faster on Intel
|
||||||
c->put_pixels_tab[0][0] = ff_put_pixels16_sse2;
|
c->put_pixels_tab[0][0] = ff_put_pixels16_sse2;
|
||||||
c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_sse2;
|
c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_sse2;
|
||||||
@ -252,17 +252,17 @@ static void hpeldsp_init_sse2(HpelDSPContext *c, int flags, int mm_flags)
|
|||||||
|
|
||||||
void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags)
|
void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags)
|
||||||
{
|
{
|
||||||
int mm_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
if (HAVE_MMX && mm_flags & AV_CPU_FLAG_MMX)
|
if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX)
|
||||||
hpeldsp_init_mmx(c, flags, mm_flags);
|
hpeldsp_init_mmx(c, flags, cpu_flags);
|
||||||
|
|
||||||
if (mm_flags & AV_CPU_FLAG_MMXEXT)
|
if (cpu_flags & AV_CPU_FLAG_MMXEXT)
|
||||||
hpeldsp_init_mmxext(c, flags, mm_flags);
|
hpeldsp_init_mmxext(c, flags, cpu_flags);
|
||||||
|
|
||||||
if (mm_flags & AV_CPU_FLAG_3DNOW)
|
if (cpu_flags & AV_CPU_FLAG_3DNOW)
|
||||||
hpeldsp_init_3dnow(c, flags, mm_flags);
|
hpeldsp_init_3dnow(c, flags, cpu_flags);
|
||||||
|
|
||||||
if (mm_flags & AV_CPU_FLAG_SSE2)
|
if (cpu_flags & AV_CPU_FLAG_SSE2)
|
||||||
hpeldsp_init_sse2(c, flags, mm_flags);
|
hpeldsp_init_sse2(c, flags, cpu_flags);
|
||||||
}
|
}
|
||||||
|
@ -148,9 +148,9 @@ static void lpc_compute_autocorr_sse2(const double *data, int len, int lag,
|
|||||||
av_cold void ff_lpc_init_x86(LPCContext *c)
|
av_cold void ff_lpc_init_x86(LPCContext *c)
|
||||||
{
|
{
|
||||||
#if HAVE_SSE2_INLINE
|
#if HAVE_SSE2_INLINE
|
||||||
int mm_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
if (mm_flags & (AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW)) {
|
if (cpu_flags & (AV_CPU_FLAG_SSE2 | AV_CPU_FLAG_SSE2SLOW)) {
|
||||||
c->lpc_apply_welch_window = lpc_apply_welch_window_sse2;
|
c->lpc_apply_welch_window = lpc_apply_welch_window_sse2;
|
||||||
c->lpc_compute_autocorr = lpc_compute_autocorr_sse2;
|
c->lpc_compute_autocorr = lpc_compute_autocorr_sse2;
|
||||||
}
|
}
|
||||||
|
@ -435,9 +435,9 @@ PIX_SAD(mmxext)
|
|||||||
av_cold void ff_dsputil_init_pix_mmx(DSPContext *c, AVCodecContext *avctx)
|
av_cold void ff_dsputil_init_pix_mmx(DSPContext *c, AVCodecContext *avctx)
|
||||||
{
|
{
|
||||||
#if HAVE_INLINE_ASM
|
#if HAVE_INLINE_ASM
|
||||||
int mm_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
if (mm_flags & AV_CPU_FLAG_MMX) {
|
if (cpu_flags & AV_CPU_FLAG_MMX) {
|
||||||
c->pix_abs[0][0] = sad16_mmx;
|
c->pix_abs[0][0] = sad16_mmx;
|
||||||
c->pix_abs[0][1] = sad16_x2_mmx;
|
c->pix_abs[0][1] = sad16_x2_mmx;
|
||||||
c->pix_abs[0][2] = sad16_y2_mmx;
|
c->pix_abs[0][2] = sad16_y2_mmx;
|
||||||
@ -450,7 +450,7 @@ av_cold void ff_dsputil_init_pix_mmx(DSPContext *c, AVCodecContext *avctx)
|
|||||||
c->sad[0]= sad16_mmx;
|
c->sad[0]= sad16_mmx;
|
||||||
c->sad[1]= sad8_mmx;
|
c->sad[1]= sad8_mmx;
|
||||||
}
|
}
|
||||||
if (mm_flags & AV_CPU_FLAG_MMXEXT) {
|
if (cpu_flags & AV_CPU_FLAG_MMXEXT) {
|
||||||
c->pix_abs[0][0] = sad16_mmxext;
|
c->pix_abs[0][0] = sad16_mmxext;
|
||||||
c->pix_abs[1][0] = sad8_mmxext;
|
c->pix_abs[1][0] = sad8_mmxext;
|
||||||
|
|
||||||
@ -466,7 +466,7 @@ av_cold void ff_dsputil_init_pix_mmx(DSPContext *c, AVCodecContext *avctx)
|
|||||||
c->pix_abs[1][3] = sad8_xy2_mmxext;
|
c->pix_abs[1][3] = sad8_xy2_mmxext;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ((mm_flags & AV_CPU_FLAG_SSE2) && !(mm_flags & AV_CPU_FLAG_3DNOW) && avctx->codec_id != AV_CODEC_ID_SNOW) {
|
if ((cpu_flags & AV_CPU_FLAG_SSE2) && !(cpu_flags & AV_CPU_FLAG_3DNOW) && avctx->codec_id != AV_CODEC_ID_SNOW) {
|
||||||
c->sad[0]= sad16_sse2;
|
c->sad[0]= sad16_sse2;
|
||||||
}
|
}
|
||||||
#endif /* HAVE_INLINE_ASM */
|
#endif /* HAVE_INLINE_ASM */
|
||||||
|
@ -235,7 +235,7 @@ DECL_IMDCT_BLOCKS(avx,avx)
|
|||||||
|
|
||||||
av_cold void ff_mpadsp_init_x86(MPADSPContext *s)
|
av_cold void ff_mpadsp_init_x86(MPADSPContext *s)
|
||||||
{
|
{
|
||||||
int mm_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
int i, j;
|
int i, j;
|
||||||
for (j = 0; j < 4; j++) {
|
for (j = 0; j < 4; j++) {
|
||||||
@ -252,21 +252,21 @@ av_cold void ff_mpadsp_init_x86(MPADSPContext *s)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if HAVE_SSE2_INLINE
|
#if HAVE_SSE2_INLINE
|
||||||
if (mm_flags & AV_CPU_FLAG_SSE2) {
|
if (cpu_flags & AV_CPU_FLAG_SSE2) {
|
||||||
s->apply_window_float = apply_window_mp3;
|
s->apply_window_float = apply_window_mp3;
|
||||||
}
|
}
|
||||||
#endif /* HAVE_SSE2_INLINE */
|
#endif /* HAVE_SSE2_INLINE */
|
||||||
|
|
||||||
#if HAVE_YASM
|
#if HAVE_YASM
|
||||||
if (EXTERNAL_AVX(mm_flags)) {
|
if (EXTERNAL_AVX(cpu_flags)) {
|
||||||
s->imdct36_blocks_float = imdct36_blocks_avx;
|
s->imdct36_blocks_float = imdct36_blocks_avx;
|
||||||
} else if (EXTERNAL_SSSE3(mm_flags)) {
|
} else if (EXTERNAL_SSSE3(cpu_flags)) {
|
||||||
s->imdct36_blocks_float = imdct36_blocks_ssse3;
|
s->imdct36_blocks_float = imdct36_blocks_ssse3;
|
||||||
} else if (EXTERNAL_SSE3(mm_flags)) {
|
} else if (EXTERNAL_SSE3(cpu_flags)) {
|
||||||
s->imdct36_blocks_float = imdct36_blocks_sse3;
|
s->imdct36_blocks_float = imdct36_blocks_sse3;
|
||||||
} else if (EXTERNAL_SSE2(mm_flags)) {
|
} else if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
s->imdct36_blocks_float = imdct36_blocks_sse2;
|
s->imdct36_blocks_float = imdct36_blocks_sse2;
|
||||||
} else if (EXTERNAL_SSE(mm_flags)) {
|
} else if (EXTERNAL_SSE(cpu_flags)) {
|
||||||
s->imdct36_blocks_float = imdct36_blocks_sse;
|
s->imdct36_blocks_float = imdct36_blocks_sse;
|
||||||
}
|
}
|
||||||
#endif /* HAVE_YASM */
|
#endif /* HAVE_YASM */
|
||||||
|
@ -557,9 +557,9 @@ static void denoise_dct_sse2(MpegEncContext *s, int16_t *block){
|
|||||||
av_cold void ff_MPV_common_init_x86(MpegEncContext *s)
|
av_cold void ff_MPV_common_init_x86(MpegEncContext *s)
|
||||||
{
|
{
|
||||||
#if HAVE_MMX_INLINE
|
#if HAVE_MMX_INLINE
|
||||||
int mm_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
if (mm_flags & AV_CPU_FLAG_MMX) {
|
if (cpu_flags & AV_CPU_FLAG_MMX) {
|
||||||
s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_mmx;
|
s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_mmx;
|
||||||
s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_mmx;
|
s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_mmx;
|
||||||
s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_mmx;
|
s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_mmx;
|
||||||
@ -568,7 +568,7 @@ av_cold void ff_MPV_common_init_x86(MpegEncContext *s)
|
|||||||
s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_mmx;
|
s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_mmx;
|
||||||
s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx;
|
s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx;
|
||||||
|
|
||||||
if (mm_flags & AV_CPU_FLAG_SSE2) {
|
if (cpu_flags & AV_CPU_FLAG_SSE2) {
|
||||||
s->denoise_dct= denoise_dct_sse2;
|
s->denoise_dct= denoise_dct_sse2;
|
||||||
} else {
|
} else {
|
||||||
s->denoise_dct= denoise_dct_mmx;
|
s->denoise_dct= denoise_dct_mmx;
|
||||||
|
@ -87,20 +87,20 @@ av_cold void ff_dct_encode_init_x86(MpegEncContext *s)
|
|||||||
|
|
||||||
if (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX) {
|
if (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX) {
|
||||||
#if HAVE_MMX_INLINE
|
#if HAVE_MMX_INLINE
|
||||||
int mm_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
if (INLINE_MMX(mm_flags))
|
if (INLINE_MMX(cpu_flags))
|
||||||
s->dct_quantize = dct_quantize_MMX;
|
s->dct_quantize = dct_quantize_MMX;
|
||||||
#endif
|
#endif
|
||||||
#if HAVE_MMXEXT_INLINE
|
#if HAVE_MMXEXT_INLINE
|
||||||
if (INLINE_MMXEXT(mm_flags))
|
if (INLINE_MMXEXT(cpu_flags))
|
||||||
s->dct_quantize = dct_quantize_MMXEXT;
|
s->dct_quantize = dct_quantize_MMXEXT;
|
||||||
#endif
|
#endif
|
||||||
#if HAVE_SSE2_INLINE
|
#if HAVE_SSE2_INLINE
|
||||||
if (INLINE_SSE2(mm_flags))
|
if (INLINE_SSE2(cpu_flags))
|
||||||
s->dct_quantize = dct_quantize_SSE2;
|
s->dct_quantize = dct_quantize_SSE2;
|
||||||
#endif
|
#endif
|
||||||
#if HAVE_SSSE3_INLINE
|
#if HAVE_SSSE3_INLINE
|
||||||
if (INLINE_SSSE3(mm_flags))
|
if (INLINE_SSSE3(cpu_flags))
|
||||||
s->dct_quantize = dct_quantize_SSSE3;
|
s->dct_quantize = dct_quantize_SSSE3;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -35,16 +35,16 @@ void ff_add_bytes_l2_sse2(uint8_t *dst, uint8_t *src1,
|
|||||||
|
|
||||||
av_cold void ff_pngdsp_init_x86(PNGDSPContext *dsp)
|
av_cold void ff_pngdsp_init_x86(PNGDSPContext *dsp)
|
||||||
{
|
{
|
||||||
int flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
#if ARCH_X86_32
|
#if ARCH_X86_32
|
||||||
if (EXTERNAL_MMX(flags))
|
if (EXTERNAL_MMX(cpu_flags))
|
||||||
dsp->add_bytes_l2 = ff_add_bytes_l2_mmx;
|
dsp->add_bytes_l2 = ff_add_bytes_l2_mmx;
|
||||||
#endif
|
#endif
|
||||||
if (EXTERNAL_MMXEXT(flags))
|
if (EXTERNAL_MMXEXT(cpu_flags))
|
||||||
dsp->add_paeth_prediction = ff_add_png_paeth_prediction_mmxext;
|
dsp->add_paeth_prediction = ff_add_png_paeth_prediction_mmxext;
|
||||||
if (EXTERNAL_SSE2(flags))
|
if (EXTERNAL_SSE2(cpu_flags))
|
||||||
dsp->add_bytes_l2 = ff_add_bytes_l2_sse2;
|
dsp->add_bytes_l2 = ff_add_bytes_l2_sse2;
|
||||||
if (EXTERNAL_SSSE3(flags))
|
if (EXTERNAL_SSSE3(cpu_flags))
|
||||||
dsp->add_paeth_prediction = ff_add_png_paeth_prediction_ssse3;
|
dsp->add_paeth_prediction = ff_add_png_paeth_prediction_ssse3;
|
||||||
}
|
}
|
||||||
|
@ -35,22 +35,22 @@ void ff_prores_idct_put_10_avx (uint16_t *dst, int linesize,
|
|||||||
av_cold void ff_proresdsp_x86_init(ProresDSPContext *dsp, AVCodecContext *avctx)
|
av_cold void ff_proresdsp_x86_init(ProresDSPContext *dsp, AVCodecContext *avctx)
|
||||||
{
|
{
|
||||||
#if ARCH_X86_64
|
#if ARCH_X86_64
|
||||||
int flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
if(avctx->flags & CODEC_FLAG_BITEXACT)
|
if(avctx->flags & CODEC_FLAG_BITEXACT)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (EXTERNAL_SSE2(flags)) {
|
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
|
dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
|
||||||
dsp->idct_put = ff_prores_idct_put_10_sse2;
|
dsp->idct_put = ff_prores_idct_put_10_sse2;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (EXTERNAL_SSE4(flags)) {
|
if (EXTERNAL_SSE4(cpu_flags)) {
|
||||||
dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
|
dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
|
||||||
dsp->idct_put = ff_prores_idct_put_10_sse4;
|
dsp->idct_put = ff_prores_idct_put_10_sse4;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (EXTERNAL_AVX(flags)) {
|
if (EXTERNAL_AVX(cpu_flags)) {
|
||||||
dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
|
dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
|
||||||
dsp->idct_put = ff_prores_idct_put_10_avx;
|
dsp->idct_put = ff_prores_idct_put_10_avx;
|
||||||
}
|
}
|
||||||
|
@ -32,14 +32,14 @@ void ff_rv34_idct_add_mmxext(uint8_t *dst, ptrdiff_t stride, int16_t *block);
|
|||||||
|
|
||||||
av_cold void ff_rv34dsp_init_x86(RV34DSPContext* c)
|
av_cold void ff_rv34dsp_init_x86(RV34DSPContext* c)
|
||||||
{
|
{
|
||||||
int mm_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
if (EXTERNAL_MMX(mm_flags))
|
if (EXTERNAL_MMX(cpu_flags))
|
||||||
c->rv34_idct_dc_add = ff_rv34_idct_dc_add_mmx;
|
c->rv34_idct_dc_add = ff_rv34_idct_dc_add_mmx;
|
||||||
if (EXTERNAL_MMXEXT(mm_flags)) {
|
if (EXTERNAL_MMXEXT(cpu_flags)) {
|
||||||
c->rv34_inv_transform_dc = ff_rv34_idct_dc_noround_mmxext;
|
c->rv34_inv_transform_dc = ff_rv34_idct_dc_noround_mmxext;
|
||||||
c->rv34_idct_add = ff_rv34_idct_add_mmxext;
|
c->rv34_idct_add = ff_rv34_idct_add_mmxext;
|
||||||
}
|
}
|
||||||
if (EXTERNAL_SSE4(mm_flags))
|
if (EXTERNAL_SSE4(cpu_flags))
|
||||||
c->rv34_idct_dc_add = ff_rv34_idct_dc_add_sse4;
|
c->rv34_idct_dc_add = ff_rv34_idct_dc_add_sse4;
|
||||||
}
|
}
|
||||||
|
@ -214,9 +214,9 @@ static void avg_rv40_qpel16_mc33_mmx(uint8_t *dst, uint8_t *src,
|
|||||||
av_cold void ff_rv40dsp_init_x86(RV34DSPContext *c)
|
av_cold void ff_rv40dsp_init_x86(RV34DSPContext *c)
|
||||||
{
|
{
|
||||||
#if HAVE_YASM
|
#if HAVE_YASM
|
||||||
int mm_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
if (EXTERNAL_MMX(mm_flags)) {
|
if (EXTERNAL_MMX(cpu_flags)) {
|
||||||
c->put_chroma_pixels_tab[0] = ff_put_rv40_chroma_mc8_mmx;
|
c->put_chroma_pixels_tab[0] = ff_put_rv40_chroma_mc8_mmx;
|
||||||
c->put_chroma_pixels_tab[1] = ff_put_rv40_chroma_mc4_mmx;
|
c->put_chroma_pixels_tab[1] = ff_put_rv40_chroma_mc4_mmx;
|
||||||
#if HAVE_MMX_INLINE
|
#if HAVE_MMX_INLINE
|
||||||
@ -229,7 +229,7 @@ av_cold void ff_rv40dsp_init_x86(RV34DSPContext *c)
|
|||||||
QPEL_MC_SET(put_, _mmx)
|
QPEL_MC_SET(put_, _mmx)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
if (EXTERNAL_MMXEXT(mm_flags)) {
|
if (EXTERNAL_MMXEXT(cpu_flags)) {
|
||||||
c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_mmxext;
|
c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_mmxext;
|
||||||
c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_mmxext;
|
c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_mmxext;
|
||||||
c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_mmxext;
|
c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_mmxext;
|
||||||
@ -239,14 +239,14 @@ av_cold void ff_rv40dsp_init_x86(RV34DSPContext *c)
|
|||||||
#if ARCH_X86_32
|
#if ARCH_X86_32
|
||||||
QPEL_MC_SET(avg_, _mmxext)
|
QPEL_MC_SET(avg_, _mmxext)
|
||||||
#endif
|
#endif
|
||||||
} else if (EXTERNAL_AMD3DNOW(mm_flags)) {
|
} else if (EXTERNAL_AMD3DNOW(cpu_flags)) {
|
||||||
c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_3dnow;
|
c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_3dnow;
|
||||||
c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_3dnow;
|
c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_3dnow;
|
||||||
#if ARCH_X86_32
|
#if ARCH_X86_32
|
||||||
QPEL_MC_SET(avg_, _3dnow)
|
QPEL_MC_SET(avg_, _3dnow)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
if (EXTERNAL_SSE2(mm_flags)) {
|
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_sse2;
|
c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_sse2;
|
||||||
c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_sse2;
|
c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_sse2;
|
||||||
c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_sse2;
|
c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_sse2;
|
||||||
@ -254,7 +254,7 @@ av_cold void ff_rv40dsp_init_x86(RV34DSPContext *c)
|
|||||||
QPEL_MC_SET(put_, _sse2)
|
QPEL_MC_SET(put_, _sse2)
|
||||||
QPEL_MC_SET(avg_, _sse2)
|
QPEL_MC_SET(avg_, _sse2)
|
||||||
}
|
}
|
||||||
if (EXTERNAL_SSSE3(mm_flags)) {
|
if (EXTERNAL_SSSE3(cpu_flags)) {
|
||||||
c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_ssse3;
|
c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_ssse3;
|
||||||
c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_ssse3;
|
c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_ssse3;
|
||||||
c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_ssse3;
|
c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_ssse3;
|
||||||
|
@ -53,9 +53,9 @@ void ff_sbr_hf_apply_noise_3_sse2(float (*Y)[2], const float *s_m,
|
|||||||
|
|
||||||
av_cold void ff_sbrdsp_init_x86(SBRDSPContext *s)
|
av_cold void ff_sbrdsp_init_x86(SBRDSPContext *s)
|
||||||
{
|
{
|
||||||
int mm_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
if (EXTERNAL_SSE(mm_flags)) {
|
if (EXTERNAL_SSE(cpu_flags)) {
|
||||||
s->neg_odd_64 = ff_sbr_neg_odd_64_sse;
|
s->neg_odd_64 = ff_sbr_neg_odd_64_sse;
|
||||||
s->sum_square = ff_sbr_sum_square_sse;
|
s->sum_square = ff_sbr_sum_square_sse;
|
||||||
s->sum64x5 = ff_sbr_sum64x5_sse;
|
s->sum64x5 = ff_sbr_sum64x5_sse;
|
||||||
@ -65,7 +65,7 @@ av_cold void ff_sbrdsp_init_x86(SBRDSPContext *s)
|
|||||||
s->qmf_deint_bfly = ff_sbr_qmf_deint_bfly_sse;
|
s->qmf_deint_bfly = ff_sbr_qmf_deint_bfly_sse;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (EXTERNAL_SSE2(mm_flags)) {
|
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
s->qmf_deint_bfly = ff_sbr_qmf_deint_bfly_sse2;
|
s->qmf_deint_bfly = ff_sbr_qmf_deint_bfly_sse2;
|
||||||
s->qmf_pre_shuffle = ff_sbr_qmf_pre_shuffle_sse2;
|
s->qmf_pre_shuffle = ff_sbr_qmf_pre_shuffle_sse2;
|
||||||
s->hf_apply_noise[0] = ff_sbr_hf_apply_noise_0_sse2;
|
s->hf_apply_noise[0] = ff_sbr_hf_apply_noise_0_sse2;
|
||||||
|
@ -83,12 +83,12 @@ void ff_avg_vc1_chroma_mc8_nornd_ssse3(uint8_t *dst, uint8_t *src,
|
|||||||
|
|
||||||
av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
|
av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
|
||||||
{
|
{
|
||||||
int mm_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
if (INLINE_MMX(mm_flags))
|
if (INLINE_MMX(cpu_flags))
|
||||||
ff_vc1dsp_init_mmx(dsp);
|
ff_vc1dsp_init_mmx(dsp);
|
||||||
|
|
||||||
if (INLINE_MMXEXT(mm_flags))
|
if (INLINE_MMXEXT(cpu_flags))
|
||||||
ff_vc1dsp_init_mmxext(dsp);
|
ff_vc1dsp_init_mmxext(dsp);
|
||||||
|
|
||||||
#define ASSIGN_LF(EXT) \
|
#define ASSIGN_LF(EXT) \
|
||||||
@ -100,31 +100,31 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
|
|||||||
dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_ ## EXT
|
dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_ ## EXT
|
||||||
|
|
||||||
#if HAVE_YASM
|
#if HAVE_YASM
|
||||||
if (mm_flags & AV_CPU_FLAG_MMX) {
|
if (cpu_flags & AV_CPU_FLAG_MMX) {
|
||||||
dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_mmx;
|
dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_mmx;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mm_flags & AV_CPU_FLAG_MMXEXT) {
|
if (cpu_flags & AV_CPU_FLAG_MMXEXT) {
|
||||||
ASSIGN_LF(mmxext);
|
ASSIGN_LF(mmxext);
|
||||||
dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_mmxext;
|
dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_mmxext;
|
||||||
|
|
||||||
dsp->avg_vc1_mspel_pixels_tab[0] = avg_vc1_mspel_mc00_mmxext;
|
dsp->avg_vc1_mspel_pixels_tab[0] = avg_vc1_mspel_mc00_mmxext;
|
||||||
} else if (mm_flags & AV_CPU_FLAG_3DNOW) {
|
} else if (cpu_flags & AV_CPU_FLAG_3DNOW) {
|
||||||
dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_3dnow;
|
dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_3dnow;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mm_flags & AV_CPU_FLAG_SSE2) {
|
if (cpu_flags & AV_CPU_FLAG_SSE2) {
|
||||||
dsp->vc1_v_loop_filter8 = ff_vc1_v_loop_filter8_sse2;
|
dsp->vc1_v_loop_filter8 = ff_vc1_v_loop_filter8_sse2;
|
||||||
dsp->vc1_h_loop_filter8 = ff_vc1_h_loop_filter8_sse2;
|
dsp->vc1_h_loop_filter8 = ff_vc1_h_loop_filter8_sse2;
|
||||||
dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_sse2;
|
dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_sse2;
|
||||||
dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse2;
|
dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse2;
|
||||||
}
|
}
|
||||||
if (mm_flags & AV_CPU_FLAG_SSSE3) {
|
if (cpu_flags & AV_CPU_FLAG_SSSE3) {
|
||||||
ASSIGN_LF(ssse3);
|
ASSIGN_LF(ssse3);
|
||||||
dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_ssse3;
|
dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_ssse3;
|
||||||
dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_ssse3;
|
dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_ssse3;
|
||||||
}
|
}
|
||||||
if (mm_flags & AV_CPU_FLAG_SSE4) {
|
if (cpu_flags & AV_CPU_FLAG_SSE4) {
|
||||||
dsp->vc1_h_loop_filter8 = ff_vc1_h_loop_filter8_sse4;
|
dsp->vc1_h_loop_filter8 = ff_vc1_h_loop_filter8_sse4;
|
||||||
dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse4;
|
dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse4;
|
||||||
}
|
}
|
||||||
|
@ -108,20 +108,20 @@ void ff_prefetch_3dnow(uint8_t *buf, ptrdiff_t stride, int h);
|
|||||||
av_cold void ff_videodsp_init_x86(VideoDSPContext *ctx, int bpc)
|
av_cold void ff_videodsp_init_x86(VideoDSPContext *ctx, int bpc)
|
||||||
{
|
{
|
||||||
#if HAVE_YASM
|
#if HAVE_YASM
|
||||||
int mm_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
#if ARCH_X86_32
|
#if ARCH_X86_32
|
||||||
if (bpc <= 8 && mm_flags & AV_CPU_FLAG_MMX) {
|
if (bpc <= 8 && cpu_flags & AV_CPU_FLAG_MMX) {
|
||||||
ctx->emulated_edge_mc = emulated_edge_mc_mmx;
|
ctx->emulated_edge_mc = emulated_edge_mc_mmx;
|
||||||
}
|
}
|
||||||
if (mm_flags & AV_CPU_FLAG_3DNOW) {
|
if (cpu_flags & AV_CPU_FLAG_3DNOW) {
|
||||||
ctx->prefetch = ff_prefetch_3dnow;
|
ctx->prefetch = ff_prefetch_3dnow;
|
||||||
}
|
}
|
||||||
#endif /* ARCH_X86_32 */
|
#endif /* ARCH_X86_32 */
|
||||||
if (mm_flags & AV_CPU_FLAG_MMXEXT) {
|
if (cpu_flags & AV_CPU_FLAG_MMXEXT) {
|
||||||
ctx->prefetch = ff_prefetch_mmxext;
|
ctx->prefetch = ff_prefetch_mmxext;
|
||||||
}
|
}
|
||||||
if (bpc <= 8 && mm_flags & AV_CPU_FLAG_SSE) {
|
if (bpc <= 8 && cpu_flags & AV_CPU_FLAG_SSE) {
|
||||||
ctx->emulated_edge_mc = emulated_edge_mc_sse;
|
ctx->emulated_edge_mc = emulated_edge_mc_sse;
|
||||||
}
|
}
|
||||||
#endif /* HAVE_YASM */
|
#endif /* HAVE_YASM */
|
||||||
|
@ -31,13 +31,13 @@ void ff_vorbis_inverse_coupling_sse(float *mag, float *ang,
|
|||||||
av_cold void ff_vorbisdsp_init_x86(VorbisDSPContext *dsp)
|
av_cold void ff_vorbisdsp_init_x86(VorbisDSPContext *dsp)
|
||||||
{
|
{
|
||||||
#if HAVE_YASM
|
#if HAVE_YASM
|
||||||
int mm_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
#if ARCH_X86_32
|
#if ARCH_X86_32
|
||||||
if (mm_flags & AV_CPU_FLAG_3DNOW)
|
if (cpu_flags & AV_CPU_FLAG_3DNOW)
|
||||||
dsp->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_3dnow;
|
dsp->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_3dnow;
|
||||||
#endif /* ARCH_X86_32 */
|
#endif /* ARCH_X86_32 */
|
||||||
if (mm_flags & AV_CPU_FLAG_SSE)
|
if (cpu_flags & AV_CPU_FLAG_SSE)
|
||||||
dsp->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_sse;
|
dsp->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_sse;
|
||||||
#endif /* HAVE_YASM */
|
#endif /* HAVE_YASM */
|
||||||
}
|
}
|
||||||
|
@ -99,20 +99,20 @@ static void put_vp_no_rnd_pixels8_l2_mmx(uint8_t *dst, const uint8_t *a, const u
|
|||||||
|
|
||||||
av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags)
|
av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags)
|
||||||
{
|
{
|
||||||
int cpuflags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
#if HAVE_MMX_INLINE
|
#if HAVE_MMX_INLINE
|
||||||
c->put_no_rnd_pixels_l2 = put_vp_no_rnd_pixels8_l2_mmx;
|
c->put_no_rnd_pixels_l2 = put_vp_no_rnd_pixels8_l2_mmx;
|
||||||
#endif /* HAVE_MMX_INLINE */
|
#endif /* HAVE_MMX_INLINE */
|
||||||
|
|
||||||
#if ARCH_X86_32
|
#if ARCH_X86_32
|
||||||
if (EXTERNAL_MMX(cpuflags)) {
|
if (EXTERNAL_MMX(cpu_flags)) {
|
||||||
c->idct_put = ff_vp3_idct_put_mmx;
|
c->idct_put = ff_vp3_idct_put_mmx;
|
||||||
c->idct_add = ff_vp3_idct_add_mmx;
|
c->idct_add = ff_vp3_idct_add_mmx;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (EXTERNAL_MMXEXT(cpuflags)) {
|
if (EXTERNAL_MMXEXT(cpu_flags)) {
|
||||||
c->idct_dc_add = ff_vp3_idct_dc_add_mmxext;
|
c->idct_dc_add = ff_vp3_idct_dc_add_mmxext;
|
||||||
|
|
||||||
if (!(flags & CODEC_FLAG_BITEXACT)) {
|
if (!(flags & CODEC_FLAG_BITEXACT)) {
|
||||||
@ -121,7 +121,7 @@ av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (EXTERNAL_SSE2(cpuflags)) {
|
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
c->idct_put = ff_vp3_idct_put_sse2;
|
c->idct_put = ff_vp3_idct_put_sse2;
|
||||||
c->idct_add = ff_vp3_idct_add_sse2;
|
c->idct_add = ff_vp3_idct_add_sse2;
|
||||||
}
|
}
|
||||||
|
@ -32,16 +32,16 @@ void ff_vp6_filter_diag4_sse2(uint8_t *dst, uint8_t *src, int stride,
|
|||||||
|
|
||||||
av_cold void ff_vp56dsp_init_x86(VP56DSPContext* c, enum AVCodecID codec)
|
av_cold void ff_vp56dsp_init_x86(VP56DSPContext* c, enum AVCodecID codec)
|
||||||
{
|
{
|
||||||
int mm_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
if (CONFIG_VP6_DECODER && codec == AV_CODEC_ID_VP6) {
|
if (CONFIG_VP6_DECODER && codec == AV_CODEC_ID_VP6) {
|
||||||
#if ARCH_X86_32
|
#if ARCH_X86_32
|
||||||
if (EXTERNAL_MMX(mm_flags)) {
|
if (EXTERNAL_MMX(cpu_flags)) {
|
||||||
c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx;
|
c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (EXTERNAL_SSE2(mm_flags)) {
|
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
c->vp6_filter_diag4 = ff_vp6_filter_diag4_sse2;
|
c->vp6_filter_diag4 = ff_vp6_filter_diag4_sse2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -316,9 +316,9 @@ DECLARE_LOOP_FILTER(sse4)
|
|||||||
av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
|
av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
|
||||||
{
|
{
|
||||||
#if HAVE_YASM
|
#if HAVE_YASM
|
||||||
int mm_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
if (mm_flags & AV_CPU_FLAG_MMX) {
|
if (cpu_flags & AV_CPU_FLAG_MMX) {
|
||||||
c->vp8_idct_dc_add = ff_vp8_idct_dc_add_mmx;
|
c->vp8_idct_dc_add = ff_vp8_idct_dc_add_mmx;
|
||||||
c->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_mmx;
|
c->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_mmx;
|
||||||
#if ARCH_X86_32
|
#if ARCH_X86_32
|
||||||
@ -349,7 +349,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
|
|||||||
|
|
||||||
/* note that 4-tap width=16 functions are missing because w=16
|
/* note that 4-tap width=16 functions are missing because w=16
|
||||||
* is only used for luma, and luma is always a copy or sixtap. */
|
* is only used for luma, and luma is always a copy or sixtap. */
|
||||||
if (mm_flags & AV_CPU_FLAG_MMXEXT) {
|
if (cpu_flags & AV_CPU_FLAG_MMXEXT) {
|
||||||
VP8_MC_FUNC(2, 4, mmxext);
|
VP8_MC_FUNC(2, 4, mmxext);
|
||||||
VP8_BILINEAR_MC_FUNC(2, 4, mmxext);
|
VP8_BILINEAR_MC_FUNC(2, 4, mmxext);
|
||||||
#if ARCH_X86_32
|
#if ARCH_X86_32
|
||||||
@ -373,14 +373,14 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mm_flags & AV_CPU_FLAG_SSE) {
|
if (cpu_flags & AV_CPU_FLAG_SSE) {
|
||||||
c->vp8_idct_add = ff_vp8_idct_add_sse;
|
c->vp8_idct_add = ff_vp8_idct_add_sse;
|
||||||
c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_sse;
|
c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_sse;
|
||||||
c->put_vp8_epel_pixels_tab[0][0][0] =
|
c->put_vp8_epel_pixels_tab[0][0][0] =
|
||||||
c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
|
c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mm_flags & (AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW)) {
|
if (cpu_flags & (AV_CPU_FLAG_SSE2 | AV_CPU_FLAG_SSE2SLOW)) {
|
||||||
VP8_LUMA_MC_FUNC(0, 16, sse2);
|
VP8_LUMA_MC_FUNC(0, 16, sse2);
|
||||||
VP8_MC_FUNC(1, 8, sse2);
|
VP8_MC_FUNC(1, 8, sse2);
|
||||||
VP8_BILINEAR_MC_FUNC(0, 16, sse2);
|
VP8_BILINEAR_MC_FUNC(0, 16, sse2);
|
||||||
@ -395,7 +395,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
|
|||||||
c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_sse2;
|
c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_sse2;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mm_flags & AV_CPU_FLAG_SSE2) {
|
if (cpu_flags & AV_CPU_FLAG_SSE2) {
|
||||||
c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_sse2;
|
c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_sse2;
|
||||||
|
|
||||||
c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2;
|
c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2;
|
||||||
@ -407,7 +407,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
|
|||||||
c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse2;
|
c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse2;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mm_flags & AV_CPU_FLAG_SSSE3) {
|
if (cpu_flags & AV_CPU_FLAG_SSSE3) {
|
||||||
VP8_LUMA_MC_FUNC(0, 16, ssse3);
|
VP8_LUMA_MC_FUNC(0, 16, ssse3);
|
||||||
VP8_MC_FUNC(1, 8, ssse3);
|
VP8_MC_FUNC(1, 8, ssse3);
|
||||||
VP8_MC_FUNC(2, 4, ssse3);
|
VP8_MC_FUNC(2, 4, ssse3);
|
||||||
@ -429,7 +429,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
|
|||||||
c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_ssse3;
|
c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_ssse3;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mm_flags & AV_CPU_FLAG_SSE4) {
|
if (cpu_flags & AV_CPU_FLAG_SSE4) {
|
||||||
c->vp8_idct_dc_add = ff_vp8_idct_dc_add_sse4;
|
c->vp8_idct_dc_add = ff_vp8_idct_dc_add_sse4;
|
||||||
|
|
||||||
c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse4;
|
c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse4;
|
||||||
|
@ -35,24 +35,24 @@ void ff_scale_samples_s32_avx(uint8_t *dst, const uint8_t *src, int len,
|
|||||||
|
|
||||||
av_cold void ff_volume_init_x86(VolumeContext *vol)
|
av_cold void ff_volume_init_x86(VolumeContext *vol)
|
||||||
{
|
{
|
||||||
int mm_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
enum AVSampleFormat sample_fmt = av_get_packed_sample_fmt(vol->sample_fmt);
|
enum AVSampleFormat sample_fmt = av_get_packed_sample_fmt(vol->sample_fmt);
|
||||||
|
|
||||||
if (sample_fmt == AV_SAMPLE_FMT_S16) {
|
if (sample_fmt == AV_SAMPLE_FMT_S16) {
|
||||||
if (EXTERNAL_SSE2(mm_flags) && vol->volume_i < 32768) {
|
if (EXTERNAL_SSE2(cpu_flags) && vol->volume_i < 32768) {
|
||||||
vol->scale_samples = ff_scale_samples_s16_sse2;
|
vol->scale_samples = ff_scale_samples_s16_sse2;
|
||||||
vol->samples_align = 8;
|
vol->samples_align = 8;
|
||||||
}
|
}
|
||||||
} else if (sample_fmt == AV_SAMPLE_FMT_S32) {
|
} else if (sample_fmt == AV_SAMPLE_FMT_S32) {
|
||||||
if (EXTERNAL_SSE2(mm_flags)) {
|
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
vol->scale_samples = ff_scale_samples_s32_sse2;
|
vol->scale_samples = ff_scale_samples_s32_sse2;
|
||||||
vol->samples_align = 4;
|
vol->samples_align = 4;
|
||||||
}
|
}
|
||||||
if (EXTERNAL_SSSE3(mm_flags) && mm_flags & AV_CPU_FLAG_ATOM) {
|
if (EXTERNAL_SSSE3(cpu_flags) && cpu_flags & AV_CPU_FLAG_ATOM) {
|
||||||
vol->scale_samples = ff_scale_samples_s32_ssse3_atom;
|
vol->scale_samples = ff_scale_samples_s32_ssse3_atom;
|
||||||
vol->samples_align = 4;
|
vol->samples_align = 4;
|
||||||
}
|
}
|
||||||
if (EXTERNAL_AVX(mm_flags)) {
|
if (EXTERNAL_AVX(cpu_flags)) {
|
||||||
vol->scale_samples = ff_scale_samples_s32_avx;
|
vol->scale_samples = ff_scale_samples_s32_avx;
|
||||||
vol->samples_align = 8;
|
vol->samples_align = 8;
|
||||||
}
|
}
|
||||||
|
@ -145,15 +145,15 @@ void ff_conv_flt_to_fltp_6ch_avx (float *const *dst, float *src, int len,
|
|||||||
|
|
||||||
av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
|
av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
|
||||||
{
|
{
|
||||||
int mm_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
if (EXTERNAL_MMX(mm_flags)) {
|
if (EXTERNAL_MMX(cpu_flags)) {
|
||||||
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32,
|
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32,
|
||||||
0, 1, 8, "MMX", ff_conv_s32_to_s16_mmx);
|
0, 1, 8, "MMX", ff_conv_s32_to_s16_mmx);
|
||||||
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
|
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
|
||||||
6, 1, 4, "MMX", ff_conv_fltp_to_flt_6ch_mmx);
|
6, 1, 4, "MMX", ff_conv_fltp_to_flt_6ch_mmx);
|
||||||
}
|
}
|
||||||
if (EXTERNAL_SSE(mm_flags)) {
|
if (EXTERNAL_SSE(cpu_flags)) {
|
||||||
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
|
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
|
||||||
6, 1, 2, "SSE", ff_conv_fltp_to_s16_6ch_sse);
|
6, 1, 2, "SSE", ff_conv_fltp_to_s16_6ch_sse);
|
||||||
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
|
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
|
||||||
@ -161,8 +161,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
|
|||||||
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT,
|
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT,
|
||||||
2, 16, 4, "SSE", ff_conv_flt_to_fltp_2ch_sse);
|
2, 16, 4, "SSE", ff_conv_flt_to_fltp_2ch_sse);
|
||||||
}
|
}
|
||||||
if (EXTERNAL_SSE2(mm_flags)) {
|
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
|
if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW)) {
|
||||||
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32,
|
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32,
|
||||||
0, 16, 16, "SSE2", ff_conv_s32_to_s16_sse2);
|
0, 16, 16, "SSE2", ff_conv_s32_to_s16_sse2);
|
||||||
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
|
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
|
||||||
@ -206,7 +206,7 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
|
|||||||
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT,
|
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT,
|
||||||
6, 16, 4, "SSE2", ff_conv_flt_to_fltp_6ch_sse2);
|
6, 16, 4, "SSE2", ff_conv_flt_to_fltp_6ch_sse2);
|
||||||
}
|
}
|
||||||
if (EXTERNAL_SSSE3(mm_flags)) {
|
if (EXTERNAL_SSSE3(cpu_flags)) {
|
||||||
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
|
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
|
||||||
6, 16, 4, "SSSE3", ff_conv_s16p_to_flt_6ch_ssse3);
|
6, 16, 4, "SSSE3", ff_conv_s16p_to_flt_6ch_ssse3);
|
||||||
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
|
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP,
|
||||||
@ -220,13 +220,13 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
|
|||||||
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
|
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
|
||||||
6, 16, 4, "SSSE3", ff_conv_flt_to_s16p_6ch_ssse3);
|
6, 16, 4, "SSSE3", ff_conv_flt_to_s16p_6ch_ssse3);
|
||||||
}
|
}
|
||||||
if (EXTERNAL_SSE4(mm_flags)) {
|
if (EXTERNAL_SSE4(cpu_flags)) {
|
||||||
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16,
|
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16,
|
||||||
0, 16, 8, "SSE4", ff_conv_s16_to_flt_sse4);
|
0, 16, 8, "SSE4", ff_conv_s16_to_flt_sse4);
|
||||||
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
|
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
|
||||||
6, 16, 4, "SSE4", ff_conv_fltp_to_flt_6ch_sse4);
|
6, 16, 4, "SSE4", ff_conv_fltp_to_flt_6ch_sse4);
|
||||||
}
|
}
|
||||||
if (EXTERNAL_AVX(mm_flags)) {
|
if (EXTERNAL_AVX(cpu_flags)) {
|
||||||
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32,
|
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32,
|
||||||
0, 32, 16, "AVX", ff_conv_s32_to_flt_avx);
|
0, 32, 16, "AVX", ff_conv_s32_to_flt_avx);
|
||||||
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT,
|
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT,
|
||||||
|
@ -106,7 +106,7 @@ DEFINE_MIX_3_8_TO_1_2(7)
|
|||||||
DEFINE_MIX_3_8_TO_1_2(8)
|
DEFINE_MIX_3_8_TO_1_2(8)
|
||||||
|
|
||||||
#define SET_MIX_3_8_TO_1_2(chan) \
|
#define SET_MIX_3_8_TO_1_2(chan) \
|
||||||
if (EXTERNAL_SSE(mm_flags)) { \
|
if (EXTERNAL_SSE(cpu_flags)) { \
|
||||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
|
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
|
||||||
chan, 1, 16, 4, "SSE", \
|
chan, 1, 16, 4, "SSE", \
|
||||||
ff_mix_ ## chan ## _to_1_fltp_flt_sse); \
|
ff_mix_ ## chan ## _to_1_fltp_flt_sse); \
|
||||||
@ -114,7 +114,7 @@ DEFINE_MIX_3_8_TO_1_2(8)
|
|||||||
chan, 2, 16, 4, "SSE", \
|
chan, 2, 16, 4, "SSE", \
|
||||||
ff_mix_## chan ##_to_2_fltp_flt_sse); \
|
ff_mix_## chan ##_to_2_fltp_flt_sse); \
|
||||||
} \
|
} \
|
||||||
if (EXTERNAL_SSE2(mm_flags)) { \
|
if (EXTERNAL_SSE2(cpu_flags)) { \
|
||||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
|
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
|
||||||
chan, 1, 16, 8, "SSE2", \
|
chan, 1, 16, 8, "SSE2", \
|
||||||
ff_mix_ ## chan ## _to_1_s16p_flt_sse2); \
|
ff_mix_ ## chan ## _to_1_s16p_flt_sse2); \
|
||||||
@ -122,7 +122,7 @@ DEFINE_MIX_3_8_TO_1_2(8)
|
|||||||
chan, 2, 16, 8, "SSE2", \
|
chan, 2, 16, 8, "SSE2", \
|
||||||
ff_mix_ ## chan ## _to_2_s16p_flt_sse2); \
|
ff_mix_ ## chan ## _to_2_s16p_flt_sse2); \
|
||||||
} \
|
} \
|
||||||
if (EXTERNAL_SSE4(mm_flags)) { \
|
if (EXTERNAL_SSE4(cpu_flags)) { \
|
||||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
|
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
|
||||||
chan, 1, 16, 8, "SSE4", \
|
chan, 1, 16, 8, "SSE4", \
|
||||||
ff_mix_ ## chan ## _to_1_s16p_flt_sse4); \
|
ff_mix_ ## chan ## _to_1_s16p_flt_sse4); \
|
||||||
@ -130,7 +130,7 @@ DEFINE_MIX_3_8_TO_1_2(8)
|
|||||||
chan, 2, 16, 8, "SSE4", \
|
chan, 2, 16, 8, "SSE4", \
|
||||||
ff_mix_ ## chan ## _to_2_s16p_flt_sse4); \
|
ff_mix_ ## chan ## _to_2_s16p_flt_sse4); \
|
||||||
} \
|
} \
|
||||||
if (EXTERNAL_AVX(mm_flags)) { \
|
if (EXTERNAL_AVX(cpu_flags)) { \
|
||||||
int ptr_align = 32; \
|
int ptr_align = 32; \
|
||||||
int smp_align = 8; \
|
int smp_align = 8; \
|
||||||
if (ARCH_X86_32 || chan >= 6) { \
|
if (ARCH_X86_32 || chan >= 6) { \
|
||||||
@ -150,7 +150,7 @@ DEFINE_MIX_3_8_TO_1_2(8)
|
|||||||
chan, 2, 16, 8, "AVX", \
|
chan, 2, 16, 8, "AVX", \
|
||||||
ff_mix_ ## chan ## _to_2_s16p_flt_avx); \
|
ff_mix_ ## chan ## _to_2_s16p_flt_avx); \
|
||||||
} \
|
} \
|
||||||
if (EXTERNAL_FMA4(mm_flags)) { \
|
if (EXTERNAL_FMA4(cpu_flags)) { \
|
||||||
int ptr_align = 32; \
|
int ptr_align = 32; \
|
||||||
int smp_align = 8; \
|
int smp_align = 8; \
|
||||||
if (ARCH_X86_32 || chan >= 6) { \
|
if (ARCH_X86_32 || chan >= 6) { \
|
||||||
@ -174,15 +174,15 @@ DEFINE_MIX_3_8_TO_1_2(8)
|
|||||||
av_cold void ff_audio_mix_init_x86(AudioMix *am)
|
av_cold void ff_audio_mix_init_x86(AudioMix *am)
|
||||||
{
|
{
|
||||||
#if HAVE_YASM
|
#if HAVE_YASM
|
||||||
int mm_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
if (EXTERNAL_SSE(mm_flags)) {
|
if (EXTERNAL_SSE(cpu_flags)) {
|
||||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
|
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
|
||||||
2, 1, 16, 8, "SSE", ff_mix_2_to_1_fltp_flt_sse);
|
2, 1, 16, 8, "SSE", ff_mix_2_to_1_fltp_flt_sse);
|
||||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
|
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
|
||||||
1, 2, 16, 4, "SSE", ff_mix_1_to_2_fltp_flt_sse);
|
1, 2, 16, 4, "SSE", ff_mix_1_to_2_fltp_flt_sse);
|
||||||
}
|
}
|
||||||
if (EXTERNAL_SSE2(mm_flags)) {
|
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
|
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
|
||||||
2, 1, 16, 8, "SSE2", ff_mix_2_to_1_s16p_flt_sse2);
|
2, 1, 16, 8, "SSE2", ff_mix_2_to_1_s16p_flt_sse2);
|
||||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_Q8,
|
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_Q8,
|
||||||
@ -190,13 +190,13 @@ av_cold void ff_audio_mix_init_x86(AudioMix *am)
|
|||||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
|
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
|
||||||
1, 2, 16, 8, "SSE2", ff_mix_1_to_2_s16p_flt_sse2);
|
1, 2, 16, 8, "SSE2", ff_mix_1_to_2_s16p_flt_sse2);
|
||||||
}
|
}
|
||||||
if (EXTERNAL_SSE4(mm_flags)) {
|
if (EXTERNAL_SSE4(cpu_flags)) {
|
||||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
|
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
|
||||||
2, 1, 16, 8, "SSE4", ff_mix_2_to_1_s16p_flt_sse4);
|
2, 1, 16, 8, "SSE4", ff_mix_2_to_1_s16p_flt_sse4);
|
||||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
|
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
|
||||||
1, 2, 16, 8, "SSE4", ff_mix_1_to_2_s16p_flt_sse4);
|
1, 2, 16, 8, "SSE4", ff_mix_1_to_2_s16p_flt_sse4);
|
||||||
}
|
}
|
||||||
if (EXTERNAL_AVX(mm_flags)) {
|
if (EXTERNAL_AVX(cpu_flags)) {
|
||||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
|
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
|
||||||
2, 1, 32, 16, "AVX", ff_mix_2_to_1_fltp_flt_avx);
|
2, 1, 32, 16, "AVX", ff_mix_2_to_1_fltp_flt_avx);
|
||||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
|
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
|
||||||
|
@ -34,26 +34,26 @@ void ff_dither_int_to_float_triangular_avx(float *dst, int *src0, int len);
|
|||||||
av_cold void ff_dither_init_x86(DitherDSPContext *ddsp,
|
av_cold void ff_dither_init_x86(DitherDSPContext *ddsp,
|
||||||
enum AVResampleDitherMethod method)
|
enum AVResampleDitherMethod method)
|
||||||
{
|
{
|
||||||
int mm_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
if (EXTERNAL_SSE2(mm_flags)) {
|
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
ddsp->quantize = ff_quantize_sse2;
|
ddsp->quantize = ff_quantize_sse2;
|
||||||
ddsp->ptr_align = 16;
|
ddsp->ptr_align = 16;
|
||||||
ddsp->samples_align = 8;
|
ddsp->samples_align = 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (method == AV_RESAMPLE_DITHER_RECTANGULAR) {
|
if (method == AV_RESAMPLE_DITHER_RECTANGULAR) {
|
||||||
if (EXTERNAL_SSE2(mm_flags)) {
|
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
ddsp->dither_int_to_float = ff_dither_int_to_float_rectangular_sse2;
|
ddsp->dither_int_to_float = ff_dither_int_to_float_rectangular_sse2;
|
||||||
}
|
}
|
||||||
if (EXTERNAL_AVX(mm_flags)) {
|
if (EXTERNAL_AVX(cpu_flags)) {
|
||||||
ddsp->dither_int_to_float = ff_dither_int_to_float_rectangular_avx;
|
ddsp->dither_int_to_float = ff_dither_int_to_float_rectangular_avx;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (EXTERNAL_SSE2(mm_flags)) {
|
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
ddsp->dither_int_to_float = ff_dither_int_to_float_triangular_sse2;
|
ddsp->dither_int_to_float = ff_dither_int_to_float_triangular_sse2;
|
||||||
}
|
}
|
||||||
if (EXTERNAL_AVX(mm_flags)) {
|
if (EXTERNAL_AVX(cpu_flags)) {
|
||||||
ddsp->dither_int_to_float = ff_dither_int_to_float_triangular_avx;
|
ddsp->dither_int_to_float = ff_dither_int_to_float_triangular_avx;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -27,9 +27,7 @@
|
|||||||
av_cold void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int bit_exact)
|
av_cold void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int bit_exact)
|
||||||
{
|
{
|
||||||
#if HAVE_ALTIVEC
|
#if HAVE_ALTIVEC
|
||||||
int mm_flags = av_get_cpu_flags();
|
if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
|
||||||
|
|
||||||
if (!(mm_flags & AV_CPU_FLAG_ALTIVEC))
|
|
||||||
return;
|
return;
|
||||||
|
|
||||||
fdsp->vector_fmul = ff_vector_fmul_altivec;
|
fdsp->vector_fmul = ff_vector_fmul_altivec;
|
||||||
|
@ -124,17 +124,17 @@ static void vector_fmul_window_sse(float *dst, const float *src0,
|
|||||||
|
|
||||||
av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
|
av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
|
||||||
{
|
{
|
||||||
int mm_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
#if HAVE_6REGS && HAVE_INLINE_ASM
|
#if HAVE_6REGS && HAVE_INLINE_ASM
|
||||||
if (INLINE_AMD3DNOWEXT(mm_flags)) {
|
if (INLINE_AMD3DNOWEXT(cpu_flags)) {
|
||||||
fdsp->vector_fmul_window = vector_fmul_window_3dnowext;
|
fdsp->vector_fmul_window = vector_fmul_window_3dnowext;
|
||||||
}
|
}
|
||||||
if (INLINE_SSE(mm_flags)) {
|
if (INLINE_SSE(cpu_flags)) {
|
||||||
fdsp->vector_fmul_window = vector_fmul_window_sse;
|
fdsp->vector_fmul_window = vector_fmul_window_sse;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
if (EXTERNAL_SSE(mm_flags)) {
|
if (EXTERNAL_SSE(cpu_flags)) {
|
||||||
fdsp->vector_fmul = ff_vector_fmul_sse;
|
fdsp->vector_fmul = ff_vector_fmul_sse;
|
||||||
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_sse;
|
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_sse;
|
||||||
fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_sse;
|
fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_sse;
|
||||||
@ -143,10 +143,10 @@ av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
|
|||||||
fdsp->scalarproduct_float = ff_scalarproduct_float_sse;
|
fdsp->scalarproduct_float = ff_scalarproduct_float_sse;
|
||||||
fdsp->butterflies_float = ff_butterflies_float_sse;
|
fdsp->butterflies_float = ff_butterflies_float_sse;
|
||||||
}
|
}
|
||||||
if (EXTERNAL_SSE2(mm_flags)) {
|
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2;
|
fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2;
|
||||||
}
|
}
|
||||||
if (EXTERNAL_AVX(mm_flags)) {
|
if (EXTERNAL_AVX(cpu_flags)) {
|
||||||
fdsp->vector_fmul = ff_vector_fmul_avx;
|
fdsp->vector_fmul = ff_vector_fmul_avx;
|
||||||
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_avx;
|
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_avx;
|
||||||
fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_avx;
|
fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_avx;
|
||||||
|
Loading…
Reference in New Issue
Block a user