From 5169e688956be3378adb3b16a93962fe0048f1c9 Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Mon, 30 Dec 2013 12:09:03 +0100 Subject: [PATCH] dsputil: Propagate bit depth information to all (sub)init functions This avoids recalculating the value over and over again. --- libavcodec/arm/dsputil_arm.h | 9 ++++--- libavcodec/arm/dsputil_init_arm.c | 11 ++++---- libavcodec/arm/dsputil_init_armv5te.c | 5 ++-- libavcodec/arm/dsputil_init_armv6.c | 20 +++++++-------- libavcodec/arm/dsputil_init_neon.c | 7 +++-- libavcodec/bfin/dsputil_init.c | 12 ++++----- libavcodec/dsputil.c | 10 +++++--- libavcodec/dsputil.h | 12 ++++++--- libavcodec/ppc/dsputil_altivec.c | 5 ++-- libavcodec/ppc/dsputil_altivec.h | 3 ++- libavcodec/ppc/dsputil_ppc.c | 19 ++++++-------- libavcodec/x86/dsputil_init.c | 37 +++++++++++---------------- libavcodec/x86/dsputil_x86.h | 3 ++- libavcodec/x86/dsputilenc_mmx.c | 15 +++++------ 14 files changed, 83 insertions(+), 85 deletions(-) diff --git a/libavcodec/arm/dsputil_arm.h b/libavcodec/arm/dsputil_arm.h index cc14b2c4f3..6080203960 100644 --- a/libavcodec/arm/dsputil_arm.h +++ b/libavcodec/arm/dsputil_arm.h @@ -24,8 +24,11 @@ #include "libavcodec/avcodec.h" #include "libavcodec/dsputil.h" -void ff_dsputil_init_armv5te(DSPContext *c, AVCodecContext *avctx); -void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx); -void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx); +void ff_dsputil_init_armv5te(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); +void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); +void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); #endif /* AVCODEC_ARM_DSPUTIL_ARM_H */ diff --git a/libavcodec/arm/dsputil_init_arm.c b/libavcodec/arm/dsputil_init_arm.c index 2c348ecb0d..33109088ee 100644 --- a/libavcodec/arm/dsputil_init_arm.c +++ b/libavcodec/arm/dsputil_init_arm.c @@ -64,14 +64,15 @@ static void simple_idct_arm_add(uint8_t *dest, int line_size, int16_t *block) ff_add_pixels_clamped(block, dest, line_size); } -av_cold void ff_dsputil_init_arm(DSPContext *c, AVCodecContext *avctx) +av_cold void ff_dsputil_init_arm(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth) { int cpu_flags = av_get_cpu_flags(); ff_put_pixels_clamped = c->put_pixels_clamped; ff_add_pixels_clamped = c->add_pixels_clamped; - if (avctx->bits_per_raw_sample <= 8) { + if (!high_bit_depth) { if (avctx->idct_algo == FF_IDCT_AUTO || avctx->idct_algo == FF_IDCT_ARM) { c->idct_put = j_rev_dct_arm_put; @@ -89,9 +90,9 @@ av_cold void ff_dsputil_init_arm(DSPContext *c, AVCodecContext *avctx) c->add_pixels_clamped = ff_add_pixels_clamped_arm; if (have_armv5te(cpu_flags)) - ff_dsputil_init_armv5te(c, avctx); + ff_dsputil_init_armv5te(c, avctx, high_bit_depth); if (have_armv6(cpu_flags)) - ff_dsputil_init_armv6(c, avctx); + ff_dsputil_init_armv6(c, avctx, high_bit_depth); if (have_neon(cpu_flags)) - ff_dsputil_init_neon(c, avctx); + ff_dsputil_init_neon(c, avctx, high_bit_depth); } diff --git a/libavcodec/arm/dsputil_init_armv5te.c b/libavcodec/arm/dsputil_init_armv5te.c index ac6f1cb629..eb45b72088 100644 --- a/libavcodec/arm/dsputil_init_armv5te.c +++ b/libavcodec/arm/dsputil_init_armv5te.c @@ -29,9 +29,10 @@ void ff_simple_idct_armv5te(int16_t *data); void ff_simple_idct_put_armv5te(uint8_t *dest, int line_size, int16_t *data); void ff_simple_idct_add_armv5te(uint8_t *dest, int line_size, int16_t *data); -av_cold void ff_dsputil_init_armv5te(DSPContext *c, AVCodecContext *avctx) +av_cold void ff_dsputil_init_armv5te(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth) { - if (avctx->bits_per_raw_sample <= 8 && + if (!high_bit_depth && (avctx->idct_algo == FF_IDCT_AUTO || avctx->idct_algo == FF_IDCT_SIMPLEARMV5TE)) { c->idct_put = ff_simple_idct_put_armv5te; diff --git a/libavcodec/arm/dsputil_init_armv6.c b/libavcodec/arm/dsputil_init_armv6.c index f412ff2a94..d385e78fe3 100644 --- a/libavcodec/arm/dsputil_init_armv6.c +++ b/libavcodec/arm/dsputil_init_armv6.c @@ -52,17 +52,17 @@ int ff_sse16_armv6(void *s, uint8_t *blk1, uint8_t *blk2, int ff_pix_norm1_armv6(uint8_t *pix, int line_size); int ff_pix_sum_armv6(uint8_t *pix, int line_size); -av_cold void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx) +av_cold void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth) { - const int high_bit_depth = avctx->bits_per_raw_sample > 8; - - if (avctx->bits_per_raw_sample <= 8 && - (avctx->idct_algo == FF_IDCT_AUTO || - avctx->idct_algo == FF_IDCT_SIMPLEARMV6)) { - c->idct_put = ff_simple_idct_put_armv6; - c->idct_add = ff_simple_idct_add_armv6; - c->idct = ff_simple_idct_armv6; - c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM; + if (!high_bit_depth) { + if (avctx->idct_algo == FF_IDCT_AUTO || + avctx->idct_algo == FF_IDCT_SIMPLEARMV6) { + c->idct_put = ff_simple_idct_put_armv6; + c->idct_add = ff_simple_idct_add_armv6; + c->idct = ff_simple_idct_armv6; + c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM; + } } c->add_pixels_clamped = ff_add_pixels_clamped_armv6; diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c index c39fcc172a..16e052dddd 100644 --- a/libavcodec/arm/dsputil_init_neon.c +++ b/libavcodec/arm/dsputil_init_neon.c @@ -47,11 +47,10 @@ int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int le int32_t ff_scalarproduct_and_madd_int16_neon(int16_t *v1, const int16_t *v2, const int16_t *v3, int len, int mul); -av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) +av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth) { - const int high_bit_depth = avctx->bits_per_raw_sample > 8; - - if (avctx->bits_per_raw_sample <= 8) { + if (!high_bit_depth) { if (avctx->idct_algo == FF_IDCT_AUTO || avctx->idct_algo == FF_IDCT_SIMPLENEON) { c->idct_put = ff_simple_idct_put_neon; diff --git a/libavcodec/bfin/dsputil_init.c b/libavcodec/bfin/dsputil_init.c index 7a214a30bb..751d54aa38 100644 --- a/libavcodec/bfin/dsputil_init.c +++ b/libavcodec/bfin/dsputil_init.c @@ -147,18 +147,14 @@ static int bfin_pix_abs8_xy2(void *c, uint8_t *blk1, uint8_t *blk2, * 2.64s 2/20 same sman.mp4 decode only */ -av_cold void ff_dsputil_init_bfin(DSPContext *c, AVCodecContext *avctx) +av_cold void ff_dsputil_init_bfin(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth) { - const int high_bit_depth = avctx->bits_per_raw_sample > 8; - c->diff_pixels = ff_bfin_diff_pixels; c->put_pixels_clamped = ff_bfin_put_pixels_clamped; c->add_pixels_clamped = ff_bfin_add_pixels_clamped; - if (!high_bit_depth) - c->get_pixels = ff_bfin_get_pixels; - c->clear_blocks = bfin_clear_blocks; c->pix_sum = ff_bfin_pix_sum; @@ -182,7 +178,9 @@ av_cold void ff_dsputil_init_bfin(DSPContext *c, AVCodecContext *avctx) c->sse[1] = ff_bfin_sse8; c->sse[2] = ff_bfin_sse4; - if (avctx->bits_per_raw_sample <= 8) { + if (!high_bit_depth) { + c->get_pixels = ff_bfin_get_pixels; + if (avctx->dct_algo == FF_DCT_AUTO) c->fdct = ff_bfin_fdct; diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index a3b795d879..76f20e0ad7 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -2715,6 +2715,8 @@ av_cold void ff_dsputil_static_init(void) av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) { + const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8; + #if CONFIG_ENCODERS if (avctx->bits_per_raw_sample == 10) { c->fdct = ff_jpeg_fdct_islow_10; @@ -2924,13 +2926,13 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) } if (ARCH_ARM) - ff_dsputil_init_arm(c, avctx); + ff_dsputil_init_arm(c, avctx, high_bit_depth); if (ARCH_BFIN) - ff_dsputil_init_bfin(c, avctx); + ff_dsputil_init_bfin(c, avctx, high_bit_depth); if (ARCH_PPC) - ff_dsputil_init_ppc(c, avctx); + ff_dsputil_init_ppc(c, avctx, high_bit_depth); if (ARCH_X86) - ff_dsputil_init_x86(c, avctx); + ff_dsputil_init_x86(c, avctx, high_bit_depth); ff_init_scantable_permutation(c->idct_permutation, c->idct_permutation_type); diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index 2e258ac4f6..f8ac5eff5a 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -347,9 +347,13 @@ void ff_dsputil_init(DSPContext *p, AVCodecContext *avctx); void ff_set_cmp(DSPContext *c, me_cmp_func *cmp, int type); -void ff_dsputil_init_arm(DSPContext *c, AVCodecContext *avctx); -void ff_dsputil_init_bfin(DSPContext *c, AVCodecContext *avctx); -void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx); -void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx); +void ff_dsputil_init_arm(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); +void ff_dsputil_init_bfin(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); +void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); +void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); #endif /* AVCODEC_DSPUTIL_H */ diff --git a/libavcodec/ppc/dsputil_altivec.c b/libavcodec/ppc/dsputil_altivec.c index a8985fd81a..a2291fb4d9 100644 --- a/libavcodec/ppc/dsputil_altivec.c +++ b/libavcodec/ppc/dsputil_altivec.c @@ -926,10 +926,9 @@ static int hadamard8_diff16_altivec(/* MpegEncContext */ void *s, uint8_t *dst, return score; } -av_cold void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx) +av_cold void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth) { - const int high_bit_depth = avctx->bits_per_raw_sample > 8; - c->pix_abs[0][1] = sad16_x2_altivec; c->pix_abs[0][2] = sad16_y2_altivec; c->pix_abs[0][3] = sad16_xy2_altivec; diff --git a/libavcodec/ppc/dsputil_altivec.h b/libavcodec/ppc/dsputil_altivec.h index c9a7abaef3..929cb7506b 100644 --- a/libavcodec/ppc/dsputil_altivec.h +++ b/libavcodec/ppc/dsputil_altivec.h @@ -38,7 +38,8 @@ void ff_gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h, void ff_idct_put_altivec(uint8_t *dest, int line_size, int16_t *block); void ff_idct_add_altivec(uint8_t *dest, int line_size, int16_t *block); -void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx); +void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); void ff_int_init_altivec(DSPContext *c, AVCodecContext *avctx); #endif /* AVCODEC_PPC_DSPUTIL_ALTIVEC_H */ diff --git a/libavcodec/ppc/dsputil_ppc.c b/libavcodec/ppc/dsputil_ppc.c index 5dd3f2adc7..698f54562f 100644 --- a/libavcodec/ppc/dsputil_ppc.c +++ b/libavcodec/ppc/dsputil_ppc.c @@ -125,10 +125,9 @@ static long check_dcbzl_effect(void) return count; } -av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx) +av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth) { - const int high_bit_depth = avctx->bits_per_raw_sample > 8; - // common optimizations whether AltiVec is available or not if (!high_bit_depth) { switch (check_dcbzl_effect()) { @@ -144,19 +143,17 @@ av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx) } if (PPC_ALTIVEC(av_get_cpu_flags())) { - ff_dsputil_init_altivec(c, avctx); + ff_dsputil_init_altivec(c, avctx, high_bit_depth); ff_int_init_altivec(c, avctx); c->gmc1 = ff_gmc1_altivec; + if (!high_bit_depth) { #if CONFIG_ENCODERS - if (avctx->bits_per_raw_sample <= 8 && - (avctx->dct_algo == FF_DCT_AUTO || - avctx->dct_algo == FF_DCT_ALTIVEC)) { - c->fdct = ff_fdct_altivec; - } + if (avctx->dct_algo == FF_DCT_AUTO || + avctx->dct_algo == FF_DCT_ALTIVEC) { + c->fdct = ff_fdct_altivec; + } #endif //CONFIG_ENCODERS - - if (avctx->bits_per_raw_sample <= 8) { if ((avctx->idct_algo == FF_IDCT_AUTO) || (avctx->idct_algo == FF_IDCT_ALTIVEC)) { c->idct_put = ff_idct_put_altivec; diff --git a/libavcodec/x86/dsputil_init.c b/libavcodec/x86/dsputil_init.c index 00f89b2525..288d1af0ad 100644 --- a/libavcodec/x86/dsputil_init.c +++ b/libavcodec/x86/dsputil_init.c @@ -518,11 +518,9 @@ do { \ } while (0) static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, - int cpu_flags) + int cpu_flags, unsigned high_bit_depth) { #if HAVE_MMX_INLINE - const int high_bit_depth = avctx->bits_per_raw_sample > 8; - c->put_pixels_clamped = ff_put_pixels_clamped_mmx; c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx; c->add_pixels_clamped = ff_add_pixels_clamped_mmx; @@ -559,11 +557,9 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, } static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx, - int cpu_flags) + int cpu_flags, unsigned high_bit_depth) { #if HAVE_MMXEXT_INLINE - const int high_bit_depth = avctx->bits_per_raw_sample > 8; - if (!high_bit_depth && avctx->idct_algo == FF_IDCT_XVIDMMX) { c->idct_put = ff_idct_xvid_mmxext_put; c->idct_add = ff_idct_xvid_mmxext_add; @@ -590,11 +586,9 @@ static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx, } static av_cold void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, - int cpu_flags) + int cpu_flags, unsigned high_bit_depth) { #if HAVE_SSE_INLINE - const int high_bit_depth = avctx->bits_per_raw_sample > 8; - c->vector_clipf = ff_vector_clipf_sse; #if FF_API_XVMC @@ -613,11 +607,9 @@ FF_ENABLE_DEPRECATION_WARNINGS } static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx, - int cpu_flags) + int cpu_flags, unsigned high_bit_depth) { #if HAVE_SSE2_INLINE - const int high_bit_depth = avctx->bits_per_raw_sample > 8; - if (!high_bit_depth && avctx->idct_algo == FF_IDCT_XVIDMMX) { c->idct_put = ff_idct_xvid_sse2_put; c->idct_add = ff_idct_xvid_sse2_add; @@ -639,7 +631,7 @@ static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx, } static av_cold void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx, - int cpu_flags) + int cpu_flags, unsigned high_bit_depth) { #if HAVE_SSSE3_EXTERNAL c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3; @@ -653,14 +645,15 @@ static av_cold void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx, } static av_cold void dsputil_init_sse4(DSPContext *c, AVCodecContext *avctx, - int cpu_flags) + int cpu_flags, unsigned high_bit_depth) { #if HAVE_SSE4_EXTERNAL c->vector_clip_int32 = ff_vector_clip_int32_sse4; #endif /* HAVE_SSE4_EXTERNAL */ } -av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx) +av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth) { int cpu_flags = av_get_cpu_flags(); @@ -670,23 +663,23 @@ av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx) #endif if (X86_MMX(cpu_flags)) - dsputil_init_mmx(c, avctx, cpu_flags); + dsputil_init_mmx(c, avctx, cpu_flags, high_bit_depth); if (X86_MMXEXT(cpu_flags)) - dsputil_init_mmxext(c, avctx, cpu_flags); + dsputil_init_mmxext(c, avctx, cpu_flags, high_bit_depth); if (X86_SSE(cpu_flags)) - dsputil_init_sse(c, avctx, cpu_flags); + dsputil_init_sse(c, avctx, cpu_flags, high_bit_depth); if (X86_SSE2(cpu_flags)) - dsputil_init_sse2(c, avctx, cpu_flags); + dsputil_init_sse2(c, avctx, cpu_flags, high_bit_depth); if (EXTERNAL_SSSE3(cpu_flags)) - dsputil_init_ssse3(c, avctx, cpu_flags); + dsputil_init_ssse3(c, avctx, cpu_flags, high_bit_depth); if (EXTERNAL_SSE4(cpu_flags)) - dsputil_init_sse4(c, avctx, cpu_flags); + dsputil_init_sse4(c, avctx, cpu_flags, high_bit_depth); if (CONFIG_ENCODERS) - ff_dsputilenc_init_mmx(c, avctx); + ff_dsputilenc_init_mmx(c, avctx, high_bit_depth); } diff --git a/libavcodec/x86/dsputil_x86.h b/libavcodec/x86/dsputil_x86.h index 8f1fc17474..8f8ea052b3 100644 --- a/libavcodec/x86/dsputil_x86.h +++ b/libavcodec/x86/dsputil_x86.h @@ -104,7 +104,8 @@ "psubb "#regb", "#regr" \n\t" \ "psubb "#regd", "#regp" \n\t" -void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx); +void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); void ff_dsputil_init_pix_mmx(DSPContext *c, AVCodecContext *avctx); void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, diff --git a/libavcodec/x86/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c index 2c320371b0..99f094e76e 100644 --- a/libavcodec/x86/dsputilenc_mmx.c +++ b/libavcodec/x86/dsputilenc_mmx.c @@ -986,16 +986,15 @@ hadamard_func(mmxext) hadamard_func(sse2) hadamard_func(ssse3) -av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx) +av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth) { int cpu_flags = av_get_cpu_flags(); const int dct_algo = avctx->dct_algo; #if HAVE_YASM - int bit_depth = avctx->bits_per_raw_sample; - if (EXTERNAL_MMX(cpu_flags)) { - if (bit_depth <= 8) + if (!high_bit_depth) c->get_pixels = ff_get_pixels_mmx; c->diff_pixels = ff_diff_pixels_mmx; c->pix_sum = ff_pix_sum16_mmx; @@ -1003,13 +1002,13 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx) } if (EXTERNAL_SSE2(cpu_flags)) - if (bit_depth <= 8) + if (!high_bit_depth) c->get_pixels = ff_get_pixels_sse2; #endif /* HAVE_YASM */ #if HAVE_INLINE_ASM if (INLINE_MMX(cpu_flags)) { - if (avctx->bits_per_raw_sample <= 8 && + if (!high_bit_depth && (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX)) c->fdct = ff_fdct_mmx; @@ -1039,7 +1038,7 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx) } if (INLINE_MMXEXT(cpu_flags)) { - if (avctx->bits_per_raw_sample <= 8 && + if (!high_bit_depth && (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX)) c->fdct = ff_fdct_mmxext; @@ -1054,7 +1053,7 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx) } if (INLINE_SSE2(cpu_flags)) { - if (avctx->bits_per_raw_sample <= 8 && + if (!high_bit_depth && (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX)) c->fdct = ff_fdct_sse2;