diff --git a/libavcodec/Makefile b/libavcodec/Makefile index d8fc0f0c42..2946e0dbc3 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -24,6 +24,7 @@ OBJS = allcodecs.o \ fmtconvert.o \ imgconvert.o \ jrevdct.o \ + mathtables.o \ options.o \ parser.o \ raw.o \ @@ -744,12 +745,6 @@ OBJS-$(HAVE_PTHREADS) += pthread.o frame_thread_encoder.o OBJS-$(HAVE_W32THREADS) += pthread.o frame_thread_encoder.o OBJS-$(HAVE_OS2THREADS) += pthread.o frame_thread_encoder.o -# inverse.o contains the ff_inverse table definition, which is used by -# the FASTDIV macro (from libavutil); since referencing the external -# table has a negative effect on performance, copy it in libavcodec as -# well. -OBJS-$(!CONFIG_SMALL) += inverse.o - SKIPHEADERS += %_tablegen.h \ %_tables.h \ aac_tablegen_decl.h \ diff --git a/libavcodec/arm/mathops.h b/libavcodec/arm/mathops.h index d67714c496..9313d6bbb0 100644 --- a/libavcodec/arm/mathops.h +++ b/libavcodec/arm/mathops.h @@ -36,6 +36,30 @@ static inline av_const int MULH(int a, int b) __asm__ ("smmul %0, %1, %2" : "=r"(r) : "r"(a), "r"(b)); return r; } + +#define FASTDIV FASTDIV +static av_always_inline av_const int FASTDIV(int a, int b) +{ + int r; + __asm__ ("cmp %2, #2 \n\t" + "ldr %0, [%3, %2, lsl #2] \n\t" + "ite le \n\t" + "lsrle %0, %1, #1 \n\t" + "smmulgt %0, %0, %1 \n\t" + : "=&r"(r) : "r"(a), "r"(b), "r"(ff_inverse) : "cc"); + return r; +} + +#else /* HAVE_ARMV6 */ + +#define FASTDIV FASTDIV +static av_always_inline av_const int FASTDIV(int a, int b) +{ + int r, t; + __asm__ ("umull %1, %0, %2, %3" + : "=&r"(r), "=&r"(t) : "r"(a), "r"(ff_inverse[b])); + return r; +} #endif #define MLS64(d, a, b) MAC64(d, -(a), b) diff --git a/libavcodec/inverse.c b/libavcodec/inverse.c deleted file mode 100644 index 04681d256b..0000000000 --- a/libavcodec/inverse.c +++ /dev/null @@ -1 +0,0 @@ -#include "libavutil/inverse.c" diff --git a/libavcodec/mathops.h b/libavcodec/mathops.h index 2840735c79..7ff055141d 100644 --- a/libavcodec/mathops.h +++ 
b/libavcodec/mathops.h @@ -22,9 +22,14 @@ #ifndef AVCODEC_MATHOPS_H #define AVCODEC_MATHOPS_H +#include <stdint.h> + #include "libavutil/common.h" #include "config.h" +extern const uint32_t ff_inverse[257]; +extern const uint8_t ff_sqrt_tab[256]; + #if ARCH_ARM # include "arm/mathops.h" #elif ARCH_AVR32 @@ -185,4 +190,28 @@ if ((y) < (x)) {\ # define PACK_2S16(a,b) PACK_2U16((a)&0xffff, (b)&0xffff) #endif +#ifndef FASTDIV +# define FASTDIV(a,b) ((uint32_t)((((uint64_t)a) * ff_inverse[b]) >> 32)) +#endif /* FASTDIV */ + +static inline av_const unsigned int ff_sqrt(unsigned int a) +{ + unsigned int b; + + if (a < 255) return (ff_sqrt_tab[a + 1] - 1) >> 4; + else if (a < (1 << 12)) b = ff_sqrt_tab[a >> 4] >> 2; +#if !CONFIG_SMALL + else if (a < (1 << 14)) b = ff_sqrt_tab[a >> 6] >> 1; + else if (a < (1 << 16)) b = ff_sqrt_tab[a >> 8] ; +#endif + else { + int s = av_log2_16bit(a >> 16) >> 1; + unsigned int c = a >> (s + 2); + b = ff_sqrt_tab[c >> (s + 8)]; + b = FASTDIV(c,b) + (b << s); + } + + return b - (a < b * b); +} + #endif /* AVCODEC_MATHOPS_H */ diff --git a/libavutil/inverse.c b/libavcodec/mathtables.c similarity index 79% rename from libavutil/inverse.c rename to libavcodec/mathtables.c index 74c7a933ea..0ebc45f3df 100644 --- a/libavutil/inverse.c +++ b/libavcodec/mathtables.c @@ -1,5 +1,4 @@ /* - * Inverse table * Copyright (c) 2002-2004 Michael Niedermayer * * This file is part of FFmpeg. 
@@ -58,3 +57,14 @@ const uint32_t ff_inverse[257]={ 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, 16777216 }; + +const uint8_t ff_sqrt_tab[256]={ + 0, 16, 23, 28, 32, 36, 40, 43, 46, 48, 51, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 77, 79, 80, 82, 84, 85, 87, 88, 90, + 91, 92, 94, 95, 96, 98, 99,100,102,103,104,105,107,108,109,110,111,112,114,115,116,117,118,119,120,121,122,123,124,125,126,127, +128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,144,145,146,147,148,149,150,151,151,152,153,154,155,156,156, +157,158,159,160,160,161,162,163,164,164,165,166,167,168,168,169,170,171,171,172,173,174,174,175,176,176,177,178,179,179,180,181, +182,182,183,184,184,185,186,186,187,188,188,189,190,190,191,192,192,193,194,194,195,196,196,197,198,198,199,200,200,201,202,202, +203,204,204,205,205,206,207,207,208,208,209,210,210,211,212,212,213,213,214,215,215,216,216,217,218,218,219,219,220,220,221,222, +222,223,223,224,224,225,226,226,227,227,228,228,229,230,230,231,231,232,232,233,233,234,235,235,236,236,237,237,238,238,239,239, +240,240,241,242,242,243,243,244,244,245,245,246,246,247,247,248,248,249,249,250,250,251,251,252,252,253,253,254,254,255,255,255 +}; diff --git a/libavcodec/motion_est.c b/libavcodec/motion_est.c index 330cf93f3f..013df37971 100644 --- a/libavcodec/motion_est.c +++ b/libavcodec/motion_est.c @@ -30,7 +30,7 @@ #include #include #include -#include "libavutil/intmath.h" + #include "avcodec.h" #include "dsputil.h" #include "mathops.h" diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c index cd3ce220c0..2b6d5d89d7 100644 --- a/libavcodec/mpegvideo.c +++ b/libavcodec/mpegvideo.c @@ -27,11 +27,11 @@ * The simplest mpeg encoder (well, it was the simplest!). 
*/ -#include "libavutil/intmath.h" #include "libavutil/imgutils.h" #include "avcodec.h" #include "dsputil.h" #include "internal.h" +#include "mathops.h" #include "mpegvideo.h" #include "mjpegenc.h" #include "msmpeg4.h" diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c index 78e2068fda..108db506c9 100644 --- a/libavcodec/mpegvideo_enc.c +++ b/libavcodec/mpegvideo_enc.c @@ -34,6 +34,7 @@ #include "dsputil.h" #include "mpegvideo.h" #include "h263.h" +#include "mathops.h" #include "mjpegenc.h" #include "msmpeg4.h" #include "faandct.h" diff --git a/libavcodec/ra144.c b/libavcodec/ra144.c index 3b24740b97..1ec1e10e2a 100644 --- a/libavcodec/ra144.c +++ b/libavcodec/ra144.c @@ -22,8 +22,8 @@ #include #include "avcodec.h" #include "celp_filters.h" +#include "mathops.h" #include "ra144.h" -#include "libavutil/common.h" const int16_t ff_gain_val_tab[256][3] = { { 541, 956, 768}, { 877, 581, 568}, { 675,1574, 635}, {1248,1464, 668}, diff --git a/libavcodec/roqaudioenc.c b/libavcodec/roqaudioenc.c index 0129051ad4..d69ccbfbe9 100644 --- a/libavcodec/roqaudioenc.c +++ b/libavcodec/roqaudioenc.c @@ -21,10 +21,10 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "libavutil/intmath.h" #include "avcodec.h" #include "bytestream.h" #include "internal.h" +#include "mathops.h" #define ROQ_FRAME_SIZE 735 #define ROQ_HEADER_SIZE 8 diff --git a/libavutil/Makefile b/libavutil/Makefile index 96eada8425..17b23431cb 100644 --- a/libavutil/Makefile +++ b/libavutil/Makefile @@ -72,7 +72,6 @@ OBJS = adler32.o \ float_dsp.o \ imgutils.o \ intfloat_readwrite.o \ - inverse.o \ lfg.o \ lls.o \ log.o \ diff --git a/libavutil/arm/intmath.h b/libavutil/arm/intmath.h index 0ab0d4c6f8..0980a6f3b7 100644 --- a/libavutil/arm/intmath.h +++ b/libavutil/arm/intmath.h @@ -30,19 +30,6 @@ #if HAVE_ARMV6 -#define FASTDIV FASTDIV -static av_always_inline av_const int FASTDIV(int a, int b) -{ - int r; - __asm__ ("cmp %2, #2 \n\t" - "ldr %0, [%3, %2, 
lsl #2] \n\t" - "ite le \n\t" - "lsrle %0, %1, #1 \n\t" - "smmulgt %0, %0, %1 \n\t" - : "=&r"(r) : "r"(a), "r"(b), "r"(ff_inverse) : "cc"); - return r; -} - #define av_clip_uint8 av_clip_uint8_arm static av_always_inline av_const unsigned av_clip_uint8_arm(int a) { @@ -99,17 +86,6 @@ static av_always_inline int av_sat_dadd32_arm(int a, int b) return r; } -#else /* HAVE_ARMV6 */ - -#define FASTDIV FASTDIV -static av_always_inline av_const int FASTDIV(int a, int b) -{ - int r, t; - __asm__ ("umull %1, %0, %2, %3" - : "=&r"(r), "=&r"(t) : "r"(a), "r"(ff_inverse[b])); - return r; -} - #endif /* HAVE_ARMV6 */ #if HAVE_ASM_MOD_Q diff --git a/libavutil/intmath.h b/libavutil/intmath.h index 9cba406abd..da333bc6d2 100644 --- a/libavutil/intmath.h +++ b/libavutil/intmath.h @@ -21,7 +21,6 @@ #ifndef AVUTIL_INTMATH_H #define AVUTIL_INTMATH_H -#include <stdint.h> #include "config.h" #include "attributes.h" @@ -30,12 +29,8 @@ * @{ */ -extern const uint32_t ff_inverse[257]; - #if ARCH_ARM # include "arm/intmath.h" -#elif ARCH_X86 -# include "x86/intmath.h" #endif #if HAVE_FAST_CLZ && AV_GCC_VERSION_AT_LEAST(3,4) @@ -49,34 +44,6 @@ extern const uint32_t ff_inverse[257]; #endif /* AV_GCC_VERSION_AT_LEAST(3,4) */ -#ifndef FASTDIV -# define FASTDIV(a,b) ((uint32_t)((((uint64_t)a) * ff_inverse[b]) >> 32)) -#endif /* FASTDIV */ - -#include "common.h" - -extern const uint8_t ff_sqrt_tab[256]; - -static inline av_const unsigned int ff_sqrt(unsigned int a) -{ - unsigned int b; - - if (a < 255) return (ff_sqrt_tab[a + 1] - 1) >> 4; - else if (a < (1 << 12)) b = ff_sqrt_tab[a >> 4] >> 2; -#if !CONFIG_SMALL - else if (a < (1 << 14)) b = ff_sqrt_tab[a >> 6] >> 1; - else if (a < (1 << 16)) b = ff_sqrt_tab[a >> 8] ; -#endif - else { - int s = av_log2_16bit(a >> 16) >> 1; - unsigned int c = a >> (s + 2); - b = ff_sqrt_tab[c >> (s + 8)]; - b = FASTDIV(c,b) + (b << s); - } - - return b - (a < b * b); -} - /** * @} */ diff --git a/libavutil/mathematics.c b/libavutil/mathematics.c index 2e6a5fa8ae..5dfc59d838 
100644 --- a/libavutil/mathematics.c +++ b/libavutil/mathematics.c @@ -29,17 +29,6 @@ #include "libavutil/common.h" #include "avassert.h" -const uint8_t ff_sqrt_tab[256]={ - 0, 16, 23, 28, 32, 36, 40, 43, 46, 48, 51, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 77, 79, 80, 82, 84, 85, 87, 88, 90, - 91, 92, 94, 95, 96, 98, 99,100,102,103,104,105,107,108,109,110,111,112,114,115,116,117,118,119,120,121,122,123,124,125,126,127, -128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,144,145,146,147,148,149,150,151,151,152,153,154,155,156,156, -157,158,159,160,160,161,162,163,164,164,165,166,167,168,168,169,170,171,171,172,173,174,174,175,176,176,177,178,179,179,180,181, -182,182,183,184,184,185,186,186,187,188,188,189,190,190,191,192,192,193,194,194,195,196,196,197,198,198,199,200,200,201,202,202, -203,204,204,205,205,206,207,207,208,208,209,210,210,211,212,212,213,213,214,215,215,216,216,217,218,218,219,219,220,220,221,222, -222,223,223,224,224,225,226,226,227,227,228,228,229,230,230,231,231,232,232,233,233,234,235,235,236,236,237,237,238,238,239,239, -240,240,241,242,242,243,243,244,244,245,245,246,246,247,247,248,248,249,249,250,250,251,251,252,252,253,253,254,254,255,255,255 -}; - const uint8_t ff_log2_tab[256]={ 0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, diff --git a/libavutil/x86/intmath.h b/libavutil/x86/intmath.h deleted file mode 100644 index 4525a4dae5..0000000000 --- a/libavutil/x86/intmath.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2010 Mans Rullgard - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
- * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef AVUTIL_X86_INTMATH_H -#define AVUTIL_X86_INTMATH_H - -#if HAVE_INLINE_ASM -#define FASTDIV(a,b) \ - ({\ - int ret, dmy;\ - __asm__ volatile(\ - "mull %3"\ - :"=d"(ret), "=a"(dmy)\ - :"1"((unsigned int)(a)), "rm"(ff_inverse[b])\ - );\ - ret;\ - }) -#endif - -#endif /* AVUTIL_X86_INTMATH_H */