1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-01-19 05:49:09 +02:00
FFmpeg/libavutil/intmath.h
Ganesh Ajjanagadde 55d3e97970 avutil/intmath: use de Bruijn based ff_ctz
It has already been demonstrated that the de Bruijn method has benefits
over the current implementation: commit 971d12b7f9d7be3ca8eb98e6c04ed521f83cbd3c.
That commit implemented it for long long, this extends it to the int version.

Tested with FATE.

Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com>
Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
2015-10-14 13:39:42 -04:00

172 lines
4.0 KiB
C

/*
* Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVUTIL_INTMATH_H
#define AVUTIL_INTMATH_H
#include <stdint.h>
#include "config.h"
#include "attributes.h"
#if ARCH_ARM
# include "arm/intmath.h"
#endif
#if ARCH_X86
# include "x86/intmath.h"
#endif
#if HAVE_FAST_CLZ
#if defined( __INTEL_COMPILER )
#ifndef ff_log2
# define ff_log2(x) (_bit_scan_reverse((x)|1))
# ifndef ff_log2_16bit
# define ff_log2_16bit av_log2
# endif
#endif /* ff_log2 */
#elif AV_GCC_VERSION_AT_LEAST(3,4)
#ifndef ff_log2
# define ff_log2(x) (31 - __builtin_clz((x)|1))
# ifndef ff_log2_16bit
# define ff_log2_16bit av_log2
# endif
#endif /* ff_log2 */
#endif /* AV_GCC_VERSION_AT_LEAST(3,4) */
#endif
extern const uint8_t ff_log2_tab[256];
#ifndef ff_log2
#define ff_log2 ff_log2_c
#if !defined( _MSC_VER )
static av_always_inline av_const int ff_log2_c(unsigned int v)
{
int n = 0;
if (v & 0xffff0000) {
v >>= 16;
n += 16;
}
if (v & 0xff00) {
v >>= 8;
n += 8;
}
n += ff_log2_tab[v];
return n;
}
#else
static av_always_inline av_const int ff_log2_c(unsigned int v)
{
unsigned long n;
_BitScanReverse(&n, v|1);
return n;
}
#define ff_log2_16bit av_log2
#endif
#endif
#ifndef ff_log2_16bit
#define ff_log2_16bit ff_log2_16bit_c
static av_always_inline av_const int ff_log2_16bit_c(unsigned int v)
{
int n = 0;
if (v & 0xff00) {
v >>= 8;
n += 8;
}
n += ff_log2_tab[v];
return n;
}
#endif
#define av_log2 ff_log2
#define av_log2_16bit ff_log2_16bit
/**
* @addtogroup lavu_math
* @{
*/
#if HAVE_FAST_CLZ
#if defined( __INTEL_COMPILER )
#ifndef ff_ctz
#define ff_ctz(v) _bit_scan_forward(v)
#endif
#elif AV_GCC_VERSION_AT_LEAST(3,4)
#ifndef ff_ctz
#define ff_ctz(v) __builtin_ctz(v)
#endif
#ifndef ff_ctzll
#define ff_ctzll(v) __builtin_ctzll(v)
#endif
#endif
#endif
#ifndef ff_ctz
#define ff_ctz ff_ctz_c
/**
* Trailing zero bit count.
*
* @param v input value. If v is 0, the result is undefined.
* @return the number of trailing 0-bits
*/
#if !defined( _MSC_VER )
/* We use the De-Bruijn method outlined in:
* http://supertech.csail.mit.edu/papers/debruijn.pdf. */
static av_always_inline av_const int ff_ctz_c(int v)
{
static const uint8_t debruijn_ctz32[32] = {
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
};
return debruijn_ctz32[(uint32_t)((v & -v) * 0x077CB531U) >> 27];
}
#else
static av_always_inline av_const int ff_ctz_c( int v )
{
unsigned long c;
_BitScanForward(&c, v);
return c;
}
#endif
#endif
#ifndef ff_ctzll
#define ff_ctzll ff_ctzll_c
/* We use the De-Bruijn method outlined in:
* http://supertech.csail.mit.edu/papers/debruijn.pdf. */
static av_always_inline av_const int ff_ctzll_c(long long v)
{
static const uint8_t debruijn_ctz64[64] = {
0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28,
62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11,
63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10,
51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12
};
return debruijn_ctz64[(uint64_t)((v & -v) * 0x022FDD63CC95386D) >> 58];
}
#endif
/**
* @}
*/
#endif /* AVUTIL_INTMATH_H */