1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-01-03 05:10:03 +02:00
FFmpeg/libavutil/mathematics.c
Michael Niedermayer a18eff49c0 av_compare_ts: Improve speed when calculations fit in 64bit.
about 110 cpu cycles before 60 cpu cycles afterwards.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
2011-05-11 20:41:19 +02:00

185 lines
6.8 KiB
C

/*
* Copyright (c) 2005 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* miscellaneous math routines and tables
*/
#include <assert.h>
#include <stdint.h>
#include <limits.h>
#include "mathematics.h"
#include "libavutil/common.h"
const uint8_t ff_sqrt_tab[256]={
0, 16, 23, 28, 32, 36, 40, 43, 46, 48, 51, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 77, 79, 80, 82, 84, 85, 87, 88, 90,
91, 92, 94, 95, 96, 98, 99,100,102,103,104,105,107,108,109,110,111,112,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,144,145,146,147,148,149,150,151,151,152,153,154,155,156,156,
157,158,159,160,160,161,162,163,164,164,165,166,167,168,168,169,170,171,171,172,173,174,174,175,176,176,177,178,179,179,180,181,
182,182,183,184,184,185,186,186,187,188,188,189,190,190,191,192,192,193,194,194,195,196,196,197,198,198,199,200,200,201,202,202,
203,204,204,205,205,206,207,207,208,208,209,210,210,211,212,212,213,213,214,215,215,216,216,217,218,218,219,219,220,220,221,222,
222,223,223,224,224,225,226,226,227,227,228,228,229,230,230,231,231,232,232,233,233,234,235,235,236,236,237,237,238,238,239,239,
240,240,241,242,242,243,243,244,244,245,245,246,246,247,247,248,248,249,249,250,250,251,251,252,252,253,253,254,254,255,255,255
};
const uint8_t ff_log2_tab[256]={
0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
};
const uint8_t av_reverse[256]={
0x00,0x80,0x40,0xC0,0x20,0xA0,0x60,0xE0,0x10,0x90,0x50,0xD0,0x30,0xB0,0x70,0xF0,
0x08,0x88,0x48,0xC8,0x28,0xA8,0x68,0xE8,0x18,0x98,0x58,0xD8,0x38,0xB8,0x78,0xF8,
0x04,0x84,0x44,0xC4,0x24,0xA4,0x64,0xE4,0x14,0x94,0x54,0xD4,0x34,0xB4,0x74,0xF4,
0x0C,0x8C,0x4C,0xCC,0x2C,0xAC,0x6C,0xEC,0x1C,0x9C,0x5C,0xDC,0x3C,0xBC,0x7C,0xFC,
0x02,0x82,0x42,0xC2,0x22,0xA2,0x62,0xE2,0x12,0x92,0x52,0xD2,0x32,0xB2,0x72,0xF2,
0x0A,0x8A,0x4A,0xCA,0x2A,0xAA,0x6A,0xEA,0x1A,0x9A,0x5A,0xDA,0x3A,0xBA,0x7A,0xFA,
0x06,0x86,0x46,0xC6,0x26,0xA6,0x66,0xE6,0x16,0x96,0x56,0xD6,0x36,0xB6,0x76,0xF6,
0x0E,0x8E,0x4E,0xCE,0x2E,0xAE,0x6E,0xEE,0x1E,0x9E,0x5E,0xDE,0x3E,0xBE,0x7E,0xFE,
0x01,0x81,0x41,0xC1,0x21,0xA1,0x61,0xE1,0x11,0x91,0x51,0xD1,0x31,0xB1,0x71,0xF1,
0x09,0x89,0x49,0xC9,0x29,0xA9,0x69,0xE9,0x19,0x99,0x59,0xD9,0x39,0xB9,0x79,0xF9,
0x05,0x85,0x45,0xC5,0x25,0xA5,0x65,0xE5,0x15,0x95,0x55,0xD5,0x35,0xB5,0x75,0xF5,
0x0D,0x8D,0x4D,0xCD,0x2D,0xAD,0x6D,0xED,0x1D,0x9D,0x5D,0xDD,0x3D,0xBD,0x7D,0xFD,
0x03,0x83,0x43,0xC3,0x23,0xA3,0x63,0xE3,0x13,0x93,0x53,0xD3,0x33,0xB3,0x73,0xF3,
0x0B,0x8B,0x4B,0xCB,0x2B,0xAB,0x6B,0xEB,0x1B,0x9B,0x5B,0xDB,0x3B,0xBB,0x7B,0xFB,
0x07,0x87,0x47,0xC7,0x27,0xA7,0x67,0xE7,0x17,0x97,0x57,0xD7,0x37,0xB7,0x77,0xF7,
0x0F,0x8F,0x4F,0xCF,0x2F,0xAF,0x6F,0xEF,0x1F,0x9F,0x5F,0xDF,0x3F,0xBF,0x7F,0xFF,
};
int64_t av_gcd(int64_t a, int64_t b){
if(b) return av_gcd(b, a%b);
else return a;
}
int64_t av_rescale_rnd(int64_t a, int64_t b, int64_t c, enum AVRounding rnd){
int64_t r=0;
assert(c > 0);
assert(b >=0);
assert((unsigned)rnd<=5 && rnd!=4);
if(a<0 && a != INT64_MIN) return -av_rescale_rnd(-a, b, c, rnd ^ ((rnd>>1)&1));
if(rnd==AV_ROUND_NEAR_INF) r= c/2;
else if(rnd&1) r= c-1;
if(b<=INT_MAX && c<=INT_MAX){
if(a<=INT_MAX)
return (a * b + r)/c;
else
return a/c*b + (a%c*b + r)/c;
}else{
#if 1
uint64_t a0= a&0xFFFFFFFF;
uint64_t a1= a>>32;
uint64_t b0= b&0xFFFFFFFF;
uint64_t b1= b>>32;
uint64_t t1= a0*b1 + a1*b0;
uint64_t t1a= t1<<32;
int i;
a0 = a0*b0 + t1a;
a1 = a1*b1 + (t1>>32) + (a0<t1a);
a0 += r;
a1 += a0<r;
for(i=63; i>=0; i--){
// int o= a1 & 0x8000000000000000ULL;
a1+= a1 + ((a0>>i)&1);
t1+=t1;
if(/*o || */c <= a1){
a1 -= c;
t1++;
}
}
return t1;
}
#else
AVInteger ai;
ai= av_mul_i(av_int2i(a), av_int2i(b));
ai= av_add_i(ai, av_int2i(r));
return av_i2int(av_div_i(ai, av_int2i(c)));
}
#endif
}
int64_t av_rescale(int64_t a, int64_t b, int64_t c){
return av_rescale_rnd(a, b, c, AV_ROUND_NEAR_INF);
}
int64_t av_rescale_q(int64_t a, AVRational bq, AVRational cq){
int64_t b= bq.num * (int64_t)cq.den;
int64_t c= cq.num * (int64_t)bq.den;
return av_rescale_rnd(a, b, c, AV_ROUND_NEAR_INF);
}
int av_compare_ts(int64_t ts_a, AVRational tb_a, int64_t ts_b, AVRational tb_b){
int64_t a= tb_a.num * (int64_t)tb_b.den;
int64_t b= tb_b.num * (int64_t)tb_a.den;
if((FFABS(ts_a)|a|FFABS(ts_b)|b)<=INT_MAX)
return (ts_a*a > ts_b*b) - (ts_a*a < ts_b*b);
if (av_rescale_rnd(ts_a, a, b, AV_ROUND_DOWN) < ts_b) return -1;
if (av_rescale_rnd(ts_b, b, a, AV_ROUND_DOWN) < ts_a) return 1;
return 0;
}
int64_t av_compare_mod(uint64_t a, uint64_t b, uint64_t mod){
int64_t c= (a-b) & (mod-1);
if(c > (mod>>1))
c-= mod;
return c;
}
#ifdef TEST
#include "integer.h"
#undef printf
int main(void){
int64_t a,b,c,d,e;
for(a=7; a<(1LL<<62); a+=a/3+1){
for(b=3; b<(1LL<<62); b+=b/4+1){
for(c=9; c<(1LL<<62); c+=(c*2)/5+3){
int64_t r= c/2;
AVInteger ai;
ai= av_mul_i(av_int2i(a), av_int2i(b));
ai= av_add_i(ai, av_int2i(r));
d= av_i2int(av_div_i(ai, av_int2i(c)));
e= av_rescale(a,b,c);
if((double)a * (double)b / (double)c > (1LL<<63))
continue;
if(d!=e) printf("%"PRId64"*%"PRId64"/%"PRId64"= %"PRId64"=%"PRId64"\n", a, b, c, d, e);
}
}
}
return 0;
}
#endif