mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-19 05:49:09 +02:00
971d12b7f9
This uses Stein's binary GCD algorithm: https://en.wikipedia.org/wiki/Binary_GCD_algorithm to get a roughly 4x speedup over Euclidean GCD on standard architectures with a compiler intrinsic for ctzll, and a roughly 2x speedup otherwise. At the moment, the compiler intrinsic is used on GCC and Clang due to its easy availability. Quick note regarding overflow: yes, subtractions on int64_t can overflow, but the llabs takes care of that. The llabs is also guaranteed to be safe, with no annoying INT64_MIN business, since INT64_MIN, being a power of 2, is shifted down before being sent to llabs. The binary GCD needs ff_ctzll, an extension of ff_ctz for long long (int64_t). On GCC, this is provided by a built-in. On Microsoft, there is a BitScanForward64 analog of BitScanForward that should work, but I can't confirm. Apparently it is not available on 32 bit builds, so this may or may not work correctly. On Intel, per the documentation there is only an intrinsic for _bit_scan_forward and people have posted on forums regarding _bit_scan_forward64, but often their documentation is woeful. Again, I don't have it, so I can't test. As such, to be safe, for now only the GCC/Clang intrinsic is added; the rest use a compiled version based on the De-Bruijn method of Leiserson et al: http://supertech.csail.mit.edu/papers/debruijn.pdf. Tested with FATE, sample benchmark (x86-64, GCC 5.2.0, Haswell) with a START_TIMER and STOP_TIMER in libavutil/rational.c, followed by a make fate. aac-am00_88.err: builtin: 714 decicycles in av_gcd, 4095 runs, 1 skips; de-bruijn: 1440 decicycles in av_gcd, 4096 runs, 0 skips; previous: 2889 decicycles in av_gcd, 4096 runs, 0 skips. Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
205 lines
5.5 KiB
C
205 lines
5.5 KiB
C
/*
|
|
* Copyright (c) 2005-2012 Michael Niedermayer <michaelni@gmx.at>
|
|
*
|
|
* This file is part of FFmpeg.
|
|
*
|
|
* FFmpeg is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
*
|
|
* FFmpeg is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with FFmpeg; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
|
|
/**
|
|
* @file
|
|
* miscellaneous math routines and tables
|
|
*/
|
|
|
|
#include <stdint.h>
|
|
#include <limits.h>
|
|
|
|
#include "mathematics.h"
|
|
#include "libavutil/intmath.h"
|
|
#include "libavutil/common.h"
|
|
#include "avassert.h"
|
|
#include "version.h"
|
|
|
|
/* Stein's binary GCD algorithm:
|
|
* https://en.wikipedia.org/wiki/Binary_GCD_algorithm */
|
|
int64_t av_gcd(int64_t a, int64_t b) {
|
|
int za, zb, k;
|
|
int64_t u, v;
|
|
if (a == 0)
|
|
return b;
|
|
if (b == 0)
|
|
return a;
|
|
za = ff_ctzll(a);
|
|
zb = ff_ctzll(b);
|
|
k = FFMIN(za, zb);
|
|
u = llabs(a >> za);
|
|
v = llabs(b >> zb);
|
|
while (u != v) {
|
|
if (u > v)
|
|
FFSWAP(int64_t, v, u);
|
|
v -= u;
|
|
v >>= ff_ctzll(v);
|
|
}
|
|
return u << k;
|
|
}
|
|
|
|
int64_t av_rescale_rnd(int64_t a, int64_t b, int64_t c, enum AVRounding rnd)
|
|
{
|
|
int64_t r = 0;
|
|
av_assert2(c > 0);
|
|
av_assert2(b >=0);
|
|
av_assert2((unsigned)(rnd&~AV_ROUND_PASS_MINMAX)<=5 && (rnd&~AV_ROUND_PASS_MINMAX)!=4);
|
|
|
|
if (c <= 0 || b < 0 || !((unsigned)(rnd&~AV_ROUND_PASS_MINMAX)<=5 && (rnd&~AV_ROUND_PASS_MINMAX)!=4))
|
|
return INT64_MIN;
|
|
|
|
if (rnd & AV_ROUND_PASS_MINMAX) {
|
|
if (a == INT64_MIN || a == INT64_MAX)
|
|
return a;
|
|
rnd -= AV_ROUND_PASS_MINMAX;
|
|
}
|
|
|
|
if (a < 0 && a != INT64_MIN)
|
|
return -av_rescale_rnd(-a, b, c, rnd ^ ((rnd >> 1) & 1));
|
|
|
|
if (rnd == AV_ROUND_NEAR_INF)
|
|
r = c / 2;
|
|
else if (rnd & 1)
|
|
r = c - 1;
|
|
|
|
if (b <= INT_MAX && c <= INT_MAX) {
|
|
if (a <= INT_MAX)
|
|
return (a * b + r) / c;
|
|
else
|
|
return a / c * b + (a % c * b + r) / c;
|
|
} else {
|
|
#if 1
|
|
uint64_t a0 = a & 0xFFFFFFFF;
|
|
uint64_t a1 = a >> 32;
|
|
uint64_t b0 = b & 0xFFFFFFFF;
|
|
uint64_t b1 = b >> 32;
|
|
uint64_t t1 = a0 * b1 + a1 * b0;
|
|
uint64_t t1a = t1 << 32;
|
|
int i;
|
|
|
|
a0 = a0 * b0 + t1a;
|
|
a1 = a1 * b1 + (t1 >> 32) + (a0 < t1a);
|
|
a0 += r;
|
|
a1 += a0 < r;
|
|
|
|
for (i = 63; i >= 0; i--) {
|
|
a1 += a1 + ((a0 >> i) & 1);
|
|
t1 += t1;
|
|
if (c <= a1) {
|
|
a1 -= c;
|
|
t1++;
|
|
}
|
|
}
|
|
return t1;
|
|
}
|
|
#else
|
|
AVInteger ai;
|
|
ai = av_mul_i(av_int2i(a), av_int2i(b));
|
|
ai = av_add_i(ai, av_int2i(r));
|
|
|
|
return av_i2int(av_div_i(ai, av_int2i(c)));
|
|
}
|
|
#endif
|
|
}
|
|
|
|
int64_t av_rescale(int64_t a, int64_t b, int64_t c)
|
|
{
|
|
return av_rescale_rnd(a, b, c, AV_ROUND_NEAR_INF);
|
|
}
|
|
|
|
/**
 * Rescale a by the ratio of two rationals, bq / cq, with the given rounding.
 *
 * The ratio is expanded to the integer fraction
 * (bq.num * cq.den) / (cq.num * bq.den), computed in 64 bits, and handed to
 * av_rescale_rnd().
 */
int64_t av_rescale_q_rnd(int64_t a, AVRational bq, AVRational cq,
                         enum AVRounding rnd)
{
    const int64_t scale_num = (int64_t)cq.den * bq.num;
    const int64_t scale_den = (int64_t)bq.den * cq.num;

    return av_rescale_rnd(a, scale_num, scale_den, rnd);
}
|
|
|
|
/**
 * Rescale a by the ratio of two rationals, bq / cq.
 *
 * Convenience wrapper around av_rescale_q_rnd() that always passes
 * AV_ROUND_NEAR_INF as the rounding method.
 */
int64_t av_rescale_q(int64_t a, AVRational bq, AVRational cq)
{
    const enum AVRounding rounding = AV_ROUND_NEAR_INF;

    return av_rescale_q_rnd(a, bq, cq, rounding);
}
|
|
|
|
/**
 * Compare two timestamps, each expressed in its own time base.
 *
 * @param ts_a first timestamp, in units of tb_a
 * @param tb_a time base of ts_a
 * @param ts_b second timestamp, in units of tb_b
 * @param tb_b time base of ts_b
 * @return -1 if ts_a is before ts_b, 1 if it is after, 0 otherwise
 */
int av_compare_ts(int64_t ts_a, AVRational tb_a, int64_t ts_b, AVRational tb_b)
{
    /* Cross-multiplied time bases: comparing ts_a * tb_a with ts_b * tb_b
     * becomes comparing ts_a * a with ts_b * b. */
    int64_t a = tb_a.num * (int64_t)tb_b.den;
    int64_t b = tb_b.num * (int64_t)tb_a.den;

    /* Fast path: plain 64-bit products when the operands are small.
     * FFABS(INT64_MIN) would be a signed overflow, so such a timestamp is
     * excluded up front and handled by the rescaling path below. */
    if (ts_a != INT64_MIN && ts_b != INT64_MIN &&
        (FFABS(ts_a)|a|FFABS(ts_b)|b) <= INT_MAX)
        return (ts_a*a > ts_b*b) - (ts_a*a < ts_b*b);

    /* Otherwise convert each timestamp into the other one's time base,
     * rounding down, and compare there. */
    if (av_rescale_rnd(ts_a, a, b, AV_ROUND_DOWN) < ts_b)
        return -1;
    if (av_rescale_rnd(ts_b, b, a, AV_ROUND_DOWN) < ts_a)
        return 1;
    return 0;
}
|
|
|
|
/**
 * Compare two numbers modulo mod and return their signed distance.
 *
 * The difference a - b is reduced by masking with mod - 1, which is a
 * modular reduction only when mod is a power of two.
 *
 * @param a   first value
 * @param b   second value
 * @param mod modulus
 * @return the masked difference, moved into the negative range by
 *         subtracting mod when it is larger than mod / 2
 */
int64_t av_compare_mod(uint64_t a, uint64_t b, uint64_t mod)
{
    const uint64_t half  = mod >> 1;
    uint64_t       delta = (a - b) & (mod - 1);

    /* Distances above half the modulus are reported as negative ones. */
    if (delta > half)
        delta -= mod;

    return (int64_t)delta;
}
|
|
|
|
/**
 * Rescale a timestamp from in_tb to out_tb while keeping track, in fs_tb
 * units, of where the next timestamp is expected.
 *
 * @param in_tb    time base of in_ts
 * @param in_ts    input timestamp, must not be AV_NOPTS_VALUE
 * @param fs_tb    time base in which duration and *last are expressed
 * @param duration amount added to the stored position, must be >= 0
 * @param last     in/out: expected position in fs_tb units, or
 *                 AV_NOPTS_VALUE if there is none yet; updated on return
 * @param out_tb   time base of the returned timestamp
 * @return the timestamp in out_tb units
 */
int64_t av_rescale_delta(AVRational in_tb, int64_t in_ts, AVRational fs_tb, int duration, int64_t *last, AVRational out_tb){
    int64_t lo = 0, hi = 0, clipped;
    int direct;

    av_assert0(in_ts != AV_NOPTS_VALUE);
    av_assert0(duration >= 0);

    /* Plain rescaling is used when there is no stored position, no
     * duration, or in_tb compares less than or equal to out_tb (checked by
     * cross-multiplying the two fractions). */
    direct = *last == AV_NOPTS_VALUE || !duration ||
             in_tb.num*(int64_t)out_tb.den <= out_tb.num*(int64_t)in_tb.den;

    if (!direct) {
        /* [lo, hi]: in_ts minus/plus half an input tick, converted to fs_tb
         * and rounded outwards. A stored position further away from that
         * window than the window's own width is dropped. */
        lo =  av_rescale_q_rnd(2*in_ts-1, in_tb, fs_tb, AV_ROUND_DOWN)   >>1;
        hi = (av_rescale_q_rnd(2*in_ts+1, in_tb, fs_tb, AV_ROUND_UP  )+1)>>1;
        direct = *last < 2*lo - hi || *last > 2*hi - lo;
    }

    if (direct) {
        *last = av_rescale_q(in_ts, in_tb, fs_tb) + duration;
        return av_rescale_q(in_ts, in_tb, out_tb);
    }

    /* Keep following the stored position, limited to the window. */
    clipped = av_clip64(*last, lo, hi);
    *last   = clipped + duration;

    return av_rescale_q(clipped, fs_tb, out_tb);
}
|
|
|
|
/**
 * Add inc, expressed in inc_tb, to the timestamp ts, expressed in ts_tb.
 *
 * @param ts_tb  time base of ts and of the return value
 * @param ts     timestamp to advance
 * @param inc_tb time base of inc
 * @param inc    number of inc_tb units to add
 * @return the advanced timestamp in ts_tb units; ts itself when the
 *         increment is a non-integral amount smaller than one ts_tb unit
 */
int64_t av_add_stable(AVRational ts_tb, int64_t ts, AVRational inc_tb, int64_t inc)
{
    int64_t step_num, step_den;
    int64_t ticks, ticks_ts;

    /* Fold the count into the time base so that the increment is exactly
     * one unit of inc_tb from here on. */
    if (inc != 1)
        inc_tb = av_mul_q(inc_tb, (AVRational) {inc, 1});

    /* step_num / step_den is inc_tb / ts_tb, i.e. the increment measured
     * in ts_tb units. */
    step_num = inc_tb.num * (int64_t)ts_tb.den;
    step_den = inc_tb.den * (int64_t)ts_tb.num;

    if (step_num % step_den == 0)
        return ts + step_num / step_den;
    if (step_num < step_den)
        return ts;

    /* Non-integral increment: count in inc_tb units, step by one unit, and
     * carry over the offset ts had from its position on the inc_tb grid. */
    ticks    = av_rescale_q(ts, ts_tb, inc_tb);
    ticks_ts = av_rescale_q(ticks, inc_tb, ts_tb);

    return av_rescale_q(ticks + 1, inc_tb, ts_tb) + (ts - ticks_ts);
}
|