lavu/tx: implement 32 bit fixed point FFT and MDCT
Required only minimal changes to the code, so it made sense to implement. FFT and MDCT tested; the output of both is properly rounded.

Fun fact: the non-power-of-two fixed-point FFT and MDCT here are the fastest non-power-of-two fixed-point FFT and MDCT ever written.

This can replace the power-of-two integer MDCTs in aac and ac3 if the MIPS optimizations are ported across. Unfortunately the ac3 encoder uses a 16-bit fixed-point forward transform, unlike the decoder, which uses a 32-bit inverse transform, so some modifications might be required there.

The 3-point FFT is somewhat less accurate than it otherwise could be, producing minor rounding errors with bigger transforms. However, this could be improved later, and the way it is currently written is the way one would write assembly for it. Similar rounding errors can also be found throughout the power-of-two FFTs, though those are more difficult to correct. Despite this, the integer transforms are more than accurate enough.
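For orientation, here is a minimal usage sketch (not part of this commit) of the new integer transform through the existing public API in tx.h; the length 1280 (5 * 256, a non-power-of-two size), the stride value and the error handling are illustrative assumptions of mine:

#include <stdint.h>
#include <string.h>
#include "libavutil/error.h"
#include "libavutil/mem.h"
#include "libavutil/tx.h"

/* Run a forward 32-bit fixed-point FFT on Q31 samples. */
static int int32_fft_example(void)
{
    const int len = 1280;          /* 5 * 256, exercises the non-power-of-two path */
    AVTXContext *ctx = NULL;
    av_tx_fn tx;
    AVComplexInt32 *in = NULL, *out = NULL;
    float scale = 1.0f;            /* only dereferenced for MDCT types */
    int ret;

    ret = av_tx_init(&ctx, &tx, AV_TX_INT32_FFT, 0 /* forward */, len, &scale, 0);
    if (ret < 0)
        return ret;

    in  = av_malloc(len * sizeof(*in));
    out = av_malloc(len * sizeof(*out));
    if (!in || !out) {
        ret = AVERROR(ENOMEM);
        goto end;
    }

    memset(in, 0, len * sizeof(*in)); /* fill with Q31 samples instead */

    tx(ctx, out, in, sizeof(AVComplexInt32)); /* stride in bytes, see tx.h */

end:
    av_free(in);
    av_free(out);
    av_tx_uninit(&ctx);
    return ret;
}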
This commit is contained in:
parent: e007059d66
commit: e8f054b095
doc/APIchanges
@@ -15,6 +15,9 @@ libavutil: 2017-10-21
 
 API changes, most recent first:
 
+2020-02-13 - xxxxxxxxxx - lavu 56.41.100 - tx.h
+  Add AV_TX_INT32_FFT and AV_TX_INT32_MDCT
+
 2020-02-12 - xxxxxxxxxx - lavu 56.40.100 - log.h
   Add av_log_once().
 
libavutil/Makefile
@@ -163,7 +163,8 @@ OBJS = adler32.o \
        tea.o                                                           \
        tx.o                                                            \
        tx_float.o                                                      \
-       tx_double.o
+       tx_double.o                                                     \
+       tx_int32.o
 
 OBJS-$(CONFIG_CUDA)                     += hwcontext_cuda.o
 OBJS-$(CONFIG_D3D11VA)                  += hwcontext_d3d11va.o
libavutil/tx.c
@@ -18,6 +18,18 @@
 
 #include "tx_priv.h"
 
+int ff_tx_type_is_mdct(enum AVTXType type)
+{
+    switch (type) {
+    case AV_TX_FLOAT_MDCT:
+    case AV_TX_DOUBLE_MDCT:
+    case AV_TX_INT32_MDCT:
+        return 1;
+    default:
+        return 0;
+    }
+}
+
 /* Calculates the modular multiplicative inverse, not fast, replace */
 static av_always_inline int mulinv(int n, int m)
 {
@@ -35,11 +47,10 @@ int ff_tx_gen_compound_mapping(AVTXContext *s)
     const int n = s->n;
     const int m = s->m;
     const int inv = s->inv;
-    const int type = s->type;
     const int len = n*m;
     const int m_inv = mulinv(m, n);
     const int n_inv = mulinv(n, m);
-    const int mdct = type == AV_TX_FLOAT_MDCT || type == AV_TX_DOUBLE_MDCT;
+    const int mdct = ff_tx_type_is_mdct(s->type);
 
     if (!(s->pfatab = av_malloc(2*len*sizeof(*s->pfatab))))
         return AVERROR(ENOMEM);
@@ -128,6 +139,11 @@ av_cold int av_tx_init(AVTXContext **ctx, av_tx_fn *tx, enum AVTXType type,
         if ((err = ff_tx_init_mdct_fft_double(s, tx, type, inv, len, scale, flags)))
             goto fail;
         break;
+    case AV_TX_INT32_FFT:
+    case AV_TX_INT32_MDCT:
+        if ((err = ff_tx_init_mdct_fft_int32(s, tx, type, inv, len, scale, flags)))
+            goto fail;
+        break;
     default:
         err = AVERROR(EINVAL);
         goto fail;
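Background on the unchanged context above (not introduced by this commit): mulinv() supplies the modular multiplicative inverses used by the prime-factor (compound) index mapping, and those inverses only exist because the two factors n and m are coprime (e.g. 15 and a power of two). A standalone sketch of the same brute-force idea, with names of my own choosing:

#include <assert.h>

/* Smallest x in [1, m) with (n * x) % m == 1; only defined when n and m are
 * coprime, as is the case for the 3/5/15 x power-of-two compound transforms. */
static int mulinv_ref(int n, int m)
{
    n %= m;
    for (int x = 1; x < m; x++)
        if (n * x % m == 1)
            return x;
    return 0; /* no inverse: n and m share a factor */
}

int main(void)
{
    int n = 15, m = 64;           /* e.g. a 15x64 = 960-point compound FFT */
    int n_inv = mulinv_ref(n, m); /* 47, since 15 * 47 = 705 = 11*64 + 1 */
    assert(n * n_inv % m == 1);
    return 0;
}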
libavutil/tx.h
@@ -32,6 +32,10 @@ typedef struct AVComplexDouble {
     double re, im;
 } AVComplexDouble;
 
+typedef struct AVComplexInt32 {
+    int32_t re, im;
+} AVComplexInt32;
+
 enum AVTXType {
     /**
      * Standard complex to complex FFT with sample data type AVComplexFloat.
@@ -51,6 +55,15 @@ enum AVTXType {
      * Same as AV_TX_FLOAT_MDCT with data and scale type of double.
      */
     AV_TX_DOUBLE_MDCT = 3,
+    /**
+     * Same as AV_TX_FLOAT_FFT with a data type of AVComplexInt32.
+     */
+    AV_TX_INT32_FFT = 4,
+    /**
+     * Same as AV_TX_FLOAT_MDCT with data type of int32_t and scale type of float.
+     * Only scale values less than or equal to 1.0 are supported.
+     */
+    AV_TX_INT32_MDCT = 5,
 };
 
 /**
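The new AVComplexInt32 carries Q31 fixed-point samples: judging from the RESCALE() macro added to tx_priv.h further down, a real value x in [-1.0, 1.0) maps to lrintf(x * 2147483648.0). A hypothetical pair of conversion helpers (not part of the patch) for feeding and reading such buffers:

#include <math.h>
#include <stdint.h>

/* Float in [-1.0, 1.0) to Q31; same scaling as RESCALE() uses for the int32
 * twiddle tables. Values at or above 1.0 would overflow int32_t. */
static inline int32_t float_to_q31(float x)
{
    return (int32_t)lrintf(x * 2147483648.0f);
}

/* Q31 back to float, e.g. for inspecting transform output. */
static inline float q31_to_float(int32_t x)
{
    return (float)x / 2147483648.0f;
}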
libavutil/tx_int32.c (new file, 21 lines)
@@ -0,0 +1,21 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define TX_INT32
+#include "tx_priv.h"
+#include "tx_template.c"
libavutil/tx_priv.h
@@ -28,28 +28,77 @@
 #ifdef TX_FLOAT
 #define TX_NAME(x) x ## _float
+#define SCALE_TYPE float
 typedef float FFTSample;
 typedef AVComplexFloat FFTComplex;
 #elif defined(TX_DOUBLE)
 #define TX_NAME(x) x ## _double
+#define SCALE_TYPE double
 typedef double FFTSample;
 typedef AVComplexDouble FFTComplex;
+#elif defined(TX_INT32)
+#define TX_NAME(x) x ## _int32
+#define SCALE_TYPE float
+typedef int32_t FFTSample;
+typedef AVComplexInt32 FFTComplex;
 #else
 typedef void FFTComplex;
 #endif
 
 #if defined(TX_FLOAT) || defined(TX_DOUBLE)
-#define BF(x, y, a, b) do { \
-    x = (a) - (b); \
-    y = (a) + (b); \
-} while (0)
+
+#define MUL(x, y) ((x)*(y))
 
 #define CMUL(dre, dim, are, aim, bre, bim) do { \
     (dre) = (are) * (bre) - (aim) * (bim); \
     (dim) = (are) * (bim) + (aim) * (bre); \
 } while (0)
 
+#define SMUL(dre, dim, are, aim, bre, bim) do { \
+    (dre) = (are) * (bre) - (aim) * (bim); \
+    (dim) = (are) * (bim) - (aim) * (bre); \
+} while (0)
+
+#define RESCALE(x) (x)
+
+#define FOLD(a, b) ((a) + (b))
+
+#elif defined(TX_INT32)
+
+#define MUL(x, y) ((int32_t)(((int64_t)(x) * (int64_t)(y) + 0x40000000) >> 31))
+
+/* Properly rounds the result */
+#define CMUL(dre, dim, are, aim, bre, bim) do { \
+    int64_t accu; \
+    (accu)  = (int64_t)(bre) * (are); \
+    (accu) -= (int64_t)(bim) * (aim); \
+    (dre)   = (int)(((accu) + 0x40000000) >> 31); \
+    (accu)  = (int64_t)(bim) * (are); \
+    (accu) += (int64_t)(bre) * (aim); \
+    (dim)   = (int)(((accu) + 0x40000000) >> 31); \
+} while (0)
+
+#define SMUL(dre, dim, are, aim, bre, bim) do { \
+    int64_t accu; \
+    (accu)  = (int64_t)(bre) * (are); \
+    (accu) -= (int64_t)(bim) * (aim); \
+    (dre)   = (int)(((accu) + 0x40000000) >> 31); \
+    (accu)  = (int64_t)(bim) * (are); \
+    (accu) -= (int64_t)(bre) * (aim); \
+    (dim)   = (int)(((accu) + 0x40000000) >> 31); \
+} while (0)
+
+#define RESCALE(x) (lrintf((x) * 2147483648.0))
+
+#define FOLD(x, y) ((int)((x) + (unsigned)(y) + 32) >> 6)
+
 #endif
 
+#define BF(x, y, a, b) do { \
+    x = (a) - (b); \
+    y = (a) + (b); \
+} while (0)
+
 #define CMUL3(c, a, b) \
     CMUL((c).re, (c).im, (a).re, (a).im, (b).re, (b).im)
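To make the TX_INT32 rounding concrete: MUL() and CMUL() above compute a 64-bit product of two Q31 values and round to nearest by adding 0x40000000 (half of 1 << 31) before shifting right by 31. A small standalone illustration using the same arithmetic:

#include <stdint.h>
#include <stdio.h>

/* Same arithmetic as the TX_INT32 MUL() macro: Q31 x Q31 -> Q31, rounded. */
static int32_t mul_q31(int32_t x, int32_t y)
{
    return (int32_t)(((int64_t)x * y + 0x40000000) >> 31);
}

int main(void)
{
    int32_t half  = 1 << 30;      /* 0.5 in Q31 */
    int32_t third = 0x2AAAAAAB;   /* ~1/3 in Q31, rounded */

    printf("0.5 * 0.5 = %d (0.25 in Q31 is %d)\n", mul_q31(half, half), 1 << 29);
    printf("0.5 * 1/3 = %d (~1/6 in Q31)\n", mul_q31(half, third));
    return 0;
}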
@@ -70,6 +119,7 @@ struct AVTXContext {
 };
 
 /* Shared functions */
+int ff_tx_type_is_mdct(enum AVTXType type);
 int ff_tx_gen_compound_mapping(AVTXContext *s);
 int ff_tx_gen_ptwo_revtab(AVTXContext *s);
 
@@ -96,6 +146,9 @@ int ff_tx_init_mdct_fft_float(AVTXContext *s, av_tx_fn *tx,
 int ff_tx_init_mdct_fft_double(AVTXContext *s, av_tx_fn *tx,
                                enum AVTXType type, int inv, int len,
                                const void *scale, uint64_t flags);
+int ff_tx_init_mdct_fft_int32(AVTXContext *s, av_tx_fn *tx,
+                              enum AVTXType type, int inv, int len,
+                              const void *scale, uint64_t flags);
 
 typedef struct CosTabsInitOnce {
     void (*func)(void);
libavutil/tx_template.c
@@ -66,7 +66,7 @@ static av_always_inline void init_cos_tabs_idx(int index)
     double freq = 2*M_PI/m;
     FFTSample *tab = cos_tabs[index];
     for(int i = 0; i <= m/4; i++)
-        tab[i] = cos(i*freq);
+        tab[i] = RESCALE(cos(i*freq));
     for(int i = 1; i < m/4; i++)
         tab[m/2 - i] = tab[i];
 }
@@ -94,10 +94,10 @@ INIT_FF_COS_TABS_FUNC(17, 131072)
 
 static av_cold void ff_init_53_tabs(void)
 {
-    TX_NAME(ff_cos_53)[0] = (FFTComplex){ cos(2 * M_PI / 12), cos(2 * M_PI / 12) };
-    TX_NAME(ff_cos_53)[1] = (FFTComplex){ 0.5, 0.5 };
-    TX_NAME(ff_cos_53)[2] = (FFTComplex){ cos(2 * M_PI / 5), sin(2 * M_PI / 5) };
-    TX_NAME(ff_cos_53)[3] = (FFTComplex){ cos(2 * M_PI / 10), sin(2 * M_PI / 10) };
+    TX_NAME(ff_cos_53)[0] = (FFTComplex){ RESCALE(cos(2 * M_PI / 12)), RESCALE(cos(2 * M_PI / 12)) };
+    TX_NAME(ff_cos_53)[1] = (FFTComplex){ RESCALE(cos(2 * M_PI / 6)), RESCALE(cos(2 * M_PI / 6)) };
+    TX_NAME(ff_cos_53)[2] = (FFTComplex){ RESCALE(cos(2 * M_PI / 5)), RESCALE(sin(2 * M_PI / 5)) };
+    TX_NAME(ff_cos_53)[3] = (FFTComplex){ RESCALE(cos(2 * M_PI / 10)), RESCALE(sin(2 * M_PI / 10)) };
 }
 
 static CosTabsInitOnce cos_tabs_init_once[] = {
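A side note on the second table entry: the float path previously stored the literal 0.5, which is just cos(2 * M_PI / 6); spelling it as RESCALE(cos(2 * M_PI / 6)) leaves the float/double tables unchanged while letting the int32 build rescale the same constant to Q31. A trivial standalone check of that identity (tolerance chosen by me):

#include <assert.h>
#include <math.h>

int main(void)
{
    /* cos(2*pi/6) = cos(60 degrees) = 0.5, up to double rounding */
    assert(fabs(cos(2 * M_PI / 6) - 0.5) < 1e-15);
    return 0;
}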
@@ -132,18 +132,16 @@ static av_always_inline void fft3(FFTComplex *out, FFTComplex *in,
 {
     FFTComplex tmp[2];
 
-    tmp[0].re = in[1].im - in[2].im;
-    tmp[0].im = in[1].re - in[2].re;
-    tmp[1].re = in[1].re + in[2].re;
-    tmp[1].im = in[1].im + in[2].im;
+    BF(tmp[0].re, tmp[1].im, in[1].im, in[2].im);
+    BF(tmp[0].im, tmp[1].re, in[1].re, in[2].re);
 
     out[0*stride].re = in[0].re + tmp[1].re;
     out[0*stride].im = in[0].im + tmp[1].im;
 
-    tmp[0].re *= TX_NAME(ff_cos_53)[0].re;
-    tmp[0].im *= TX_NAME(ff_cos_53)[0].im;
-    tmp[1].re *= TX_NAME(ff_cos_53)[1].re;
-    tmp[1].im *= TX_NAME(ff_cos_53)[1].re;
+    tmp[0].re = MUL(TX_NAME(ff_cos_53)[0].re, tmp[0].re);
+    tmp[0].im = MUL(TX_NAME(ff_cos_53)[0].im, tmp[0].im);
+    tmp[1].re = MUL(TX_NAME(ff_cos_53)[1].re, tmp[1].re);
+    tmp[1].im = MUL(TX_NAME(ff_cos_53)[1].re, tmp[1].im);
 
     out[1*stride].re = in[0].re - tmp[1].re + tmp[0].re;
     out[1*stride].im = in[0].im - tmp[1].im - tmp[0].im;
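Since the commit message calls out the 3-point FFT's rounding, a double-precision reference of a textbook 3-point DFT may be handy for measuring that error against the fixed-point fft3() output; note this sketch uses the standard exp(-2*pi*i*n*k/3) convention, which may differ in sign and output ordering from the internal convention of tx_template.c:

#include <complex.h>
#include <math.h>

/* Naive 3-point DFT: out[k] = sum_n in[n] * exp(-2*pi*i*n*k/3). */
void dft3_ref(double complex out[3], const double complex in[3])
{
    const double complex w = cexp(-2.0 * M_PI * I / 3.0);

    out[0] = in[0] + in[1]         + in[2];
    out[1] = in[0] + in[1] * w     + in[2] * w * w;
    out[2] = in[0] + in[1] * w * w + in[2] * w;      /* w^4 == w */
}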
@@ -157,46 +155,23 @@ static av_always_inline void NAME(FFTComplex *out, FFTComplex *in, \
 { \
     FFTComplex z0[4], t[6]; \
 \
-    t[0].re = in[1].re + in[4].re; \
-    t[0].im = in[1].im + in[4].im; \
-    t[1].im = in[1].re - in[4].re; \
-    t[1].re = in[1].im - in[4].im; \
-    t[2].re = in[2].re + in[3].re; \
-    t[2].im = in[2].im + in[3].im; \
-    t[3].im = in[2].re - in[3].re; \
-    t[3].re = in[2].im - in[3].im; \
+    BF(t[1].im, t[0].re, in[1].re, in[4].re); \
+    BF(t[1].re, t[0].im, in[1].im, in[4].im); \
+    BF(t[3].im, t[2].re, in[2].re, in[3].re); \
+    BF(t[3].re, t[2].im, in[2].im, in[3].im); \
 \
-    out[D0*stride].re = in[0].re + in[1].re + in[2].re + \
-                        in[3].re + in[4].re; \
-    out[D0*stride].im = in[0].im + in[1].im + in[2].im + \
-                        in[3].im + in[4].im; \
+    out[D0*stride].re = in[0].re + in[1].re + in[2].re + in[3].re + in[4].re; \
+    out[D0*stride].im = in[0].im + in[1].im + in[2].im + in[3].im + in[4].im; \
 \
-    t[4].re = TX_NAME(ff_cos_53)[2].re * t[2].re; \
-    t[4].im = TX_NAME(ff_cos_53)[2].re * t[2].im; \
-    t[4].re -= TX_NAME(ff_cos_53)[3].re * t[0].re; \
-    t[4].im -= TX_NAME(ff_cos_53)[3].re * t[0].im; \
-    t[0].re = TX_NAME(ff_cos_53)[2].re * t[0].re; \
-    t[0].im = TX_NAME(ff_cos_53)[2].re * t[0].im; \
-    t[0].re -= TX_NAME(ff_cos_53)[3].re * t[2].re; \
-    t[0].im -= TX_NAME(ff_cos_53)[3].re * t[2].im; \
-    t[5].re = TX_NAME(ff_cos_53)[2].im * t[3].re; \
-    t[5].im = TX_NAME(ff_cos_53)[2].im * t[3].im; \
-    t[5].re -= TX_NAME(ff_cos_53)[3].im * t[1].re; \
-    t[5].im -= TX_NAME(ff_cos_53)[3].im * t[1].im; \
-    t[1].re = TX_NAME(ff_cos_53)[2].im * t[1].re; \
-    t[1].im = TX_NAME(ff_cos_53)[2].im * t[1].im; \
-    t[1].re += TX_NAME(ff_cos_53)[3].im * t[3].re; \
-    t[1].im += TX_NAME(ff_cos_53)[3].im * t[3].im; \
+    SMUL(t[4].re, t[0].re, TX_NAME(ff_cos_53)[2].re, TX_NAME(ff_cos_53)[3].re, t[2].re, t[0].re); \
+    SMUL(t[4].im, t[0].im, TX_NAME(ff_cos_53)[2].re, TX_NAME(ff_cos_53)[3].re, t[2].im, t[0].im); \
+    CMUL(t[5].re, t[1].re, TX_NAME(ff_cos_53)[2].im, TX_NAME(ff_cos_53)[3].im, t[3].re, t[1].re); \
+    CMUL(t[5].im, t[1].im, TX_NAME(ff_cos_53)[2].im, TX_NAME(ff_cos_53)[3].im, t[3].im, t[1].im); \
 \
-    z0[0].re = t[0].re - t[1].re; \
-    z0[0].im = t[0].im - t[1].im; \
-    z0[1].re = t[4].re + t[5].re; \
-    z0[1].im = t[4].im + t[5].im; \
-    \
-    z0[2].re = t[4].re - t[5].re; \
-    z0[2].im = t[4].im - t[5].im; \
-    z0[3].re = t[0].re + t[1].re; \
-    z0[3].im = t[0].im + t[1].im; \
+    BF(z0[0].re, z0[3].re, t[0].re, t[1].re); \
+    BF(z0[0].im, z0[3].im, t[0].im, t[1].im); \
+    BF(z0[2].re, z0[1].re, t[4].re, t[5].re); \
+    BF(z0[2].im, z0[1].im, t[4].im, t[5].im); \
 \
     out[D1*stride].re = in[0].re + z0[3].re; \
     out[D1*stride].im = in[0].im + z0[0].im; \
@@ -324,7 +299,7 @@ static void fft8(FFTComplex *z)
     BF(t6, z[7].im, z[6].im, -z[7].im);
 
     BUTTERFLIES(z[0],z[2],z[4],z[6]);
-    TRANSFORM(z[1],z[3],z[5],z[7],M_SQRT1_2,M_SQRT1_2);
+    TRANSFORM(z[1],z[3],z[5],z[7],RESCALE(M_SQRT1_2),RESCALE(M_SQRT1_2));
 }
 
 static void fft16(FFTComplex *z)
@@ -338,7 +313,7 @@ static void fft16(FFTComplex *z)
     fft4(z+12);
 
     TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);
-    TRANSFORM(z[2],z[6],z[10],z[14],M_SQRT1_2,M_SQRT1_2);
+    TRANSFORM(z[2],z[6],z[10],z[14],RESCALE(M_SQRT1_2),RESCALE(M_SQRT1_2));
     TRANSFORM(z[1],z[5],z[9],z[13],cos_16_1,cos_16_3);
     TRANSFORM(z[3],z[7],z[11],z[15],cos_16_3,cos_16_1);
 }
@@ -459,11 +434,11 @@ static void compound_mdct_##N##xM(AVTXContext *s, void *_dst, void *_src, \
         for (int j = 0; j < N; j++) { \
             const int k = in_map[i*N + j]; \
             if (k < len4) { \
-                tmp.re = -src[ len4 + k] + src[1*len4 - 1 - k]; \
-                tmp.im = -src[ len3 + k] - src[1*len3 - 1 - k]; \
+                tmp.re = FOLD(-src[ len4 + k], src[1*len4 - 1 - k]); \
+                tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]); \
             } else { \
-                tmp.re = -src[ len4 + k] - src[5*len4 - 1 - k]; \
-                tmp.im = src[-len4 + k] - src[1*len3 - 1 - k]; \
+                tmp.re = FOLD(-src[ len4 + k], -src[5*len4 - 1 - k]); \
+                tmp.im = FOLD( src[-len4 + k], -src[1*len3 - 1 - k]); \
             } \
             CMUL(fft##N##in[j].im, fft##N##in[j].re, tmp.re, tmp.im, \
                  exp[k >> 1].re, exp[k >> 1].im); \
@@ -533,11 +508,11 @@ static void monolithic_mdct(AVTXContext *s, void *_dst, void *_src,
     for (int i = 0; i < m; i++) { /* Folding and pre-reindexing */
         const int k = 2*i;
         if (k < len4) {
-            tmp.re = -src[ len4 + k] + src[1*len4 - 1 - k];
-            tmp.im = -src[ len3 + k] - src[1*len3 - 1 - k];
+            tmp.re = FOLD(-src[ len4 + k], src[1*len4 - 1 - k]);
+            tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]);
         } else {
-            tmp.re = -src[ len4 + k] - src[5*len4 - 1 - k];
-            tmp.im = src[-len4 + k] - src[1*len3 - 1 - k];
+            tmp.re = FOLD(-src[ len4 + k], -src[5*len4 - 1 - k]);
+            tmp.im = FOLD( src[-len4 + k], -src[1*len3 - 1 - k]);
         }
         CMUL(z[s->revtab[i]].im, z[s->revtab[i]].re, tmp.re, tmp.im,
              exp[i].re, exp[i].im);
@@ -567,8 +542,8 @@ static int gen_mdct_exptab(AVTXContext *s, int len4, double scale)
     scale = sqrt(fabs(scale));
     for (int i = 0; i < len4; i++) {
         const double alpha = M_PI_2 * (i + theta) / len4;
-        s->exptab[i].re = cos(alpha) * scale;
-        s->exptab[i].im = sin(alpha) * scale;
+        s->exptab[i].re = RESCALE(cos(alpha) * scale);
+        s->exptab[i].im = RESCALE(sin(alpha) * scale);
     }
 
     return 0;
@@ -578,7 +553,7 @@ int TX_NAME(ff_tx_init_mdct_fft)(AVTXContext *s, av_tx_fn *tx,
                                  enum AVTXType type, int inv, int len,
                                  const void *scale, uint64_t flags)
 {
-    const int is_mdct = type == AV_TX_FLOAT_MDCT || type == AV_TX_DOUBLE_MDCT;
+    const int is_mdct = ff_tx_type_is_mdct(type);
     int err, n = 1, m = 1, max_ptwo = 1 << (FF_ARRAY_ELEMS(fft_dispatch) + 1);
 
     if (is_mdct)
@@ -637,7 +612,7 @@ int TX_NAME(ff_tx_init_mdct_fft)(AVTXContext *s, av_tx_fn *tx,
     }
 
     if (is_mdct)
-        return gen_mdct_exptab(s, n*m, *((FFTSample *)scale));
+        return gen_mdct_exptab(s, n*m, *((SCALE_TYPE *)scale));
 
     return 0;
 }
libavutil/version.h
@@ -79,7 +79,7 @@
  */
 
 #define LIBAVUTIL_VERSION_MAJOR  56
-#define LIBAVUTIL_VERSION_MINOR  40
+#define LIBAVUTIL_VERSION_MINOR  41
 #define LIBAVUTIL_VERSION_MICRO 100
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \