1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2024-11-21 10:55:51 +02:00

lavu/tx: list all odd-length FFT factors as regular codelets

Allows them to be picked just like any other transform.
This commit is contained in:
Lynne 2022-09-24 06:50:17 +02:00
parent 45bd4bf79f
commit e8a9b7b298
No known key found for this signature in database
GPG Key ID: A2FEA5F03F034464

View File

@ -472,6 +472,81 @@ static av_always_inline void fft15(TXComplex *out, TXComplex *in,
fft5_m3(out, tmp + 10, stride);
}
/**
 * Initialise a single-factor FFT codelet of length len.
 *
 * Calls the table initialiser for len and, when FF_TX_PRESHUFFLE is
 * requested, allocates s->map and fills it with the input lookup table.
 *
 * @param s     transform context; s->map is allocated here when needed
 * @param cd    codelet being initialised (not used here)
 * @param flags requested flags; only FF_TX_PRESHUFFLE is examined
 * @param opts  codelet options (not used here)
 * @param len   transform length
 * @param inv   non-zero to build the map for the inverse direction
 * @param scale scale factor (not used here)
 * @return 0 on success, AVERROR(ENOMEM) if the map cannot be allocated
 */
static av_cold int TX_NAME(ff_tx_fft_factor_init)(AVTXContext *s,
                                                  const FFTXCodelet *cd,
                                                  uint64_t flags,
                                                  FFTXCodeletOptions *opts,
                                                  int len, int inv,
                                                  const void *scale)
{
    TX_TAB(ff_tx_init_tabs)(len);

    /* No map is built, and none may be touched, unless one was requested. */
    if (!(flags & FF_TX_PRESHUFFLE))
        return 0;

    /* Size by element, not by the pointer itself. */
    s->map = av_malloc(len * sizeof(*s->map));
    if (!s->map)
        return AVERROR(ENOMEM);

    s->map[0] = 0; /* DC is always at the start */
    if (inv) { /* Reversing the ACs flips the transform direction */
        for (int i = 1; i < len; i++)
            s->map[i] = len - i;
    } else {
        for (int i = 1; i < len; i++)
            s->map[i] = i;
    }

    /* Our 15-point transform is actually a 5x3 PFA, so embed its input map. */
    if (len == 15) {
        int tmp[15];

        /* Permute from a snapshot so reads are not clobbered by writes. */
        memcpy(tmp, s->map, 15 * sizeof(*tmp));
        for (int i = 0; i < 5; i++) {
            for (int j = 0; j < 3; j++)
                s->map[i*3 + j] = tmp[(i*3 + j*5) % 15];
        }
    }

    return 0;
}
/*
 * DECL_FACTOR_S(n): declare the n-point factor transform entry point and
 * its "_ns" codelet.
 *
 * The entry point forwards to fft##n(), passing stride divided by
 * sizeof(TXComplex). The codelet accepts exactly length n, carries
 * FF_TX_PRESHUFFLE and is initialised by ff_tx_fft_factor_init.
 */
#define DECL_FACTOR_S(n)                                                       \
static void TX_NAME(ff_tx_fft##n)(AVTXContext *s, void *dst,                   \
                                  void *src, ptrdiff_t stride)                 \
{                                                                              \
    TXComplex *out = dst;                                                      \
    TXComplex *in  = src;                                                      \
                                                                               \
    fft##n(out, in, stride / sizeof(TXComplex));                               \
}                                                                              \
                                                                               \
static const FFTXCodelet TX_NAME(ff_tx_fft##n##_ns_def) = {                    \
    .name       = TX_NAME_STR("fft" #n "_ns"),                                 \
    .type       = TX_TYPE(FFT),                                                \
    .function   = TX_NAME(ff_tx_fft##n),                                       \
    .init       = TX_NAME(ff_tx_fft_factor_init),                              \
    .factors[0] = n,                                                           \
    .min_len    = n,                                                           \
    .max_len    = n,                                                           \
    .flags      = AV_TX_INPLACE | FF_TX_OUT_OF_PLACE | AV_TX_UNALIGNED |       \
                  FF_TX_PRESHUFFLE,                                            \
    .cpu_flags  = FF_TX_CPU_FLAGS_ALL,                                         \
    .prio       = FF_TX_PRIO_BASE,                                             \
};
/*
 * DECL_FACTOR_F(n): everything DECL_FACTOR_S(n) declares, plus a "_fwd"
 * codelet for the same length.
 *
 * The "_fwd" codelet reuses the ff_tx_fft##n entry point but is flagged
 * FF_TX_FORWARD_ONLY instead of FF_TX_PRESHUFFLE.
 */
#define DECL_FACTOR_F(n)                                                       \
DECL_FACTOR_S(n)                                                               \
static const FFTXCodelet TX_NAME(ff_tx_fft##n##_fwd_def) = {                   \
    .name       = TX_NAME_STR("fft" #n "_fwd"),                                \
    .type       = TX_TYPE(FFT),                                                \
    .function   = TX_NAME(ff_tx_fft##n),                                       \
    .init       = TX_NAME(ff_tx_fft_factor_init),                              \
    .factors[0] = n,                                                           \
    .min_len    = n,                                                           \
    .max_len    = n,                                                           \
    .flags      = AV_TX_INPLACE | FF_TX_OUT_OF_PLACE | AV_TX_UNALIGNED |       \
                  FF_TX_FORWARD_ONLY,                                          \
    .cpu_flags  = FF_TX_CPU_FLAGS_ALL,                                         \
    .prio       = FF_TX_PRIO_BASE,                                             \
};
/*
 * Instantiate the factor codelets. Lengths 3, 5, 7 and 9 get both the
 * preshuffled "_ns" codelet and the forward-only "_fwd" codelet; length 15
 * gets only the "_ns" codelet.
 * NOTE(review): presumably 15 has no "_fwd" variant because the 5x3 PFA
 * kernel depends on the input map built at init - confirm.
 */
DECL_FACTOR_F(3)
DECL_FACTOR_F(5)
DECL_FACTOR_F(7)
DECL_FACTOR_F(9)
DECL_FACTOR_S(15)
#define BUTTERFLIES(a0, a1, a2, a3) \
do { \
r0=a0.re; \
@ -1483,6 +1558,19 @@ const FFTXCodelet * const TX_NAME(ff_tx_codelet_list)[] = {
&TX_NAME(ff_tx_fft65536_ns_def),
&TX_NAME(ff_tx_fft131072_ns_def),
/* Prime factor codelets */
&TX_NAME(ff_tx_fft3_ns_def),
&TX_NAME(ff_tx_fft5_ns_def),
&TX_NAME(ff_tx_fft7_ns_def),
&TX_NAME(ff_tx_fft9_ns_def),
&TX_NAME(ff_tx_fft15_ns_def),
/* Forward-only variants, which reuse the transform functions of the codelets above */
&TX_NAME(ff_tx_fft3_fwd_def),
&TX_NAME(ff_tx_fft5_fwd_def),
&TX_NAME(ff_tx_fft7_fwd_def),
&TX_NAME(ff_tx_fft9_fwd_def),
/* Standalone transforms */
&TX_NAME(ff_tx_fft_def),
&TX_NAME(ff_tx_fft_inplace_def),