You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-08-15 14:13:16 +02:00
lavu/tx: invert permutation lookups
out[lut[i]] = in[i] lookups were 4.04 times(!) slower than out[i] = in[lut[i]] lookups for an out-of-place FFT of length 4096. The permutations remain unchanged for everything except the out-of-place monolithic FFT, because the other transforms benefit quite a lot from the current order (it means only one lookup needs to be added to an offset, rather than performing a full gather). The code was originally designed around non-power-of-two FFTs, so this case wasn't benchmarked early on.
This commit is contained in:
@@ -91,7 +91,7 @@ int ff_tx_gen_compound_mapping(AVTXContext *s)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int ff_tx_gen_ptwo_revtab(AVTXContext *s)
|
int ff_tx_gen_ptwo_revtab(AVTXContext *s, int invert_lookup)
|
||||||
{
|
{
|
||||||
const int m = s->m, inv = s->inv;
|
const int m = s->m, inv = s->inv;
|
||||||
|
|
||||||
@@ -101,7 +101,10 @@ int ff_tx_gen_ptwo_revtab(AVTXContext *s)
|
|||||||
/* Default */
|
/* Default */
|
||||||
for (int i = 0; i < m; i++) {
|
for (int i = 0; i < m; i++) {
|
||||||
int k = -split_radix_permutation(i, m, inv) & (m - 1);
|
int k = -split_radix_permutation(i, m, inv) & (m - 1);
|
||||||
s->revtab[k] = i;
|
if (invert_lookup)
|
||||||
|
s->revtab[i] = k;
|
||||||
|
else
|
||||||
|
s->revtab[k] = i;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@@ -123,7 +123,7 @@ struct AVTXContext {
|
|||||||
/* Shared functions */
|
/* Shared functions */
|
||||||
int ff_tx_type_is_mdct(enum AVTXType type);
|
int ff_tx_type_is_mdct(enum AVTXType type);
|
||||||
int ff_tx_gen_compound_mapping(AVTXContext *s);
|
int ff_tx_gen_compound_mapping(AVTXContext *s);
|
||||||
int ff_tx_gen_ptwo_revtab(AVTXContext *s);
|
int ff_tx_gen_ptwo_revtab(AVTXContext *s, int invert_lookup);
|
||||||
int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s);
|
int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s);
|
||||||
|
|
||||||
/* Also used by SIMD init */
|
/* Also used by SIMD init */
|
||||||
|
@@ -410,7 +410,7 @@ static void monolithic_fft(AVTXContext *s, void *_out, void *_in,
|
|||||||
} while ((src = *inplace_idx++));
|
} while ((src = *inplace_idx++));
|
||||||
} else {
|
} else {
|
||||||
for (int i = 0; i < m; i++)
|
for (int i = 0; i < m; i++)
|
||||||
out[s->revtab[i]] = in[i];
|
out[i] = in[s->revtab[i]];
|
||||||
}
|
}
|
||||||
|
|
||||||
fft_dispatch[mb](out);
|
fft_dispatch[mb](out);
|
||||||
@@ -738,7 +738,7 @@ int TX_NAME(ff_tx_init_mdct_fft)(AVTXContext *s, av_tx_fn *tx,
|
|||||||
if (n != 1)
|
if (n != 1)
|
||||||
init_cos_tabs(0);
|
init_cos_tabs(0);
|
||||||
if (m != 1) {
|
if (m != 1) {
|
||||||
if ((err = ff_tx_gen_ptwo_revtab(s)))
|
if ((err = ff_tx_gen_ptwo_revtab(s, n == 1 && !(flags & AV_TX_INPLACE))))
|
||||||
return err;
|
return err;
|
||||||
if (flags & AV_TX_INPLACE) {
|
if (flags & AV_TX_INPLACE) {
|
||||||
if (is_mdct) /* In-place MDCTs are not supported yet */
|
if (is_mdct) /* In-place MDCTs are not supported yet */
|
||||||
|
Reference in New Issue
Block a user