From eb81fd792fa88f9015b5e99e4940a464de9182a5 Mon Sep 17 00:00:00 2001 From: Steven Liu Date: Wed, 26 Dec 2018 16:09:49 +0800 Subject: [PATCH] avcodec/fft_template: improve performance of the ff_fft_init in fft_template Before patch: init nbits = 17, get 10000 samples, average cost: 16175 us After patch: init nbits = 17, get 10000 samples, average cost: 14989 us Reviewed-by: Michael Niedermayer Signed-off-by: Steven Liu --- libavcodec/fft_template.c | 46 +++++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/libavcodec/fft_template.c b/libavcodec/fft_template.c index 762c014bc8..20a62e4290 100644 --- a/libavcodec/fft_template.c +++ b/libavcodec/fft_template.c @@ -261,17 +261,41 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse) if (s->fft_permutation == FF_FFT_PERM_AVX) { fft_perm_avx(s); } else { - for(i=0; ifft_permutation == FF_FFT_PERM_SWAP_LSBS) - j = (j&~3) | ((j>>1)&1) | ((j<<1)&2); - k = -split_radix_permutation(i, n, s->inverse) & (n-1); - if (s->revtab) - s->revtab[k] = j; - if (s->revtab32) - s->revtab32[k] = j; - } +#define PROCESS_FFT_PERM_SWAP_LSBS(num) do {\ + for(i = 0; i < n; i++) {\ + int k;\ + j = i;\ + j = (j & ~3) | ((j >> 1) & 1) | ((j << 1) & 2);\ + k = -split_radix_permutation(i, n, s->inverse) & (n - 1);\ + s->revtab##num[k] = j;\ + } \ +} while(0); + +#define PROCESS_FFT_PERM_DEFAULT(num) do {\ + for(i = 0; i < n; i++) {\ + int k;\ + j = i;\ + k = -split_radix_permutation(i, n, s->inverse) & (n - 1);\ + s->revtab##num[k] = j;\ + } \ +} while(0); + +#define SPLIT_RADIX_PERMUTATION(num) do { \ + if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS) {\ + PROCESS_FFT_PERM_SWAP_LSBS(num) \ + } else {\ + PROCESS_FFT_PERM_DEFAULT(num) \ + }\ +} while(0); + + if (s->revtab) + SPLIT_RADIX_PERMUTATION() + if (s->revtab32) + SPLIT_RADIX_PERMUTATION(32) + +#undef PROCESS_FFT_PERM_DEFAULT +#undef PROCESS_FFT_PERM_SWAP_LSBS +#undef SPLIT_RADIX_PERMUTATION } return 0;