1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-08-10 06:10:52 +02:00

lavu/tx: generalize single-factor transforms

Not that useful by itself, but the generalization gives us fast small odd-length transforms.
This commit is contained in:
Lynne
2022-09-24 06:49:16 +02:00
parent 79f11e2409
commit 45bd4bf79f
3 changed files with 26 additions and 26 deletions

View File

@@ -121,9 +121,9 @@ int ff_tx_gen_ptwo_revtab(AVTXContext *s, int invert_lookup)
return 0; return 0;
} }
int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s) int ff_tx_gen_inplace_map(AVTXContext *s, int len)
{ {
int *src_map, out_map_idx = 0, len = s->len; int *src_map, out_map_idx = 0;
if (!s->sub || !s->sub->map) if (!s->sub || !s->sub->map)
return AVERROR(EINVAL); return AVERROR(EINVAL);

View File

@@ -259,7 +259,7 @@ int ff_tx_gen_ptwo_revtab(AVTXContext *s, int invert_lookup);
* specific order, allows the revtab to be done in-place. The sub-transform * specific order, allows the revtab to be done in-place. The sub-transform
* and its map should already be initialized. * and its map should already be initialized.
*/ */
int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s); int ff_tx_gen_inplace_map(AVTXContext *s, int len);
/* /*
* This generates a parity-based revtab of length len and direction inv. * This generates a parity-based revtab of length len and direction inv.

View File

@@ -650,12 +650,12 @@ DECL_SR_CODELET(32768,16384,8192)
DECL_SR_CODELET(65536,32768,16384) DECL_SR_CODELET(65536,32768,16384)
DECL_SR_CODELET(131072,65536,32768) DECL_SR_CODELET(131072,65536,32768)
static av_cold int TX_NAME(ff_tx_fft_sr_init)(AVTXContext *s, static av_cold int TX_NAME(ff_tx_fft_init)(AVTXContext *s,
const FFTXCodelet *cd, const FFTXCodelet *cd,
uint64_t flags, uint64_t flags,
FFTXCodeletOptions *opts, FFTXCodeletOptions *opts,
int len, int inv, int len, int inv,
const void *scale) const void *scale)
{ {
int ret; int ret;
int is_inplace = !!(flags & AV_TX_INPLACE); int is_inplace = !!(flags & AV_TX_INPLACE);
@@ -668,14 +668,14 @@ static av_cold int TX_NAME(ff_tx_fft_sr_init)(AVTXContext *s,
if ((ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, &sub_opts, len, inv, scale))) if ((ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, &sub_opts, len, inv, scale)))
return ret; return ret;
if (is_inplace && (ret = ff_tx_gen_ptwo_inplace_revtab_idx(s))) if (is_inplace && (ret = ff_tx_gen_inplace_map(s, len)))
return ret; return ret;
return 0; return 0;
} }
static void TX_NAME(ff_tx_fft_sr)(AVTXContext *s, void *_dst, static void TX_NAME(ff_tx_fft)(AVTXContext *s, void *_dst,
void *_src, ptrdiff_t stride) void *_src, ptrdiff_t stride)
{ {
TXComplex *src = _src; TXComplex *src = _src;
TXComplex *dst = _dst; TXComplex *dst = _dst;
@@ -690,8 +690,8 @@ static void TX_NAME(ff_tx_fft_sr)(AVTXContext *s, void *_dst,
s->fn[0](&s->sub[0], dst, dst, stride); s->fn[0](&s->sub[0], dst, dst, stride);
} }
static void TX_NAME(ff_tx_fft_sr_inplace)(AVTXContext *s, void *_dst, static void TX_NAME(ff_tx_fft_inplace)(AVTXContext *s, void *_dst,
void *_src, ptrdiff_t stride) void *_src, ptrdiff_t stride)
{ {
TXComplex *dst = _dst; TXComplex *dst = _dst;
TXComplex tmp; TXComplex tmp;
@@ -713,28 +713,28 @@ static void TX_NAME(ff_tx_fft_sr_inplace)(AVTXContext *s, void *_dst,
s->fn[0](&s->sub[0], dst, dst, stride); s->fn[0](&s->sub[0], dst, dst, stride);
} }
static const FFTXCodelet TX_NAME(ff_tx_fft_sr_def) = { static const FFTXCodelet TX_NAME(ff_tx_fft_def) = {
.name = TX_NAME_STR("fft_sr"), .name = TX_NAME_STR("fft"),
.function = TX_NAME(ff_tx_fft_sr), .function = TX_NAME(ff_tx_fft),
.type = TX_TYPE(FFT), .type = TX_TYPE(FFT),
.flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE, .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE,
.factors[0] = 2, .factors[0] = TX_FACTOR_ANY,
.min_len = 2, .min_len = 2,
.max_len = TX_LEN_UNLIMITED, .max_len = TX_LEN_UNLIMITED,
.init = TX_NAME(ff_tx_fft_sr_init), .init = TX_NAME(ff_tx_fft_init),
.cpu_flags = FF_TX_CPU_FLAGS_ALL, .cpu_flags = FF_TX_CPU_FLAGS_ALL,
.prio = FF_TX_PRIO_BASE, .prio = FF_TX_PRIO_BASE,
}; };
static const FFTXCodelet TX_NAME(ff_tx_fft_sr_inplace_def) = { static const FFTXCodelet TX_NAME(ff_tx_fft_inplace_def) = {
.name = TX_NAME_STR("fft_sr_inplace"), .name = TX_NAME_STR("fft_inplace"),
.function = TX_NAME(ff_tx_fft_sr_inplace), .function = TX_NAME(ff_tx_fft_inplace),
.type = TX_TYPE(FFT), .type = TX_TYPE(FFT),
.flags = AV_TX_UNALIGNED | AV_TX_INPLACE, .flags = AV_TX_UNALIGNED | AV_TX_INPLACE,
.factors[0] = 2, .factors[0] = TX_FACTOR_ANY,
.min_len = 2, .min_len = 2,
.max_len = TX_LEN_UNLIMITED, .max_len = TX_LEN_UNLIMITED,
.init = TX_NAME(ff_tx_fft_sr_init), .init = TX_NAME(ff_tx_fft_init),
.cpu_flags = FF_TX_CPU_FLAGS_ALL, .cpu_flags = FF_TX_CPU_FLAGS_ALL,
.prio = FF_TX_PRIO_BASE, .prio = FF_TX_PRIO_BASE,
}; };
@@ -1484,8 +1484,8 @@ const FFTXCodelet * const TX_NAME(ff_tx_codelet_list)[] = {
&TX_NAME(ff_tx_fft131072_ns_def), &TX_NAME(ff_tx_fft131072_ns_def),
/* Standalone transforms */ /* Standalone transforms */
&TX_NAME(ff_tx_fft_sr_def), &TX_NAME(ff_tx_fft_def),
&TX_NAME(ff_tx_fft_sr_inplace_def), &TX_NAME(ff_tx_fft_inplace_def),
&TX_NAME(ff_tx_fft_pfa_3xM_def), &TX_NAME(ff_tx_fft_pfa_3xM_def),
&TX_NAME(ff_tx_fft_pfa_5xM_def), &TX_NAME(ff_tx_fft_pfa_5xM_def),
&TX_NAME(ff_tx_fft_pfa_7xM_def), &TX_NAME(ff_tx_fft_pfa_7xM_def),