1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2024-11-26 19:01:44 +02:00

lavu/tx: add length decomposition function

Rather than using a list of lengths supported, this goes a step beyond
and uses all registered codelets to come up with a good decomposition.
This commit is contained in:
Lynne 2022-11-19 00:58:13 +01:00
parent 87bae6b018
commit 7f019e7758
No known key found for this signature in database
GPG Key ID: A2FEA5F03F034464
2 changed files with 151 additions and 0 deletions

View File

@ -17,6 +17,7 @@
*/
#include "avassert.h"
#include "intmath.h"
#include "cpu.h"
#include "qsort.h"
#include "bprint.h"
@ -395,6 +396,148 @@ static int get_codelet_prio(const FFTXCodelet *cd, int cpu_flags, int len)
return prio;
}
/* One candidate split of a transform length, as collected and sorted by
 * ff_tx_decompose_length(). */
typedef struct FFTXLenDecomp {
int len; /* Part of the length built from the codelet's factors */
int len2; /* What is left of the length once those factors are removed */
int prio; /* Sort key: the codelet's priority multiplied by len */
const FFTXCodelet *cd; /* Codelet that first produced this candidate */
} FFTXLenDecomp;
/* Comparator handed to AV_QSORT for FFTXLenDecomp entries. It compares the
 * two prio fields with b as the left operand of FFDIFFSIGN, which (assuming
 * the usual sign-of-difference semantics of that macro) places entries with
 * a higher priority first. */
static int cmp_decomp(FFTXLenDecomp *a, FFTXLenDecomp *b)
{
    const int prio_a = a->prio;
    const int prio_b = b->prio;

    return FFDIFFSIGN(prio_b, prio_a);
}
int ff_tx_decompose_length(int dst[TX_MAX_DECOMPOSITIONS], enum AVTXType type,
int len, int inv)
{
int nb_decomp = 0;
FFTXLenDecomp ld[TX_MAX_DECOMPOSITIONS];
int codelet_list_idx = codelet_list_num;
const int cpu_flags = av_get_cpu_flags();
/* Loop through all codelets in all codelet lists to find matches
* to the requirements */
while (codelet_list_idx--) {
const FFTXCodelet * const * list = codelet_list[codelet_list_idx];
const FFTXCodelet *cd = NULL;
while ((cd = *list++)) {
int fl = len;
int skip = 0, prio;
int factors_product = 1, factors_mod = 0;
if (nb_decomp >= TX_MAX_DECOMPOSITIONS)
goto sort;
/* Check if the type matches */
if (cd->type != TX_TYPE_ANY && type != cd->type)
continue;
/* Check direction for non-orthogonal codelets */
if (((cd->flags & FF_TX_FORWARD_ONLY) && inv) ||
((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && !inv))
continue;
/* Check if the CPU supports the required ISA */
if (cd->cpu_flags != FF_TX_CPU_FLAGS_ALL &&
!(cpu_flags & (cd->cpu_flags & ~cpu_slow_mask)))
continue;
for (int i = 0; i < TX_MAX_FACTORS; i++) {
if (!cd->factors[i] || (fl == 1))
break;
if (cd->factors[i] == TX_FACTOR_ANY) {
factors_mod++;
factors_product *= fl;
} else if (!(fl % cd->factors[i])) {
factors_mod++;
if (cd->factors[i] == 2) {
int b = ff_ctz(fl);
fl >>= b;
factors_product <<= b;
} else {
do {
fl /= cd->factors[i];
factors_product *= cd->factors[i];
} while (!(fl % cd->factors[i]));
}
}
}
/* Disqualify if factor requirements are not satisfied or if trivial */
if ((factors_mod < cd->nb_factors) || (len == factors_product))
continue;
if (av_gcd(factors_product, fl) != 1)
continue;
/* Check if length is supported and factorization was successful */
if ((factors_product < cd->min_len) ||
(cd->max_len != TX_LEN_UNLIMITED && (factors_product > cd->max_len)))
continue;
prio = get_codelet_prio(cd, cpu_flags, factors_product) * factors_product;
/* Check for duplicates */
for (int i = 0; i < nb_decomp; i++) {
if (factors_product == ld[i].len) {
/* Update priority if new one is higher */
if (prio > ld[i].prio)
ld[i].prio = prio;
skip = 1;
break;
}
}
/* Add decomposition if unique */
if (!skip) {
ld[nb_decomp].cd = cd;
ld[nb_decomp].len = factors_product;
ld[nb_decomp].len2 = fl;
ld[nb_decomp].prio = prio;
nb_decomp++;
}
}
}
if (!nb_decomp)
return AVERROR(EINVAL);
sort:
AV_QSORT(ld, nb_decomp, FFTXLenDecomp, cmp_decomp);
for (int i = 0; i < nb_decomp; i++) {
if (ld[i].cd->nb_factors > 1)
dst[i] = ld[i].len2;
else
dst[i] = ld[i].len;
}
return nb_decomp;
}
/**
 * Allocate s->map with s->len entries and fill it with the default ordering.
 *
 * Entry 0 is always 0. For forward transforms entry i holds i; for inverse
 * transforms (s->inv set) entry i holds s->len - i. s->map_dir is set to
 * FF_TX_MAP_GATHER on success.
 *
 * @param s    context whose len and inv fields are read and whose map and
 *             map_dir fields are written
 * @param opts not referenced by this function
 * @return 0 on success, AVERROR(ENOMEM) if the map could not be allocated
 */
int ff_tx_gen_default_map(AVTXContext *s, FFTXCodeletOptions *opts)
{
    const int n = s->len;

    s->map = av_malloc(n * sizeof(*s->map));
    if (!s->map)
        return AVERROR(ENOMEM);

    s->map_dir = FF_TX_MAP_GATHER;

    /* DC is always at the start */
    s->map[0] = 0;

    /* Reversing the ACs flips the transform direction */
    for (int k = 1; k < n; k++)
        s->map[k] = s->inv ? n - k : k;

    return 0;
}
#if !CONFIG_SMALL
static void print_flags(AVBPrint *bp, uint64_t f)
{

View File

@ -183,6 +183,9 @@ typedef struct FFTXCodeletOptions {
/* Maximum amount of subtransform functions, subtransforms and factors. Arbitrary. */
#define TX_MAX_SUB 4
/* Maximum number of returned results for ff_tx_decompose_length, and
 * therefore the size of the dst array passed to it. Arbitrary. */
#define TX_MAX_DECOMPOSITIONS 512
typedef struct FFTXCodelet {
const char *name; /* Codelet name, for debugging */
av_tx_fn function; /* Codelet function, != NULL */
@ -284,6 +287,11 @@ int ff_tx_init_subtx(AVTXContext *s, enum AVTXType type,
/* Clear the context by freeing all tables, maps and subtransforms. */
void ff_tx_clear_ctx(AVTXContext *s);
/* Attempt to factorize a length into 2 integers such that
 * len / dst1 == dst2, where dst1 and dst2 are coprime.
 * All registered codelets matching type and direction (inv) are tried, and
 * one length per distinct candidate is written to dst, sorted by priority.
 * For a candidate whose codelet has more than one factor, the stored value
 * is the part of len left after removing the codelet's factors; otherwise
 * it is the part made up of those factors.
 * Returns the number of entries written to dst (at most
 * TX_MAX_DECOMPOSITIONS), or AVERROR(EINVAL) if no decomposition was found. */
int ff_tx_decompose_length(int dst[TX_MAX_DECOMPOSITIONS], enum AVTXType type,
int len, int inv);
/* Generate a default map for a context: 0, 1, ..., len-1 for forward
 * transforms, or 0 followed by len-1 down to 1 for inverse transforms.
 * The map is allocated into s->map and s->map_dir is set to
 * FF_TX_MAP_GATHER. opts is not used.
 * Returns 0 on success or AVERROR(ENOMEM) if the allocation fails. */
int ff_tx_gen_default_map(AVTXContext *s, FFTXCodeletOptions *opts);