mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-05-29 21:47:48 +02:00
lavu/tx: allow codelets to specify a minimum number of matching factors
This commit is contained in:
parent
dd77e61182
commit
6ddd10c3e2
@ -409,42 +409,38 @@ static int cmp_matches(TXCodeletMatch *a, TXCodeletMatch *b)
|
|||||||
/* We want all factors to completely cover the length */
|
/* We want all factors to completely cover the length */
|
||||||
static inline int check_cd_factors(const FFTXCodelet *cd, int len)
|
static inline int check_cd_factors(const FFTXCodelet *cd, int len)
|
||||||
{
|
{
|
||||||
int all_flag = 0;
|
int matches = 0, any_flag = 0;
|
||||||
|
|
||||||
for (int i = 0; i < TX_MAX_SUB; i++) {
|
for (int i = 0; i < TX_MAX_FACTORS; i++) {
|
||||||
int factor = cd->factors[i];
|
int factor = cd->factors[i];
|
||||||
|
|
||||||
/* Conditions satisfied */
|
if (factor == TX_FACTOR_ANY) {
|
||||||
if (len == 1)
|
any_flag = 1;
|
||||||
return 1;
|
matches++;
|
||||||
|
|
||||||
/* No more factors */
|
|
||||||
if (!factor) {
|
|
||||||
break;
|
|
||||||
} else if (factor == TX_FACTOR_ANY) {
|
|
||||||
all_flag = 1;
|
|
||||||
continue;
|
continue;
|
||||||
}
|
} else if (len <= 1 || !factor) {
|
||||||
|
break;
|
||||||
if (factor == 2) { /* Fast path */
|
} else if (factor == 2) { /* Fast path */
|
||||||
int bits_2 = ff_ctz(len);
|
int bits_2 = ff_ctz(len);
|
||||||
if (!bits_2)
|
if (!bits_2)
|
||||||
return 0; /* Factor not supported */
|
continue; /* Factor not supported */
|
||||||
|
|
||||||
len >>= bits_2;
|
len >>= bits_2;
|
||||||
|
matches++;
|
||||||
} else {
|
} else {
|
||||||
int res = len % factor;
|
int res = len % factor;
|
||||||
if (res)
|
if (res)
|
||||||
return 0; /* Factor not supported */
|
continue; /* Factor not supported */
|
||||||
|
|
||||||
while (!res) {
|
while (!res) {
|
||||||
len /= factor;
|
len /= factor;
|
||||||
res = len % factor;
|
res = len % factor;
|
||||||
}
|
}
|
||||||
|
matches++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return all_flag || (len == 1);
|
return (cd->nb_factors <= matches) && (any_flag || len == 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
av_cold int ff_tx_init_subtx(AVTXContext *s, enum AVTXType type,
|
av_cold int ff_tx_init_subtx(AVTXContext *s, enum AVTXType type,
|
||||||
|
@ -71,7 +71,8 @@ typedef void TXComplex;
|
|||||||
.function = TX_FN_NAME(fn, suffix), \
|
.function = TX_FN_NAME(fn, suffix), \
|
||||||
.type = TX_TYPE(tx_type), \
|
.type = TX_TYPE(tx_type), \
|
||||||
.flags = FF_TX_ALIGNED | FF_TX_OUT_OF_PLACE | cd_flags, \
|
.flags = FF_TX_ALIGNED | FF_TX_OUT_OF_PLACE | cd_flags, \
|
||||||
.factors = { f1, f2 }, \
|
.factors = { (f1), (f2) }, \
|
||||||
|
.nb_factors = !!(f1) + !!(f2), \
|
||||||
.min_len = len_min, \
|
.min_len = len_min, \
|
||||||
.max_len = len_max, \
|
.max_len = len_max, \
|
||||||
.init = init_fn, \
|
.init = init_fn, \
|
||||||
@ -163,6 +164,9 @@ typedef struct FFTXCodeletOptions {
|
|||||||
invert the lookup direction for the map generated */
|
invert the lookup direction for the map generated */
|
||||||
} FFTXCodeletOptions;
|
} FFTXCodeletOptions;
|
||||||
|
|
||||||
|
/* Maximum number of factors a codelet may have. Arbitrary. */
|
||||||
|
#define TX_MAX_FACTORS 16
|
||||||
|
|
||||||
/* Maximum amount of subtransform functions, subtransforms and factors. Arbitrary. */
|
/* Maximum amount of subtransform functions, subtransforms and factors. Arbitrary. */
|
||||||
#define TX_MAX_SUB 4
|
#define TX_MAX_SUB 4
|
||||||
|
|
||||||
@ -175,13 +179,16 @@ typedef struct FFTXCodelet {
|
|||||||
uint64_t flags; /* A combination of AVTXFlags and codelet
|
uint64_t flags; /* A combination of AVTXFlags and codelet
|
||||||
* flags that describe its properties. */
|
* flags that describe its properties. */
|
||||||
|
|
||||||
int factors[TX_MAX_SUB]; /* Length factors */
|
int factors[TX_MAX_FACTORS]; /* Length factors. MUST be coprime. */
|
||||||
#define TX_FACTOR_ANY -1 /* When used alone, signals that the codelet
|
#define TX_FACTOR_ANY -1 /* When used alone, signals that the codelet
|
||||||
* supports all factors. Otherwise, if other
|
* supports all factors. Otherwise, if other
|
||||||
* factors are present, it signals that whatever
|
* factors are present, it signals that whatever
|
||||||
* remains will be supported, as long as the
|
* remains will be supported, as long as the
|
||||||
* other factors are a component of the length */
|
* other factors are a component of the length */
|
||||||
|
|
||||||
|
int nb_factors; /* Minimum number of entries in factors[] that have to
|
||||||
|
* divide the length (TX_FACTOR_ANY always counts). Must not be 0. */
|
||||||
|
|
||||||
int min_len; /* Minimum length of transform, must be >= 1 */
|
int min_len; /* Minimum length of transform, must be >= 1 */
|
||||||
int max_len; /* Maximum length of transform */
|
int max_len; /* Maximum length of transform */
|
||||||
#define TX_LEN_UNLIMITED -1 /* Special length value to permit all lengths */
|
#define TX_LEN_UNLIMITED -1 /* Special length value to permit all lengths */
|
||||||
|
@ -518,6 +518,7 @@ static const FFTXCodelet TX_NAME(ff_tx_fft##n##_ns_def) = { \
|
|||||||
.flags = AV_TX_INPLACE | FF_TX_OUT_OF_PLACE | \
|
.flags = AV_TX_INPLACE | FF_TX_OUT_OF_PLACE | \
|
||||||
AV_TX_UNALIGNED | FF_TX_PRESHUFFLE, \
|
AV_TX_UNALIGNED | FF_TX_PRESHUFFLE, \
|
||||||
.factors[0] = n, \
|
.factors[0] = n, \
|
||||||
|
.nb_factors = 1, \
|
||||||
.min_len = n, \
|
.min_len = n, \
|
||||||
.max_len = n, \
|
.max_len = n, \
|
||||||
.init = TX_NAME(ff_tx_fft_factor_init), \
|
.init = TX_NAME(ff_tx_fft_factor_init), \
|
||||||
@ -534,6 +535,7 @@ static const FFTXCodelet TX_NAME(ff_tx_fft##n##_fwd_def) = { \
|
|||||||
.flags = AV_TX_INPLACE | FF_TX_OUT_OF_PLACE | \
|
.flags = AV_TX_INPLACE | FF_TX_OUT_OF_PLACE | \
|
||||||
AV_TX_UNALIGNED | FF_TX_FORWARD_ONLY, \
|
AV_TX_UNALIGNED | FF_TX_FORWARD_ONLY, \
|
||||||
.factors[0] = n, \
|
.factors[0] = n, \
|
||||||
|
.nb_factors = 1, \
|
||||||
.min_len = n, \
|
.min_len = n, \
|
||||||
.max_len = n, \
|
.max_len = n, \
|
||||||
.init = TX_NAME(ff_tx_fft_factor_init), \
|
.init = TX_NAME(ff_tx_fft_factor_init), \
|
||||||
@ -614,6 +616,7 @@ static const FFTXCodelet TX_NAME(ff_tx_fft##n##_ns_def) = { \
|
|||||||
.flags = FF_TX_OUT_OF_PLACE | AV_TX_INPLACE | \
|
.flags = FF_TX_OUT_OF_PLACE | AV_TX_INPLACE | \
|
||||||
AV_TX_UNALIGNED | FF_TX_PRESHUFFLE, \
|
AV_TX_UNALIGNED | FF_TX_PRESHUFFLE, \
|
||||||
.factors[0] = 2, \
|
.factors[0] = 2, \
|
||||||
|
.nb_factors = 1, \
|
||||||
.min_len = n, \
|
.min_len = n, \
|
||||||
.max_len = n, \
|
.max_len = n, \
|
||||||
.init = TX_NAME(ff_tx_fft_sr_codelet_init), \
|
.init = TX_NAME(ff_tx_fft_sr_codelet_init), \
|
||||||
@ -814,6 +817,7 @@ static const FFTXCodelet TX_NAME(ff_tx_fft_def) = {
|
|||||||
.type = TX_TYPE(FFT),
|
.type = TX_TYPE(FFT),
|
||||||
.flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE,
|
.flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE,
|
||||||
.factors[0] = TX_FACTOR_ANY,
|
.factors[0] = TX_FACTOR_ANY,
|
||||||
|
.nb_factors = 1,
|
||||||
.min_len = 2,
|
.min_len = 2,
|
||||||
.max_len = TX_LEN_UNLIMITED,
|
.max_len = TX_LEN_UNLIMITED,
|
||||||
.init = TX_NAME(ff_tx_fft_init),
|
.init = TX_NAME(ff_tx_fft_init),
|
||||||
@ -827,6 +831,7 @@ static const FFTXCodelet TX_NAME(ff_tx_fft_inplace_small_def) = {
|
|||||||
.type = TX_TYPE(FFT),
|
.type = TX_TYPE(FFT),
|
||||||
.flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | AV_TX_INPLACE,
|
.flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | AV_TX_INPLACE,
|
||||||
.factors[0] = TX_FACTOR_ANY,
|
.factors[0] = TX_FACTOR_ANY,
|
||||||
|
.nb_factors = 1,
|
||||||
.min_len = 2,
|
.min_len = 2,
|
||||||
.max_len = 65536,
|
.max_len = 65536,
|
||||||
.init = TX_NAME(ff_tx_fft_inplace_small_init),
|
.init = TX_NAME(ff_tx_fft_inplace_small_init),
|
||||||
@ -840,6 +845,7 @@ static const FFTXCodelet TX_NAME(ff_tx_fft_inplace_def) = {
|
|||||||
.type = TX_TYPE(FFT),
|
.type = TX_TYPE(FFT),
|
||||||
.flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | AV_TX_INPLACE,
|
.flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | AV_TX_INPLACE,
|
||||||
.factors[0] = TX_FACTOR_ANY,
|
.factors[0] = TX_FACTOR_ANY,
|
||||||
|
.nb_factors = 1,
|
||||||
.min_len = 2,
|
.min_len = 2,
|
||||||
.max_len = TX_LEN_UNLIMITED,
|
.max_len = TX_LEN_UNLIMITED,
|
||||||
.init = TX_NAME(ff_tx_fft_init),
|
.init = TX_NAME(ff_tx_fft_init),
|
||||||
@ -927,6 +933,7 @@ static const FFTXCodelet TX_NAME(ff_tx_fft_naive_small_def) = {
|
|||||||
.type = TX_TYPE(FFT),
|
.type = TX_TYPE(FFT),
|
||||||
.flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE,
|
.flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE,
|
||||||
.factors[0] = TX_FACTOR_ANY,
|
.factors[0] = TX_FACTOR_ANY,
|
||||||
|
.nb_factors = 1,
|
||||||
.min_len = 2,
|
.min_len = 2,
|
||||||
.max_len = 1024,
|
.max_len = 1024,
|
||||||
.init = TX_NAME(ff_tx_fft_init_naive_small),
|
.init = TX_NAME(ff_tx_fft_init_naive_small),
|
||||||
@ -940,6 +947,7 @@ static const FFTXCodelet TX_NAME(ff_tx_fft_naive_def) = {
|
|||||||
.type = TX_TYPE(FFT),
|
.type = TX_TYPE(FFT),
|
||||||
.flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE,
|
.flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE,
|
||||||
.factors[0] = TX_FACTOR_ANY,
|
.factors[0] = TX_FACTOR_ANY,
|
||||||
|
.nb_factors = 1,
|
||||||
.min_len = 2,
|
.min_len = 2,
|
||||||
.max_len = TX_LEN_UNLIMITED,
|
.max_len = TX_LEN_UNLIMITED,
|
||||||
.init = NULL,
|
.init = NULL,
|
||||||
@ -1007,6 +1015,7 @@ static const FFTXCodelet TX_NAME(ff_tx_fft_pfa_##N##xM_def) = { \
|
|||||||
.type = TX_TYPE(FFT), \
|
.type = TX_TYPE(FFT), \
|
||||||
.flags = AV_TX_UNALIGNED | AV_TX_INPLACE | FF_TX_OUT_OF_PLACE, \
|
.flags = AV_TX_UNALIGNED | AV_TX_INPLACE | FF_TX_OUT_OF_PLACE, \
|
||||||
.factors = { N, TX_FACTOR_ANY }, \
|
.factors = { N, TX_FACTOR_ANY }, \
|
||||||
|
.nb_factors = 2, \
|
||||||
.min_len = N*2, \
|
.min_len = N*2, \
|
||||||
.max_len = TX_LEN_UNLIMITED, \
|
.max_len = TX_LEN_UNLIMITED, \
|
||||||
.init = TX_NAME(ff_tx_fft_pfa_init), \
|
.init = TX_NAME(ff_tx_fft_pfa_init), \
|
||||||
@ -1089,6 +1098,7 @@ static const FFTXCodelet TX_NAME(ff_tx_mdct_naive_fwd_def) = {
|
|||||||
.type = TX_TYPE(MDCT),
|
.type = TX_TYPE(MDCT),
|
||||||
.flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY,
|
.flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY,
|
||||||
.factors = { 2, TX_FACTOR_ANY }, /* MDCTs need an even length */
|
.factors = { 2, TX_FACTOR_ANY }, /* MDCTs need an even length */
|
||||||
|
.nb_factors = 2,
|
||||||
.min_len = 2,
|
.min_len = 2,
|
||||||
.max_len = TX_LEN_UNLIMITED,
|
.max_len = TX_LEN_UNLIMITED,
|
||||||
.init = TX_NAME(ff_tx_mdct_naive_init),
|
.init = TX_NAME(ff_tx_mdct_naive_init),
|
||||||
@ -1102,6 +1112,7 @@ static const FFTXCodelet TX_NAME(ff_tx_mdct_naive_inv_def) = {
|
|||||||
.type = TX_TYPE(MDCT),
|
.type = TX_TYPE(MDCT),
|
||||||
.flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY,
|
.flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY,
|
||||||
.factors = { 2, TX_FACTOR_ANY },
|
.factors = { 2, TX_FACTOR_ANY },
|
||||||
|
.nb_factors = 2,
|
||||||
.min_len = 2,
|
.min_len = 2,
|
||||||
.max_len = TX_LEN_UNLIMITED,
|
.max_len = TX_LEN_UNLIMITED,
|
||||||
.init = TX_NAME(ff_tx_mdct_naive_init),
|
.init = TX_NAME(ff_tx_mdct_naive_init),
|
||||||
@ -1234,6 +1245,7 @@ static const FFTXCodelet TX_NAME(ff_tx_mdct_fwd_def) = {
|
|||||||
.type = TX_TYPE(MDCT),
|
.type = TX_TYPE(MDCT),
|
||||||
.flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY,
|
.flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY,
|
||||||
.factors = { 2, TX_FACTOR_ANY },
|
.factors = { 2, TX_FACTOR_ANY },
|
||||||
|
.nb_factors = 2,
|
||||||
.min_len = 2,
|
.min_len = 2,
|
||||||
.max_len = TX_LEN_UNLIMITED,
|
.max_len = TX_LEN_UNLIMITED,
|
||||||
.init = TX_NAME(ff_tx_mdct_init),
|
.init = TX_NAME(ff_tx_mdct_init),
|
||||||
@ -1247,6 +1259,7 @@ static const FFTXCodelet TX_NAME(ff_tx_mdct_inv_def) = {
|
|||||||
.type = TX_TYPE(MDCT),
|
.type = TX_TYPE(MDCT),
|
||||||
.flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY,
|
.flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY,
|
||||||
.factors = { 2, TX_FACTOR_ANY },
|
.factors = { 2, TX_FACTOR_ANY },
|
||||||
|
.nb_factors = 2,
|
||||||
.min_len = 2,
|
.min_len = 2,
|
||||||
.max_len = TX_LEN_UNLIMITED,
|
.max_len = TX_LEN_UNLIMITED,
|
||||||
.init = TX_NAME(ff_tx_mdct_init),
|
.init = TX_NAME(ff_tx_mdct_init),
|
||||||
@ -1299,6 +1312,7 @@ static const FFTXCodelet TX_NAME(ff_tx_mdct_inv_full_def) = {
|
|||||||
.flags = AV_TX_UNALIGNED | AV_TX_INPLACE |
|
.flags = AV_TX_UNALIGNED | AV_TX_INPLACE |
|
||||||
FF_TX_OUT_OF_PLACE | AV_TX_FULL_IMDCT,
|
FF_TX_OUT_OF_PLACE | AV_TX_FULL_IMDCT,
|
||||||
.factors = { 2, TX_FACTOR_ANY },
|
.factors = { 2, TX_FACTOR_ANY },
|
||||||
|
.nb_factors = 2,
|
||||||
.min_len = 2,
|
.min_len = 2,
|
||||||
.max_len = TX_LEN_UNLIMITED,
|
.max_len = TX_LEN_UNLIMITED,
|
||||||
.init = TX_NAME(ff_tx_mdct_inv_full_init),
|
.init = TX_NAME(ff_tx_mdct_inv_full_init),
|
||||||
@ -1396,6 +1410,7 @@ static const FFTXCodelet TX_NAME(ff_tx_mdct_pfa_##N##xM_inv_def) = { \
|
|||||||
.type = TX_TYPE(MDCT), \
|
.type = TX_TYPE(MDCT), \
|
||||||
.flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY, \
|
.flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY, \
|
||||||
.factors = { N, TX_FACTOR_ANY }, \
|
.factors = { N, TX_FACTOR_ANY }, \
|
||||||
|
.nb_factors = 2, \
|
||||||
.min_len = N*2, \
|
.min_len = N*2, \
|
||||||
.max_len = TX_LEN_UNLIMITED, \
|
.max_len = TX_LEN_UNLIMITED, \
|
||||||
.init = TX_NAME(ff_tx_mdct_pfa_init), \
|
.init = TX_NAME(ff_tx_mdct_pfa_init), \
|
||||||
@ -1463,6 +1478,7 @@ static const FFTXCodelet TX_NAME(ff_tx_mdct_pfa_##N##xM_fwd_def) = { \
|
|||||||
.type = TX_TYPE(MDCT), \
|
.type = TX_TYPE(MDCT), \
|
||||||
.flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY, \
|
.flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY, \
|
||||||
.factors = { N, TX_FACTOR_ANY }, \
|
.factors = { N, TX_FACTOR_ANY }, \
|
||||||
|
.nb_factors = 2, \
|
||||||
.min_len = N*2, \
|
.min_len = N*2, \
|
||||||
.max_len = TX_LEN_UNLIMITED, \
|
.max_len = TX_LEN_UNLIMITED, \
|
||||||
.init = TX_NAME(ff_tx_mdct_pfa_init), \
|
.init = TX_NAME(ff_tx_mdct_pfa_init), \
|
||||||
@ -1583,6 +1599,7 @@ static const FFTXCodelet TX_NAME(ff_tx_rdft_r2c_def) = {
|
|||||||
.flags = AV_TX_UNALIGNED | AV_TX_INPLACE |
|
.flags = AV_TX_UNALIGNED | AV_TX_INPLACE |
|
||||||
FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY,
|
FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY,
|
||||||
.factors = { 2, TX_FACTOR_ANY },
|
.factors = { 2, TX_FACTOR_ANY },
|
||||||
|
.nb_factors = 2,
|
||||||
.min_len = 2,
|
.min_len = 2,
|
||||||
.max_len = TX_LEN_UNLIMITED,
|
.max_len = TX_LEN_UNLIMITED,
|
||||||
.init = TX_NAME(ff_tx_rdft_init),
|
.init = TX_NAME(ff_tx_rdft_init),
|
||||||
@ -1597,6 +1614,7 @@ static const FFTXCodelet TX_NAME(ff_tx_rdft_c2r_def) = {
|
|||||||
.flags = AV_TX_UNALIGNED | AV_TX_INPLACE |
|
.flags = AV_TX_UNALIGNED | AV_TX_INPLACE |
|
||||||
FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY,
|
FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY,
|
||||||
.factors = { 2, TX_FACTOR_ANY },
|
.factors = { 2, TX_FACTOR_ANY },
|
||||||
|
.nb_factors = 2,
|
||||||
.min_len = 2,
|
.min_len = 2,
|
||||||
.max_len = TX_LEN_UNLIMITED,
|
.max_len = TX_LEN_UNLIMITED,
|
||||||
.init = TX_NAME(ff_tx_rdft_init),
|
.init = TX_NAME(ff_tx_rdft_init),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user