mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-24 13:56:33 +02:00
lavu/tx: add real to real and real to imaginary RDFT transforms
These are in-place transforms, required for DCT-I and DST-I. The implementation is templated because the mod2 variant, which is required specifically for DCT-I/DST-I, needs only minor modifications.
This commit is contained in:
parent
fba4546175
commit
11e22730e1
@ -2,6 +2,9 @@ The last version increases of all libraries were on 2023-02-09
|
|||||||
|
|
||||||
API changes, most recent first:
|
API changes, most recent first:
|
||||||
|
|
||||||
|
2023-07-xx - xxxxxxxxxx - lavu 58.18.100 - tx.h
|
||||||
|
Add AV_TX_REAL_TO_REAL and AV_TX_REAL_TO_IMAGINARY
|
||||||
|
|
||||||
2023-08-18 - xxxxxxxxxx - lavu 58.17.100 - channel_layout.h
|
2023-08-18 - xxxxxxxxxx - lavu 58.17.100 - channel_layout.h
|
||||||
All AV_CHANNEL_LAYOUT_* macros are now compatible with C++ 17 and older.
|
All AV_CHANNEL_LAYOUT_* macros are now compatible with C++ 17 and older.
|
||||||
|
|
||||||
|
@ -437,7 +437,9 @@ int ff_tx_decompose_length(int dst[TX_MAX_DECOMPOSITIONS], enum AVTXType type,
|
|||||||
|
|
||||||
/* Check direction for non-orthogonal codelets */
|
/* Check direction for non-orthogonal codelets */
|
||||||
if (((cd->flags & FF_TX_FORWARD_ONLY) && inv) ||
|
if (((cd->flags & FF_TX_FORWARD_ONLY) && inv) ||
|
||||||
((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && !inv))
|
((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && !inv) ||
|
||||||
|
((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_REAL_TO_REAL)) && inv) ||
|
||||||
|
((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_REAL_TO_IMAGINARY)) && inv))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
/* Check if the CPU supports the required ISA */
|
/* Check if the CPU supports the required ISA */
|
||||||
@ -560,6 +562,10 @@ static void print_flags(AVBPrint *bp, uint64_t f)
|
|||||||
av_bprintf(bp, "%spreshuf", prev > 1 ? sep : "");
|
av_bprintf(bp, "%spreshuf", prev > 1 ? sep : "");
|
||||||
if ((f & AV_TX_FULL_IMDCT) && ++prev)
|
if ((f & AV_TX_FULL_IMDCT) && ++prev)
|
||||||
av_bprintf(bp, "%simdct_full", prev > 1 ? sep : "");
|
av_bprintf(bp, "%simdct_full", prev > 1 ? sep : "");
|
||||||
|
if ((f & AV_TX_REAL_TO_REAL) && ++prev)
|
||||||
|
av_bprintf(bp, "%sreal_to_real", prev > 1 ? sep : "");
|
||||||
|
if ((f & AV_TX_REAL_TO_IMAGINARY) && ++prev)
|
||||||
|
av_bprintf(bp, "%sreal_to_imaginary", prev > 1 ? sep : "");
|
||||||
if ((f & FF_TX_ASM_CALL) && ++prev)
|
if ((f & FF_TX_ASM_CALL) && ++prev)
|
||||||
av_bprintf(bp, "%sasm_call", prev > 1 ? sep : "");
|
av_bprintf(bp, "%sasm_call", prev > 1 ? sep : "");
|
||||||
av_bprintf(bp, "]");
|
av_bprintf(bp, "]");
|
||||||
@ -717,7 +723,11 @@ av_cold int ff_tx_init_subtx(AVTXContext *s, enum AVTXType type,
|
|||||||
uint64_t req_flags = flags;
|
uint64_t req_flags = flags;
|
||||||
|
|
||||||
/* Flags the codelet may require to be present */
|
/* Flags the codelet may require to be present */
|
||||||
uint64_t inv_req_mask = AV_TX_FULL_IMDCT | FF_TX_PRESHUFFLE | FF_TX_ASM_CALL;
|
uint64_t inv_req_mask = AV_TX_FULL_IMDCT |
|
||||||
|
AV_TX_REAL_TO_REAL |
|
||||||
|
AV_TX_REAL_TO_IMAGINARY |
|
||||||
|
FF_TX_PRESHUFFLE |
|
||||||
|
FF_TX_ASM_CALL;
|
||||||
|
|
||||||
/* Unaligned codelets are compatible with the aligned flag */
|
/* Unaligned codelets are compatible with the aligned flag */
|
||||||
if (req_flags & FF_TX_ALIGNED)
|
if (req_flags & FF_TX_ALIGNED)
|
||||||
@ -742,7 +752,9 @@ av_cold int ff_tx_init_subtx(AVTXContext *s, enum AVTXType type,
|
|||||||
|
|
||||||
/* Check direction for non-orthogonal codelets */
|
/* Check direction for non-orthogonal codelets */
|
||||||
if (((cd->flags & FF_TX_FORWARD_ONLY) && inv) ||
|
if (((cd->flags & FF_TX_FORWARD_ONLY) && inv) ||
|
||||||
((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && !inv))
|
((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && !inv) ||
|
||||||
|
((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_REAL_TO_REAL)) && inv) ||
|
||||||
|
((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_REAL_TO_IMAGINARY)) && inv))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
/* Check if the requested flags match from both sides */
|
/* Check if the requested flags match from both sides */
|
||||||
|
@ -149,6 +149,16 @@ enum AVTXFlags {
|
|||||||
* Ignored for all transforms but inverse MDCTs.
|
* Ignored for all transforms but inverse MDCTs.
|
||||||
*/
|
*/
|
||||||
AV_TX_FULL_IMDCT = 1ULL << 2,
|
AV_TX_FULL_IMDCT = 1ULL << 2,
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Perform a real to half-complex RDFT.
|
||||||
|
* Only the real or imaginary coefficients will
|
||||||
|
* be output, depending on the flag used. Only available for forward RDFTs.
|
||||||
|
* Output array must have enough space to hold N complex values
|
||||||
|
* (regular size for a real to complex transform).
|
||||||
|
*/
|
||||||
|
AV_TX_REAL_TO_REAL = 1ULL << 3,
|
||||||
|
AV_TX_REAL_TO_IMAGINARY = 1ULL << 4,
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1613,14 +1613,17 @@ static av_cold int TX_NAME(ff_tx_rdft_init)(AVTXContext *s,
|
|||||||
int ret;
|
int ret;
|
||||||
double f, m;
|
double f, m;
|
||||||
TXSample *tab;
|
TXSample *tab;
|
||||||
|
int len4 = FFALIGN(len, 4) / 4;
|
||||||
|
|
||||||
s->scale_d = *((SCALE_TYPE *)scale);
|
s->scale_d = *((SCALE_TYPE *)scale);
|
||||||
s->scale_f = s->scale_d;
|
s->scale_f = s->scale_d;
|
||||||
|
|
||||||
|
flags &= ~(AV_TX_REAL_TO_REAL | AV_TX_REAL_TO_IMAGINARY);
|
||||||
|
|
||||||
if ((ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, NULL, len >> 1, inv, scale)))
|
if ((ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, NULL, len >> 1, inv, scale)))
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
if (!(s->exp = av_mallocz((8 + (len >> 2) - 1)*sizeof(*s->exp))))
|
if (!(s->exp = av_mallocz((8 + 2*len4)*sizeof(*s->exp))))
|
||||||
return AVERROR(ENOMEM);
|
return AVERROR(ENOMEM);
|
||||||
|
|
||||||
tab = (TXSample *)s->exp;
|
tab = (TXSample *)s->exp;
|
||||||
@ -1639,17 +1642,20 @@ static av_cold int TX_NAME(ff_tx_rdft_init)(AVTXContext *s,
|
|||||||
*tab++ = RESCALE( (0.5 - inv) * m);
|
*tab++ = RESCALE( (0.5 - inv) * m);
|
||||||
*tab++ = RESCALE(-(0.5 - inv) * m);
|
*tab++ = RESCALE(-(0.5 - inv) * m);
|
||||||
|
|
||||||
for (int i = 0; i < len >> 2; i++)
|
for (int i = 0; i < len4; i++)
|
||||||
*tab++ = RESCALE(cos(i*f));
|
*tab++ = RESCALE(cos(i*f));
|
||||||
for (int i = len >> 2; i >= 0; i--)
|
|
||||||
*tab++ = RESCALE(cos(i*f) * (inv ? +1.0 : -1.0));
|
tab = ((TXSample *)s->exp) + len4 + 8;
|
||||||
|
|
||||||
|
for (int i = 0; i < len4; i++)
|
||||||
|
*tab++ = RESCALE(cos(((float)len/4.0 - (float)i + 0)*f) * (inv ? +1.0 : -1.0));
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define DECL_RDFT(name, inv) \
|
#define DECL_RDFT(n, inv) \
|
||||||
static void TX_NAME(ff_tx_rdft_ ##name)(AVTXContext *s, void *_dst, \
|
static void TX_NAME(ff_tx_rdft_ ##n)(AVTXContext *s, void *_dst, \
|
||||||
void *_src, ptrdiff_t stride) \
|
void *_src, ptrdiff_t stride) \
|
||||||
{ \
|
{ \
|
||||||
const int len2 = s->len >> 1; \
|
const int len2 = s->len >> 1; \
|
||||||
const int len4 = s->len >> 2; \
|
const int len4 = s->len >> 2; \
|
||||||
@ -1698,41 +1704,132 @@ static void TX_NAME(ff_tx_rdft_ ##name)(AVTXContext *s, void *_dst, \
|
|||||||
data[len2].re = data[0].im; \
|
data[len2].re = data[0].im; \
|
||||||
data[ 0].im = data[len2].im = 0; \
|
data[ 0].im = data[len2].im = 0; \
|
||||||
} \
|
} \
|
||||||
}
|
} \
|
||||||
|
\
|
||||||
DECL_RDFT(r2c, 0)
|
static const FFTXCodelet TX_NAME(ff_tx_rdft_ ##n## _def) = { \
|
||||||
DECL_RDFT(c2r, 1)
|
.name = TX_NAME_STR("rdft_" #n), \
|
||||||
|
.function = TX_NAME(ff_tx_rdft_ ##n), \
|
||||||
static const FFTXCodelet TX_NAME(ff_tx_rdft_r2c_def) = {
|
.type = TX_TYPE(RDFT), \
|
||||||
.name = TX_NAME_STR("rdft_r2c"),
|
.flags = AV_TX_UNALIGNED | AV_TX_INPLACE | FF_TX_OUT_OF_PLACE | \
|
||||||
.function = TX_NAME(ff_tx_rdft_r2c),
|
(inv ? FF_TX_INVERSE_ONLY : FF_TX_FORWARD_ONLY), \
|
||||||
.type = TX_TYPE(RDFT),
|
.factors = { 4, TX_FACTOR_ANY }, \
|
||||||
.flags = AV_TX_UNALIGNED | AV_TX_INPLACE |
|
.nb_factors = 2, \
|
||||||
FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY,
|
.min_len = 4, \
|
||||||
.factors = { 2, TX_FACTOR_ANY },
|
.max_len = TX_LEN_UNLIMITED, \
|
||||||
.nb_factors = 2,
|
.init = TX_NAME(ff_tx_rdft_init), \
|
||||||
.min_len = 2,
|
.cpu_flags = FF_TX_CPU_FLAGS_ALL, \
|
||||||
.max_len = TX_LEN_UNLIMITED,
|
.prio = FF_TX_PRIO_BASE, \
|
||||||
.init = TX_NAME(ff_tx_rdft_init),
|
|
||||||
.cpu_flags = FF_TX_CPU_FLAGS_ALL,
|
|
||||||
.prio = FF_TX_PRIO_BASE,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
static const FFTXCodelet TX_NAME(ff_tx_rdft_c2r_def) = {
|
DECL_RDFT(r2c, 0)
|
||||||
.name = TX_NAME_STR("rdft_c2r"),
|
DECL_RDFT(c2r, 1)
|
||||||
.function = TX_NAME(ff_tx_rdft_c2r),
|
|
||||||
.type = TX_TYPE(RDFT),
|
#define DECL_RDFT_HALF(n, mode, mod2) \
|
||||||
.flags = AV_TX_UNALIGNED | AV_TX_INPLACE |
|
static void TX_NAME(ff_tx_rdft_ ##n)(AVTXContext *s, void *_dst, \
|
||||||
FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY,
|
void *_src, ptrdiff_t stride) \
|
||||||
.factors = { 2, TX_FACTOR_ANY },
|
{ \
|
||||||
.nb_factors = 2,
|
const int len = s->len; \
|
||||||
.min_len = 2,
|
const int len2 = len >> 1; \
|
||||||
.max_len = TX_LEN_UNLIMITED,
|
const int len4 = len >> 2; \
|
||||||
.init = TX_NAME(ff_tx_rdft_init),
|
const int aligned_len4 = FFALIGN(len, 4)/4; \
|
||||||
.cpu_flags = FF_TX_CPU_FLAGS_ALL,
|
const TXSample *fact = (void *)s->exp; \
|
||||||
.prio = FF_TX_PRIO_BASE,
|
const TXSample *tcos = fact + 8; \
|
||||||
|
const TXSample *tsin = tcos + aligned_len4; \
|
||||||
|
TXComplex *data = _dst; \
|
||||||
|
TXSample *out = _dst; /* Half-complex is forward-only */ \
|
||||||
|
TXSample tmp_dc; \
|
||||||
|
av_unused TXSample tmp_mid; \
|
||||||
|
TXSample tmp[4]; \
|
||||||
|
TXComplex sf, sl; \
|
||||||
|
\
|
||||||
|
s->fn[0](&s->sub[0], _dst, _src, sizeof(TXComplex)); \
|
||||||
|
\
|
||||||
|
tmp_dc = data[0].re; \
|
||||||
|
data[ 0].re = tmp_dc + data[0].im; \
|
||||||
|
tmp_dc = tmp_dc - data[0].im; \
|
||||||
|
\
|
||||||
|
data[ 0].re = MULT(fact[0], data[ 0].re); \
|
||||||
|
tmp_dc = MULT(fact[1], tmp_dc); \
|
||||||
|
data[len4].re = MULT(fact[2], data[len4].re); \
|
||||||
|
\
|
||||||
|
if (!mod2) { \
|
||||||
|
data[len4].im = MULT(fact[3], data[len4].im); \
|
||||||
|
} else { \
|
||||||
|
sf = data[len4]; \
|
||||||
|
sl = data[len4 + 1]; \
|
||||||
|
if (mode == AV_TX_REAL_TO_REAL) \
|
||||||
|
tmp[0] = MULT(fact[4], (sf.re + sl.re)); \
|
||||||
|
else \
|
||||||
|
tmp[0] = MULT(fact[5], (sf.im - sl.im)); \
|
||||||
|
tmp[1] = MULT(fact[6], (sf.im + sl.im)); \
|
||||||
|
tmp[2] = MULT(fact[7], (sf.re - sl.re)); \
|
||||||
|
\
|
||||||
|
if (mode == AV_TX_REAL_TO_REAL) { \
|
||||||
|
tmp[3] = tmp[1]*tcos[len4] - tmp[2]*tsin[len4]; \
|
||||||
|
tmp_mid = (tmp[0] - tmp[3]); \
|
||||||
|
} else { \
|
||||||
|
tmp[3] = tmp[1]*tsin[len4] + tmp[2]*tcos[len4]; \
|
||||||
|
tmp_mid = (tmp[0] + tmp[3]); \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
/* NOTE: unrolling this breaks non-mod8 lengths */ \
|
||||||
|
for (int i = 1; i <= len4; i++) { \
|
||||||
|
TXSample tmp[4]; \
|
||||||
|
TXComplex sf = data[i]; \
|
||||||
|
TXComplex sl = data[len2 - i]; \
|
||||||
|
\
|
||||||
|
if (mode == AV_TX_REAL_TO_REAL) \
|
||||||
|
tmp[0] = MULT(fact[4], (sf.re + sl.re)); \
|
||||||
|
else \
|
||||||
|
tmp[0] = MULT(fact[5], (sf.im - sl.im)); \
|
||||||
|
\
|
||||||
|
tmp[1] = MULT(fact[6], (sf.im + sl.im)); \
|
||||||
|
tmp[2] = MULT(fact[7], (sf.re - sl.re)); \
|
||||||
|
\
|
||||||
|
if (mode == AV_TX_REAL_TO_REAL) { \
|
||||||
|
tmp[3] = tmp[1]*tcos[i] - tmp[2]*tsin[i]; \
|
||||||
|
out[i] = (tmp[0] + tmp[3]); \
|
||||||
|
out[len - i] = (tmp[0] - tmp[3]); \
|
||||||
|
} else { \
|
||||||
|
tmp[3] = tmp[1]*tsin[i] + tmp[2]*tcos[i]; \
|
||||||
|
out[i - 1] = (tmp[3] - tmp[0]); \
|
||||||
|
out[len - i - 1] = (tmp[0] + tmp[3]); \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
for (int i = 1; i < (len4 + (mode == AV_TX_REAL_TO_IMAGINARY)); i++) \
|
||||||
|
out[len2 - i] = out[len - i]; \
|
||||||
|
\
|
||||||
|
if (mode == AV_TX_REAL_TO_REAL) { \
|
||||||
|
out[len2] = tmp_dc; \
|
||||||
|
if (mod2) \
|
||||||
|
out[len4 + 1] = tmp_mid; \
|
||||||
|
} else if (mod2) { \
|
||||||
|
out[len4] = tmp_mid; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
static const FFTXCodelet TX_NAME(ff_tx_rdft_ ##n## _def) = { \
|
||||||
|
.name = TX_NAME_STR("rdft_" #n), \
|
||||||
|
.function = TX_NAME(ff_tx_rdft_ ##n), \
|
||||||
|
.type = TX_TYPE(RDFT), \
|
||||||
|
.flags = AV_TX_UNALIGNED | AV_TX_INPLACE | mode | \
|
||||||
|
FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY, \
|
||||||
|
.factors = { 2 + 2*(!mod2), TX_FACTOR_ANY }, \
|
||||||
|
.nb_factors = 2, \
|
||||||
|
.min_len = 2 + 2*(!mod2), \
|
||||||
|
.max_len = TX_LEN_UNLIMITED, \
|
||||||
|
.init = TX_NAME(ff_tx_rdft_init), \
|
||||||
|
.cpu_flags = FF_TX_CPU_FLAGS_ALL, \
|
||||||
|
.prio = FF_TX_PRIO_BASE, \
|
||||||
};
|
};
|
||||||
|
|
||||||
|
DECL_RDFT_HALF(r2r, AV_TX_REAL_TO_REAL, 0)
|
||||||
|
DECL_RDFT_HALF(r2r_mod2, AV_TX_REAL_TO_REAL, 1)
|
||||||
|
DECL_RDFT_HALF(r2i, AV_TX_REAL_TO_IMAGINARY, 0)
|
||||||
|
DECL_RDFT_HALF(r2i_mod2, AV_TX_REAL_TO_IMAGINARY, 1)
|
||||||
|
|
||||||
static av_cold int TX_NAME(ff_tx_dct_init)(AVTXContext *s,
|
static av_cold int TX_NAME(ff_tx_dct_init)(AVTXContext *s,
|
||||||
const FFTXCodelet *cd,
|
const FFTXCodelet *cd,
|
||||||
uint64_t flags,
|
uint64_t flags,
|
||||||
@ -1997,6 +2094,10 @@ const FFTXCodelet * const TX_NAME(ff_tx_codelet_list)[] = {
|
|||||||
&TX_NAME(ff_tx_mdct_naive_inv_def),
|
&TX_NAME(ff_tx_mdct_naive_inv_def),
|
||||||
&TX_NAME(ff_tx_mdct_inv_full_def),
|
&TX_NAME(ff_tx_mdct_inv_full_def),
|
||||||
&TX_NAME(ff_tx_rdft_r2c_def),
|
&TX_NAME(ff_tx_rdft_r2c_def),
|
||||||
|
&TX_NAME(ff_tx_rdft_r2r_def),
|
||||||
|
&TX_NAME(ff_tx_rdft_r2r_mod2_def),
|
||||||
|
&TX_NAME(ff_tx_rdft_r2i_def),
|
||||||
|
&TX_NAME(ff_tx_rdft_r2i_mod2_def),
|
||||||
&TX_NAME(ff_tx_rdft_c2r_def),
|
&TX_NAME(ff_tx_rdft_c2r_def),
|
||||||
&TX_NAME(ff_tx_dctII_def),
|
&TX_NAME(ff_tx_dctII_def),
|
||||||
&TX_NAME(ff_tx_dctIII_def),
|
&TX_NAME(ff_tx_dctIII_def),
|
||||||
|
@ -79,7 +79,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#define LIBAVUTIL_VERSION_MAJOR 58
|
#define LIBAVUTIL_VERSION_MAJOR 58
|
||||||
#define LIBAVUTIL_VERSION_MINOR 17
|
#define LIBAVUTIL_VERSION_MINOR 18
|
||||||
#define LIBAVUTIL_VERSION_MICRO 100
|
#define LIBAVUTIL_VERSION_MICRO 100
|
||||||
|
|
||||||
#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
|
#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
|
||||||
|
Loading…
x
Reference in New Issue
Block a user