avfilter/avf_showcqt: add attack option

Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>
2025-08-10 06:10:52 +02:00 · 2017-04-08 09:55:06 +07:00
parent 47ccefac21
commit 3408f46694
4 changed files with 41 additions and 8 deletions
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -17192,6 +17192,11 @@ event in time domain is represented more accurately (such as fast bass drum),
 otherwise event in frequency domain is represented more accurately
 (such as bass guitar). Acceptable range is @code{[0.002, 1]}. Default value is @code{0.17}.

+@item attack
+Set attack time in seconds. The default is @code{0} (disabled). Otherwise, it
+limits future samples by applying asymmetric windowing in the time domain,
+useful when low latency is required. Acceptable range is @code{[0, 1]}.
+
@item basefreq
 Specify the transform base frequency. Default value is @code{20.01523126408007475},
 which is frequency 50 cents below E0. Acceptable range is @code{[10, 100000]}.
--- a/libavfilter/avf_showcqt.c
+++ b/libavfilter/avf_showcqt.c
@@ -78,6 +78,7 @@ static const AVOption showcqt_options[] = {
    { "bar_t",  "set bar transparency", OFFSET(bar_t),      AV_OPT_TYPE_FLOAT, { .dbl = 1.0 },            0.0, 1.0,      FLAGS },
    { "timeclamp",     "set timeclamp", OFFSET(timeclamp), AV_OPT_TYPE_DOUBLE, { .dbl = 0.17 },         0.002, 1.0,      FLAGS },
    { "tc",            "set timeclamp", OFFSET(timeclamp), AV_OPT_TYPE_DOUBLE, { .dbl = 0.17 },         0.002, 1.0,      FLAGS },
+    { "attack",      "set attack time", OFFSET(attack),    AV_OPT_TYPE_DOUBLE, { .dbl = 0 },              0.0, 1.0,      FLAGS },
    { "basefreq", "set base frequency", OFFSET(basefreq),  AV_OPT_TYPE_DOUBLE, { .dbl = BASEFREQ },      10.0, 100000.0, FLAGS },
    { "endfreq",   "set end frequency", OFFSET(endfreq),   AV_OPT_TYPE_DOUBLE, { .dbl = ENDFREQ },       10.0, 100000.0, FLAGS },
    { "coeffclamp",   "set coeffclamp", OFFSET(coeffclamp), AV_OPT_TYPE_FLOAT, { .dbl = 1.0 },            0.1, 10.0,     FLAGS },
@@ -152,6 +153,7 @@ static void common_uninit(ShowCQTContext *s)
    av_freep(&s->fft_data);
    av_freep(&s->fft_result);
    av_freep(&s->cqt_result);
+    av_freep(&s->attack_data);
    av_freep(&s->c_buf);
    av_freep(&s->h_buf);
    av_freep(&s->rcp_h_buf);
@@ -1138,6 +1140,14 @@ static int plot_cqt(AVFilterContext *ctx, AVFrame **frameout)
    last_time = av_gettime();

    memcpy(s->fft_result, s->fft_data, s->fft_len * sizeof(*s->fft_data));
+    if (s->attack_data) {
+        int k;
+        for (k = 0; k < s->remaining_fill_max; k++) {
+            s->fft_result[s->fft_len/2+k].re *= s->attack_data[k];
+            s->fft_result[s->fft_len/2+k].im *= s->attack_data[k];
+        }
+    }
+
    av_fft_permute(s->fft_ctx, s->fft_result);
    av_fft_calc(s->fft_ctx, s->fft_result);
    s->fft_result[s->fft_len] = s->fft_result[0];
@@ -1377,6 +1387,21 @@ static int config_output(AVFilterLink *outlink)
    if (!s->fft_ctx || !s->fft_data || !s->fft_result || !s->cqt_result)
        return AVERROR(ENOMEM);

+    s->remaining_fill_max = s->fft_len / 2;
+    if (s->attack > 0.0) {
+        int k;
+
+        s->remaining_fill_max = FFMIN(s->remaining_fill_max, ceil(inlink->sample_rate * s->attack));
+        s->attack_data = av_malloc_array(s->remaining_fill_max, sizeof(*s->attack_data));
+        if (!s->attack_data)
+            return AVERROR(ENOMEM);
+
+        for (k = 0; k < s->remaining_fill_max; k++) {
+            double y = M_PI * k / (inlink->sample_rate * s->attack);
+            s->attack_data[k] = 0.355768 + 0.487396 * cos(y) + 0.144232 * cos(2*y) + 0.012604 * cos(3*y);
+        }
+    }
+
    s->cqt_align = 1;
    s->cqt_calc = cqt_calc;
    s->permute_coeffs = NULL;
@@ -1435,7 +1460,7 @@ static int config_output(AVFilterLink *outlink)
    s->sono_count = 0;
    s->next_pts = 0;
    s->sono_idx = 0;
-    s->remaining_fill = s->fft_len / 2;
+    s->remaining_fill = s->remaining_fill_max;
    s->remaining_frac = 0;
    s->step_frac = av_div_q(av_make_q(inlink->sample_rate, s->count) , s->rate);
    s->step = (int)(s->step_frac.num / s->step_frac.den);
@@ -1463,15 +1488,15 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
    AVFrame *out = NULL;

    if (!insamples) {
-        while (s->remaining_fill < s->fft_len / 2) {
-            memset(&s->fft_data[s->fft_len - s->remaining_fill], 0, sizeof(*s->fft_data) * s->remaining_fill);
+        while (s->remaining_fill < s->remaining_fill_max) {
+            memset(&s->fft_data[s->fft_len/2 + s->remaining_fill_max - s->remaining_fill], 0, sizeof(*s->fft_data) * s->remaining_fill);
            ret = plot_cqt(ctx, &out);
            if (ret < 0)
                return ret;

            step = s->step + (s->step_frac.num + s->remaining_frac) / s->step_frac.den;
            s->remaining_frac = (s->step_frac.num + s->remaining_frac) % s->step_frac.den;
-            for (x = 0; x < (s->fft_len-step); x++)
+            for (x = 0; x < (s->fft_len/2 + s->remaining_fill_max - step); x++)
                s->fft_data[x] = s->fft_data[x+step];
            s->remaining_fill += step;

@@ -1486,7 +1511,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)

    while (remaining) {
        i = insamples->nb_samples - remaining;
-        j = s->fft_len - s->remaining_fill;
+        j = s->fft_len/2 + s->remaining_fill_max - s->remaining_fill;
        if (remaining >= s->remaining_fill) {
            for (m = 0; m < s->remaining_fill; m++) {
                s->fft_data[j+m].re = audio_data[2*(i+m)];
@@ -1500,7 +1525,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
            remaining -= s->remaining_fill;
            if (out) {
                int64_t pts = av_rescale_q(insamples->pts, inlink->time_base, av_make_q(1, inlink->sample_rate));
-                pts += insamples->nb_samples - remaining - s->fft_len/2;
+                pts += insamples->nb_samples - remaining - s->remaining_fill_max;
                pts = av_rescale_q(pts, av_make_q(1, inlink->sample_rate), outlink->time_base);
                if (FFABS(pts - out->pts) > PTS_TOLERANCE) {
                    av_log(ctx, AV_LOG_DEBUG, "changing pts from %"PRId64" (%.3f) to %"PRId64" (%.3f).\n",
@@ -1518,7 +1543,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
            }
            step = s->step + (s->step_frac.num + s->remaining_frac) / s->step_frac.den;
            s->remaining_frac = (s->step_frac.num + s->remaining_frac) % s->step_frac.den;
-            for (m = 0; m < s->fft_len-step; m++)
+            for (m = 0; m < s->fft_len/2 + s->remaining_fill_max - step; m++)
                s->fft_data[m] = s->fft_data[m+step];
            s->remaining_fill = step;
        } else {
--- a/libavfilter/avf_showcqt.h
+++ b/libavfilter/avf_showcqt.h
@@ -55,6 +55,7 @@ typedef struct {
    AVRational          step_frac;
    int                 remaining_frac;
    int                 remaining_fill;
+    int                 remaining_fill_max;
    int64_t             next_pts;
    double              *freq;
    FFTContext          *fft_ctx;
@@ -62,6 +63,7 @@ typedef struct {
    FFTComplex          *fft_data;
    FFTComplex          *fft_result;
    FFTComplex          *cqt_result;
+    float               *attack_data;
    int                 fft_bits;
    int                 fft_len;
    int                 cqt_len;
@@ -104,6 +106,7 @@ typedef struct {
    float               bar_g;
    float               bar_t;
    double              timeclamp;
+    double              attack;
    double              basefreq;
    double              endfreq;
    float               coeffclamp; /* deprecated - ignored */
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -31,7 +31,7 @@

 #define LIBAVFILTER_VERSION_MAJOR   6
 #define LIBAVFILTER_VERSION_MINOR  84
-#define LIBAVFILTER_VERSION_MICRO 100
+#define LIBAVFILTER_VERSION_MICRO 101

 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
                                               LIBAVFILTER_VERSION_MINOR, \