avfilter/af_adynamicequalizer: add adaptive detection mode

2024-12-23 12:43:46 +02:00 · 2023-11-08 14:31:50 +01:00 · 2023-11-08 14:31:50 +01:00 · 08e97dae20
commit 08e97dae20
parent 82be1e5c0d
3 changed files with 149 additions and 0 deletions
--- a/doc/filters.texi
+++ b/doc/filters.texi
@ -1100,6 +1100,8 @@ Disable using automatically gathered threshold value.
 Stop picking threshold value.
@item on
 Start picking threshold value.
+@item adaptive
+Adaptively pick threshold value by calculating sliding-window entropy.
@end table

@item precision
--- a/libavfilter/adynamicequalizer_template.c
+++ b/libavfilter/adynamicequalizer_template.c
@ -27,7 +27,11 @@
 #undef CLIP
 #undef SAMPLE_FORMAT
 #undef FABS
+#undef FLOG
+#undef FEXP
+#undef FLOG2
 #undef FLOG10
+#undef FEXP2
 #undef FEXP10
 #undef EPSILON
 #if DEPTH == 32
@ -41,7 +45,11 @@
 #define FMAX fmaxf
 #define CLIP av_clipf
 #define FABS fabsf
+#define FLOG logf
+#define FEXP expf
+#define FLOG2 log2f
 #define FLOG10 log10f
+#define FEXP2 exp2f
 #define FEXP10 ff_exp10f
 #define EPSILON (1.f / (1 << 23))
 #define ftype float
@ -56,7 +64,11 @@
 #define FMAX fmax
 #define CLIP av_clipd
 #define FABS fabs
+#define FLOG log
+#define FEXP exp
+#define FLOG2 log2
 #define FLOG10 log10
+#define FEXP2 exp2
 #define FEXP10 ff_exp10
 #define EPSILON (1.0 / (1LL << 53))
 #define ftype double
@ -150,6 +162,92 @@ static int fn(filter_prepare)(AVFilterContext *ctx)
    return 0;
 }

+#define PEAKS(empty_value,op,sample, psample) /* updates ss, front, back, empty (ring length n) of the expanding scope */ \
+    if (!empty && psample == ss[front]) {    /* value leaving the window equals the front entry: clear that slot */ \
+        ss[front] = empty_value;             \
+        if (back != front) {                 /* other entries exist: move front one slot down, wrapping to n - 1 */ \
+            front--;                         \
+            if (front < 0)                   \
+                front = n - 1;               \
+        }                                    \
+        empty = front == back;               /* NOTE(review): also becomes 1 when front has just stepped onto a still-filled back slot; confirm intended */ \
+    }                                        \
+                                             \
+    if (!empty && sample op ss[front]) {     /* `sample op front entry` holds: clear every slot while walking front down to back */ \
+        while (1) {                          \
+            ss[front] = empty_value;         \
+            if (back == front) {             \
+                empty = 1;                   \
+                break;                       \
+            }                                \
+            front--;                         \
+            if (front < 0)                   \
+                front = n - 1;               \
+        }                                    \
+    }                                        \
+                                             \
+    while (!empty && sample op ss[back]) {   /* clear back-side slots for which `sample op slot` holds, moving back up with wrap to 0 */ \
+        ss[back] = empty_value;              \
+        if (back == front) {                 \
+            empty = 1;                       \
+            break;                           \
+        }                                    \
+        back++;                              \
+        if (back >= n)                       \
+            back = 0;                        \
+    }                                        \
+                                             \
+    if (!empty) {                            /* entries remain: step back down one slot (wrapping); ss[back] itself is not written here */ \
+        back--;                              \
+        if (back < 0)                        \
+            back = n - 1;                    \
+    }
+
+static void fn(queue_sample)(ChannelContext *cc,   /* channel state holding both rings and the running sums */
+                             const ftype x,        /* new sample; its log2 is taken, and the visible caller passes values >= EPSILON */
+                             const int nb_samples) /* window length in samples: pos wraps at it and size saturates at it */
+{ /* Pushes x into the sliding window: updates sum/log_sum, the sample ring (queue) and the peak ring (dqueue). */
+    ftype *ss = cc->dqueue;           /* ring maintained by PEAKS; get_peak reads ss[front] */
+    ftype *qq = cc->queue;            /* ring of the samples written so far, nb_samples slots used */
+    int front = cc->front;
+    int back = cc->back;
+    int empty, n, pos = cc->position;
+    ftype px = qq[pos];               /* value about to be overwritten at the write position */
+
+    fn(cc->sum) += x;
+    fn(cc->log_sum) += FLOG2(x);
+    if (cc->size >= nb_samples) {     /* window already full: remove the overwritten sample from both sums */
+        fn(cc->sum) -= px;
+        fn(cc->log_sum) -= FLOG2(px);
+    }
+
+    qq[pos] = x;
+    pos++;
+    if (pos >= nb_samples)
+        pos = 0;
+    cc->position = pos;
+
+    if (cc->size < nb_samples)        /* size counts stored samples, capped at nb_samples */
+        cc->size++;
+    n = cc->size;                     /* PEAKS wraps front/back using the current size */
+
+    empty = (front == back) && (ss[front] == ZERO);  /* one shared slot still holding ZERO is treated as nothing queued */
+    PEAKS(ZERO, >, x, px)
+
+    ss[back] = x;                     /* store the new sample in the slot PEAKS left back pointing at */
+
+    cc->front = front;
+    cc->back = back;
+}
+
+static ftype fn(get_peak)(ChannelContext *cc, ftype *score)  /* returns the front entry of the peak ring; *score receives LIN2LOG of the ratio computed below */
+{
+    ftype s, *ss = cc->dqueue;
+    s = FEXP2(fn(cc->log_sum) / cc->size) / (fn(cc->sum) / cc->size);  /* 2^(log_sum / size) over (sum / size); log_sum accumulates log2 of samples, so this is geometric mean / arithmetic mean. NOTE(review): divides by cc->size, confirm it cannot be 0 here */
+    *score = LIN2LOG(s);
+    return ss[cc->front];
+}
+
 static int fn(filter_channels)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
 {
    AudioDynamicEqualizerContext *s = ctx->priv;
@ -157,6 +255,7 @@ static int fn(filter_channels)(AVFilterContext *ctx, void *arg, int jobnr, int n
    AVFrame *in = td->in;
    AVFrame *out = td->out;
    const ftype sample_rate = in->sample_rate;
+    const int isample_rate = in->sample_rate;
    const ftype makeup = s->makeup;
    const ftype ratio = s->ratio;
    const ftype range = s->range;
@ -197,6 +296,27 @@ static int fn(filter_channels)(AVFilterContext *ctx, void *arg, int jobnr, int n

            fn(cc->new_threshold_log) = FMAX(fn(cc->new_threshold_log), LIN2LOG(new_threshold));
        }
+    } else if (detection == DET_ADAPTIVE) {
+        for (int ch = start; ch < end; ch++) {
+            const ftype *src = (const ftype *)in->extended_data[ch];
+            ChannelContext *cc = &s->cc[ch];
+            ftype *tstate = fn(cc->tstate);
+            ftype score, peak;
+
+            for (int n = 0; n < in->nb_samples; n++) {
+                ftype detect = FMAX(FABS(fn(get_svf)(src[n], dm, da, tstate)), EPSILON);
+                fn(queue_sample)(cc, detect, isample_rate);
+            }
+
+            peak = fn(get_peak)(cc, &score);
+
+            if (score >= -3.5) {
+                fn(cc->threshold_log) = LIN2LOG(peak);
+            } else if (cc->detection == DET_UNSET) {
+                fn(cc->threshold_log) = s->threshold_log;
+            }
+            cc->detection = detection;
+        }
    } else if (detection == DET_DISABLED) {
        for (int ch = start; ch < end; ch++) {
            ChannelContext *cc = &s->cc[ch];
--- a/libavfilter/af_adynamicequalizer.c
+++ b/libavfilter/af_adynamicequalizer.c
@ -29,6 +29,7 @@ enum DetectionModes {
    DET_DISABLED,
    DET_OFF,
    DET_ON,
+    DET_ADAPTIVE,
    NB_DMODES,
 };

@ -50,6 +51,8 @@ typedef struct ChannelContext {
    double detect_double;
    double threshold_log_double;
    double new_threshold_log_double;
+    double log_sum_double;
+    double sum_double;
    float fa_float[3], fm_float[3];
    float dstate_float[2];
    float fstate_float[2];
@ -58,6 +61,14 @@ typedef struct ChannelContext {
    float detect_float;
    float threshold_log_float;
    float new_threshold_log_float;
+    float log_sum_float;
+    float sum_float;
+    void *dqueue;
+    void *queue;
+    int position;
+    int size;
+    int front;
+    int back;
    int detection;
    int init;
 } ChannelContext;
@ -86,6 +97,7 @@ typedef struct AudioDynamicEqualizerContext {
    int dftype;
    int precision;
    int format;
+    int nb_channels;

    int (*filter_prepare)(AVFilterContext *ctx);
    int (*filter_channels)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
@ -140,6 +152,7 @@ static int config_input(AVFilterLink *inlink)
    s->cc = av_calloc(inlink->ch_layout.nb_channels, sizeof(*s->cc));
    if (!s->cc)
        return AVERROR(ENOMEM);
+    s->nb_channels = inlink->ch_layout.nb_channels;

    switch (s->format) {
    case AV_SAMPLE_FMT_DBLP:
@ -152,6 +165,14 @@ static int config_input(AVFilterLink *inlink)
        break;
    }

+    for (int ch = 0; ch < s->nb_channels; ch++) {
+        ChannelContext *cc = &s->cc[ch];
+        cc->queue = av_calloc(inlink->sample_rate, sizeof(double));
+        cc->dqueue = av_calloc(inlink->sample_rate, sizeof(double));
+        if (!cc->queue || !cc->dqueue)
+            return AVERROR(ENOMEM);
+    }
+
    return 0;
 }

@ -189,6 +210,11 @@ static av_cold void uninit(AVFilterContext *ctx)
 {
    AudioDynamicEqualizerContext *s = ctx->priv;

+    for (int ch = 0; ch < s->nb_channels; ch++) {
+        ChannelContext *cc = &s->cc[ch];
+        av_freep(&cc->queue);
+        av_freep(&cc->dqueue);
+    }
    av_freep(&s->cc);
 }

@ -226,6 +252,7 @@ static const AVOption adynamicequalizer_options[] = {
    {   "disabled", 0,                         0,                  AV_OPT_TYPE_CONST,  {.i64=DET_DISABLED}, 0, 0,   FLAGS, "auto" },
    {   "off",      0,                         0,                  AV_OPT_TYPE_CONST,  {.i64=DET_OFF},      0, 0,   FLAGS, "auto" },
    {   "on",       0,                         0,                  AV_OPT_TYPE_CONST,  {.i64=DET_ON},       0, 0,   FLAGS, "auto" },
+    {   "adaptive", 0,                         0,                  AV_OPT_TYPE_CONST,  {.i64=DET_ADAPTIVE}, 0, 0,   FLAGS, "auto" },
    { "precision", "set processing precision", OFFSET(precision),  AV_OPT_TYPE_INT,    {.i64=0},        0, 2,       AF, "precision" },
    {   "auto",  "set auto processing precision",                  0, AV_OPT_TYPE_CONST, {.i64=0},      0, 0,       AF, "precision" },
    {   "float", "set single-floating point processing precision", 0, AV_OPT_TYPE_CONST, {.i64=1},      0, 0,       AF, "precision" },