avfilter/af_afftdn: rewrite noise floor tracking

2025-10-30 23:18:11 +02:00 · 2022-03-16 00:10:11 +01:00
parent 77d20a0f74
commit ccb81acc3d
1 changed files with 61 additions and 179 deletions
--- a/libavfilter/af_afftdn.c
+++ b/libavfilter/af_afftdn.c
@@ -92,11 +92,6 @@ typedef struct DeNoiseChannel {
    double      noise_band_avi[NB_PROFILE_BANDS];
    double      noise_band_var[NB_PROFILE_BANDS];

-    double      sfm_threshold;
-    double      sfm_alpha;
-    double      sfm_results[3];
-    int         sfm_fail_flags[SFM_FLAGS_SIZE];
-    int         sfm_fail_total;
    double      noise_reduction;
    double      last_noise_reduction;
    double      noise_floor;
@@ -155,7 +150,6 @@ typedef struct AudioFFTDeNoiseContext {
    double  window_weight;
    double  floor;
    double  sample_floor;
-    double  auto_floor;

    int     noise_band_edge[NB_PROFILE_BANDS + 2];
    int     noise_band_count;
@@ -301,42 +295,6 @@ static double process_get_band_noise(AudioFFTDeNoiseContext *s,
    return sum;
 }

-static void calculate_sfm(AudioFFTDeNoiseContext *s,
-                          DeNoiseChannel *dnch,
-                          int start, int end)
-{
-    double d1 = 0.0, d2 = 1.0;
-    int i = 0, j = 0;
-
-    for (int k = start; k < end; k++) {
-        if (dnch->noisy_data[k] > s->sample_floor) {
-            j++;
-            d1 += dnch->noisy_data[k];
-            d2 *= dnch->noisy_data[k];
-            if (d2 > 1.0E100) {
-                d2 *= 1.0E-100;
-                i++;
-            } else if (d2 < 1.0E-100) {
-                d2 *= 1.0E100;
-                i--;
-            }
-        }
-    }
-    if (j > 1) {
-        d1 /= j;
-        dnch->sfm_results[0] = d1;
-        d2 = log(d2) + 230.2585 * i;
-        d2 /= j;
-        d1 = log(d1);
-        dnch->sfm_results[1] = d1;
-        dnch->sfm_results[2] = d1 - d2;
-    } else {
-        dnch->sfm_results[0] = s->auto_floor;
-        dnch->sfm_results[1] = dnch->sfm_threshold;
-        dnch->sfm_results[2] = dnch->sfm_threshold;
-    }
-}
-
 static double limit_gain(double a, double b)
 {
    if (a > 1.0)
@@ -346,116 +304,88 @@ static double limit_gain(double a, double b)
    return 1.0;
 }

+static void spectral_flatness(AudioFFTDeNoiseContext *s, const double *const spectral,
+                              double floor, int len, double *rnum, double *rden)
+{   /* Writes the geometric mean (*rnum) and arithmetic mean (*rden) of the spectral[] values above floor; s is not used here. */
+    double num = 0., den = 0.;
+    int size = 0;
+    /* Accumulate the sum of logs (num) and the plain sum (den) over the first len entries that exceed floor. */
+    for (int n = 0; n < len; n++) {
+        const double v = spectral[n];
+        if (v > floor) {
+            num += log(v);
+            den += v;
+            size++;
+        }
+    }
+    /* Guard the divisions below when no entry exceeded floor; in that case num ends up as exp(0) = 1 and den stays 0. */
+    size = FFMAX(size, 1);
+
+    num /= size;
+    den /= size;
+    /* exp(mean of logs) is the geometric mean of the selected entries. */
+    num = exp(num);
+
+    *rnum = num;
+    *rden = den;
+}
+
+static void set_parameters(AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch, int update_var, int update_auto_var); /* forward declaration: process_frame() calls this */
+
+static double floor_offset(const double *S, int size, double mean)
+{   /* Returns the largest absolute deviation of S[0..size-1] from mean, divided by mean. */
+    double offset = 0.0;
+
+    for (int n = 0; n < size; n++) {
+        const double p = S[n] - mean;
+
+        offset = fmax(offset, fabs(p));
+    }
+    /* NOTE(review): no guard for mean == 0; process_frame() passes the arithmetic mean from spectral_flatness(), which is 0 when no bin exceeds the floor -- confirm that case is acceptable. */
+    return offset / mean;
+}
+
 static void process_frame(AVFilterContext *ctx,
                          AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch,
                          AVComplexFloat *fft_data,
                          double *prior, double *prior_band_excit, int track_noise)
 {
    AVFilterLink *outlink = ctx->outputs[0];
-    const double sample_floor = s->sample_floor;
    const double *abs_var = dnch->abs_var;
    const double ratio = outlink->frame_count_out ? s->ratio : 1.0;
    const double rratio = 1. - ratio;
    const int *bin2band = s->bin2band;
+    double *noisy_data = dnch->noisy_data;
    double *band_excit = dnch->band_excit;
    double *band_amt = dnch->band_amt;
    double *gain = dnch->gain;
-    int n = 0, i1;

-    for (int i = 0; i < s->fft_length2; i++) {
-        double sqr_new_gain, new_gain, mag, mag_abs_var, new_mag_abs_var;
+    for (int i = 0; i < s->bin_count; i++) {
+        double sqr_new_gain, new_gain, power, mag, mag_abs_var, new_mag_abs_var;

-        mag = fft_data[i].re * fft_data[i].re + fft_data[i].im * fft_data[i].im;
-        if (mag > sample_floor)
-            n = i;
-
-        dnch->noisy_data[i] = mag;
-        mag_abs_var = mag / abs_var[i];
+        noisy_data[i] = mag = hypot(fft_data[i].re, fft_data[i].im);
+        power = mag * mag;
+        mag_abs_var = power / abs_var[i];
        new_mag_abs_var = ratio * prior[i] + rratio * fmax(mag_abs_var - 1.0, 0.0);
        new_gain = new_mag_abs_var / (1.0 + new_mag_abs_var);
        sqr_new_gain = new_gain * new_gain;
        prior[i] = mag_abs_var * sqr_new_gain;
-        dnch->clean_data[i] = mag * sqr_new_gain;
+        dnch->clean_data[i] = power * sqr_new_gain;
        gain[i] = new_gain;
    }

-    if (n > s->fft_length2 - 2) {
-        n = s->bin_count;
-        i1 = s->noise_band_count;
-    } else {
-        i1 = 0;
-        for (int i = 0; i <= s->noise_band_count; i++) {
-            if (n > 1.1 * s->noise_band_edge[i]) {
-                i1 = i;
-            }
-        }
-    }
+    if (track_noise) {
+        double flatness, num, den;

-    if (track_noise && (i1 > s->noise_band_count / 2)) {
-        int j = FFMIN(n, s->noise_band_edge[i1]);
-        int m = 3, k;
+        spectral_flatness(s, noisy_data, s->floor, s->bin_count, &num, &den);

-        for (k = i1 - 1; k >= 0; k--) {
-            int i = s->noise_band_edge[k];
-            calculate_sfm(s, dnch, i, j);
-            dnch->noise_band_sample[k] = dnch->sfm_results[0];
-            if (dnch->sfm_results[2] + 0.013 * m * fmax(0.0, dnch->sfm_results[1] - 20.53) >= dnch->sfm_threshold) {
-                break;
-            }
-            j = i;
-            m++;
-        }
+        flatness = num / den;
+        if (flatness > 0.8) {
+            const double offset = floor_offset(noisy_data, s->bin_count, den);
+            const double new_floor = av_clipd(10.0 * log10(den) - 100.0 + offset, -90., -20.);

-        if (k < i1 - 1) {
-            double sum = 0.0, min, max;
-            int i;
-
-            for (i = i1 - 1; i > k; i--) {
-                min = log(dnch->noise_band_sample[i] / dnch->noise_band_auto_var[i]);
-                sum += min;
-            }
-
-            i = i1 - k - 1;
-            if (i < 5) {
-                min = 3.0E-4 * i * i;
-            } else {
-                min = 3.0E-4 * (8 * i - 16);
-            }
-            if (i < 3) {
-                max = 2.0E-4 * i * i;
-            } else {
-                max = 2.0E-4 * (4 * i - 4);
-            }
-
-            if (s->track_residual) {
-                if (dnch->last_noise_floor > dnch->last_residual_floor + 9) {
-                    min *= 0.5;
-                    max *= 0.75;
-                } else if (dnch->last_noise_floor > dnch->last_residual_floor + 6) {
-                    min *= 0.4;
-                    max *= 1.0;
-                } else if (dnch->last_noise_floor > dnch->last_residual_floor + 4) {
-                    min *= 0.3;
-                    max *= 1.3;
-                } else if (dnch->last_noise_floor > dnch->last_residual_floor + 2) {
-                    min *= 0.2;
-                    max *= 1.6;
-                } else if (dnch->last_noise_floor > dnch->last_residual_floor) {
-                    min *= 0.1;
-                    max *= 2.0;
-                } else {
-                    min = 0.0;
-                    max *= 2.5;
-                }
-            }
-
-            sum = av_clipd(sum, -min, max);
-            sum = exp(sum);
-            for (int i = 0; i < NB_PROFILE_BANDS; i++)
-                dnch->noise_band_auto_var[i] *= sum;
-        } else if (dnch->sfm_results[2] >= dnch->sfm_threshold) {
-            dnch->sfm_fail_flags[s->block_count & SFM_FLAGS_MASK] = 1;
-            dnch->sfm_fail_total += 1;
+            dnch->noise_floor = 0.1 * new_floor + dnch->noise_floor * 0.9;
+            set_parameters(s, dnch, 1, 0);
        }
    }

@@ -495,7 +425,7 @@ static void process_frame(AVFilterContext *ctx,
        }
    }

-    for (int i = 0; i < s->fft_length2; i++) {
+    for (int i = 0; i < s->bin_count; i++) {
        const double new_gain = gain[i];

        fft_data[i].re *= new_gain;
@@ -556,7 +486,6 @@ static void set_band_parameters(AudioFFTDeNoiseContext *s,
        d4 = (m - i) / d2;
        dnch->rel_var[m] = exp((d5 * d3 + band_noise * d4) * C);
    }
-    dnch->rel_var[s->fft_length2] = exp(band_noise * C);

    for (i = 0; i < NB_PROFILE_BANDS; i++)
        dnch->noise_band_auto_var[i] = dnch->max_var * exp((process_get_band_noise(s, dnch, i) - 2.0) * C);
@@ -667,7 +596,7 @@ static int config_input(AVFilterLink *inlink)
    s->fft_length2 = 1 << (32 - ff_clz(s->window_length));
    s->fft_length = s->fft_length2;
    s->buffer_length = s->fft_length * 2;
-    s->bin_count = s->fft_length2 + 1;
+    s->bin_count = s->fft_length2 / 2 + 1;

    s->band_centre[0] = 80;
    for (i = 1; i < NB_PROFILE_BANDS; i++) {
@@ -712,7 +641,7 @@ static int config_input(AVFilterLink *inlink)
    for (i = 0; i < s->bin_count; i++)
        s->bin2band[i] = lrint(sdiv * freq2bark((0.5 * i * s->sample_rate) / s->fft_length2));

-    s->number_of_bands = s->bin2band[s->fft_length2] + 1;
+    s->number_of_bands = s->bin2band[s->bin_count - 1] + 1;

    s->band_alpha = av_calloc(s->number_of_bands, sizeof(*s->band_alpha));
    s->band_beta = av_calloc(s->number_of_bands, sizeof(*s->band_beta));
@@ -745,19 +674,6 @@ static int config_input(AVFilterLink *inlink)

        reduce_mean(dnch->band_noise);

-        dnch->sfm_threshold = 0.8;
-        dnch->sfm_alpha = 0.05;
-        for (i = 0; i < SFM_FLAGS_SIZE; i++)
-            dnch->sfm_fail_flags[i] = 0;
-
-        dnch->sfm_fail_total = 0;
-        j = FFMAX((int)(10.0 * (1.3 - dnch->sfm_threshold)), 1);
-
-        for (i = 0; i < SFM_FLAGS_SIZE; i += j) {
-            dnch->sfm_fail_flags[i] = 1;
-            dnch->sfm_fail_total += 1;
-        }
-
        dnch->amt = av_calloc(s->bin_count, sizeof(*dnch->amt));
        dnch->band_amt = av_calloc(s->number_of_bands, sizeof(*dnch->band_amt));
        dnch->band_excit = av_calloc(s->number_of_bands, sizeof(*dnch->band_excit));
@@ -884,7 +800,6 @@ static int config_input(AVFilterLink *inlink)
    s->window_weight = 0.5 * sum;
    s->floor = (1LL << 48) * exp(-23.025558369790467) * s->window_weight;
    s->sample_floor = s->floor * exp(4.144600506562284);
-    s->auto_floor = s->floor * exp(6.907667510937141);

    for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
        DeNoiseChannel *dnch = &s->dnch[ch];
@@ -965,7 +880,6 @@ static void sample_noise_block(AudioFFTDeNoiseContext *s,

        mag2 = fmax(mag2, s->sample_floor);

-        dnch->noisy_data[i] = mag2;
        var += mag2;
        n++;
    }
@@ -1034,7 +948,7 @@ static void set_noise_profile(AudioFFTDeNoiseContext *s,
    for (int m = 0; m < NB_PROFILE_BANDS; m++)
        temp[m] -= d1;

-    new_noise_floor = round(d1) + 2.5;
+    new_noise_floor = d1;

    if (new_profile) {
        av_log(s, AV_LOG_INFO, "bn=");
@@ -1066,16 +980,6 @@ static int filter_channel(AVFilterContext *ctx, void *arg, int jobnr, int nb_job
        double *dst = dnch->out_samples;
        float *fft_in = dnch->fft_in;

-        if (s->track_noise) {
-            int i = s->block_count & SFM_FLAGS_MASK;
-
-            if (dnch->sfm_fail_flags[i])
-                dnch->sfm_fail_total--;
-            dnch->sfm_fail_flags[i] = 0;
-            dnch->sfm_threshold *= 1.0 - dnch->sfm_alpha;
-            dnch->sfm_threshold += dnch->sfm_alpha * ((1.0 / SFM_FLAGS_SIZE) * dnch->sfm_fail_total);
-        }
-
        for (int m = 0; m < window_length; m++)
            fft_in[m] = window[m] * src[m] * (1LL << 23);

@@ -1098,25 +1002,6 @@ static int filter_channel(AVFilterContext *ctx, void *arg, int jobnr, int nb_job
    return 0;
 }

-static void get_auto_noise_levels(AudioFFTDeNoiseContext *s,
-                                  DeNoiseChannel *dnch,
-                                  double *levels)
-{
-    if (s->noise_band_count > 0) {
-        for (int i = 0; i < s->noise_band_count; i++) {
-            levels[i] = 10.0 * log10(dnch->noise_band_auto_var[i] / s->floor) - 100.0;
-        }
-        if (s->noise_band_count < NB_PROFILE_BANDS) {
-            for (int i = s->noise_band_count; i < NB_PROFILE_BANDS; i++)
-                levels[i] = levels[i - 1];
-        }
-    } else {
-        for (int i = 0; i < NB_PROFILE_BANDS; i++) {
-            levels[i] = -100.0;
-        }
-    }
-}
-
 static int output_frame(AVFilterLink *inlink, AVFrame *in)
 {
    AVFilterContext *ctx = inlink->dst;
@@ -1139,10 +1024,7 @@ static int output_frame(AVFilterLink *inlink, AVFrame *in)

        for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
            DeNoiseChannel *dnch = &s->dnch[ch];
-            double levels[NB_PROFILE_BANDS];

-            get_auto_noise_levels(s, dnch, levels);
-            set_noise_profile(s, dnch, levels, 0);
            average += dnch->noise_floor;
            max = fmax(max, dnch->noise_floor);
            min = fmin(min, dnch->noise_floor);