You've already forked FFmpeg
							
							
				mirror of
				https://github.com/FFmpeg/FFmpeg.git
				synced 2025-10-30 23:18:11 +02:00 
			
		
		
		
	avfilter/af_afftdn: rewrite noise floor tracking
This commit is contained in:
		| @@ -92,11 +92,6 @@ typedef struct DeNoiseChannel { | ||||
|     double      noise_band_avi[NB_PROFILE_BANDS]; | ||||
|     double      noise_band_var[NB_PROFILE_BANDS]; | ||||
|  | ||||
|     double      sfm_threshold; | ||||
|     double      sfm_alpha; | ||||
|     double      sfm_results[3]; | ||||
|     int         sfm_fail_flags[SFM_FLAGS_SIZE]; | ||||
|     int         sfm_fail_total; | ||||
|     double      noise_reduction; | ||||
|     double      last_noise_reduction; | ||||
|     double      noise_floor; | ||||
| @@ -155,7 +150,6 @@ typedef struct AudioFFTDeNoiseContext { | ||||
|     double  window_weight; | ||||
|     double  floor; | ||||
|     double  sample_floor; | ||||
|     double  auto_floor; | ||||
|  | ||||
|     int     noise_band_edge[NB_PROFILE_BANDS + 2]; | ||||
|     int     noise_band_count; | ||||
| @@ -301,42 +295,6 @@ static double process_get_band_noise(AudioFFTDeNoiseContext *s, | ||||
|     return sum; | ||||
| } | ||||
|  | ||||
| static void calculate_sfm(AudioFFTDeNoiseContext *s, | ||||
|                           DeNoiseChannel *dnch, | ||||
|                           int start, int end) | ||||
| { | ||||
|     double d1 = 0.0, d2 = 1.0; | ||||
|     int i = 0, j = 0; | ||||
|  | ||||
|     for (int k = start; k < end; k++) { | ||||
|         if (dnch->noisy_data[k] > s->sample_floor) { | ||||
|             j++; | ||||
|             d1 += dnch->noisy_data[k]; | ||||
|             d2 *= dnch->noisy_data[k]; | ||||
|             if (d2 > 1.0E100) { | ||||
|                 d2 *= 1.0E-100; | ||||
|                 i++; | ||||
|             } else if (d2 < 1.0E-100) { | ||||
|                 d2 *= 1.0E100; | ||||
|                 i--; | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     if (j > 1) { | ||||
|         d1 /= j; | ||||
|         dnch->sfm_results[0] = d1; | ||||
|         d2 = log(d2) + 230.2585 * i; | ||||
|         d2 /= j; | ||||
|         d1 = log(d1); | ||||
|         dnch->sfm_results[1] = d1; | ||||
|         dnch->sfm_results[2] = d1 - d2; | ||||
|     } else { | ||||
|         dnch->sfm_results[0] = s->auto_floor; | ||||
|         dnch->sfm_results[1] = dnch->sfm_threshold; | ||||
|         dnch->sfm_results[2] = dnch->sfm_threshold; | ||||
|     } | ||||
| } | ||||
|  | ||||
| static double limit_gain(double a, double b) | ||||
| { | ||||
|     if (a > 1.0) | ||||
| @@ -346,116 +304,88 @@ static double limit_gain(double a, double b) | ||||
|     return 1.0; | ||||
| } | ||||
|  | ||||
| static void spectral_flatness(AudioFFTDeNoiseContext *s, const double *const spectral, | ||||
|                               double floor, int len, double *rnum, double *rden) | ||||
| { | ||||
|     double num = 0., den = 0.; | ||||
|     int size = 0; | ||||
|  | ||||
|     for (int n = 0; n < len; n++) { | ||||
|         const double v = spectral[n]; | ||||
|         if (v > floor) { | ||||
|             num += log(v); | ||||
|             den += v; | ||||
|             size++; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     size = FFMAX(size, 1); | ||||
|  | ||||
|     num /= size; | ||||
|     den /= size; | ||||
|  | ||||
|     num = exp(num); | ||||
|  | ||||
|     *rnum = num; | ||||
|     *rden = den; | ||||
| } | ||||
|  | ||||
| static void set_parameters(AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch, int update_var, int update_auto_var); | ||||
|  | ||||
| static double floor_offset(const double *S, int size, double mean) | ||||
| { | ||||
|     double offset = 0.0; | ||||
|  | ||||
|     for (int n = 0; n < size; n++) { | ||||
|         const double p = S[n] - mean; | ||||
|  | ||||
|         offset = fmax(offset, fabs(p)); | ||||
|     } | ||||
|  | ||||
|     return offset / mean; | ||||
| } | ||||
|  | ||||
| static void process_frame(AVFilterContext *ctx, | ||||
|                           AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch, | ||||
|                           AVComplexFloat *fft_data, | ||||
|                           double *prior, double *prior_band_excit, int track_noise) | ||||
| { | ||||
|     AVFilterLink *outlink = ctx->outputs[0]; | ||||
|     const double sample_floor = s->sample_floor; | ||||
|     const double *abs_var = dnch->abs_var; | ||||
|     const double ratio = outlink->frame_count_out ? s->ratio : 1.0; | ||||
|     const double rratio = 1. - ratio; | ||||
|     const int *bin2band = s->bin2band; | ||||
|     double *noisy_data = dnch->noisy_data; | ||||
|     double *band_excit = dnch->band_excit; | ||||
|     double *band_amt = dnch->band_amt; | ||||
|     double *gain = dnch->gain; | ||||
|     int n = 0, i1; | ||||
|  | ||||
|     for (int i = 0; i < s->fft_length2; i++) { | ||||
|         double sqr_new_gain, new_gain, mag, mag_abs_var, new_mag_abs_var; | ||||
|     for (int i = 0; i < s->bin_count; i++) { | ||||
|         double sqr_new_gain, new_gain, power, mag, mag_abs_var, new_mag_abs_var; | ||||
|  | ||||
|         mag = fft_data[i].re * fft_data[i].re + fft_data[i].im * fft_data[i].im; | ||||
|         if (mag > sample_floor) | ||||
|             n = i; | ||||
|  | ||||
|         dnch->noisy_data[i] = mag; | ||||
|         mag_abs_var = mag / abs_var[i]; | ||||
|         noisy_data[i] = mag = hypot(fft_data[i].re, fft_data[i].im); | ||||
|         power = mag * mag; | ||||
|         mag_abs_var = power / abs_var[i]; | ||||
|         new_mag_abs_var = ratio * prior[i] + rratio * fmax(mag_abs_var - 1.0, 0.0); | ||||
|         new_gain = new_mag_abs_var / (1.0 + new_mag_abs_var); | ||||
|         sqr_new_gain = new_gain * new_gain; | ||||
|         prior[i] = mag_abs_var * sqr_new_gain; | ||||
|         dnch->clean_data[i] = mag * sqr_new_gain; | ||||
|         dnch->clean_data[i] = power * sqr_new_gain; | ||||
|         gain[i] = new_gain; | ||||
|     } | ||||
|  | ||||
|     if (n > s->fft_length2 - 2) { | ||||
|         n = s->bin_count; | ||||
|         i1 = s->noise_band_count; | ||||
|     } else { | ||||
|         i1 = 0; | ||||
|         for (int i = 0; i <= s->noise_band_count; i++) { | ||||
|             if (n > 1.1 * s->noise_band_edge[i]) { | ||||
|                 i1 = i; | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     if (track_noise) { | ||||
|         double flatness, num, den; | ||||
|  | ||||
|     if (track_noise && (i1 > s->noise_band_count / 2)) { | ||||
|         int j = FFMIN(n, s->noise_band_edge[i1]); | ||||
|         int m = 3, k; | ||||
|         spectral_flatness(s, noisy_data, s->floor, s->bin_count, &num, &den); | ||||
|  | ||||
|         for (k = i1 - 1; k >= 0; k--) { | ||||
|             int i = s->noise_band_edge[k]; | ||||
|             calculate_sfm(s, dnch, i, j); | ||||
|             dnch->noise_band_sample[k] = dnch->sfm_results[0]; | ||||
|             if (dnch->sfm_results[2] + 0.013 * m * fmax(0.0, dnch->sfm_results[1] - 20.53) >= dnch->sfm_threshold) { | ||||
|                 break; | ||||
|             } | ||||
|             j = i; | ||||
|             m++; | ||||
|         } | ||||
|         flatness = num / den; | ||||
|         if (flatness > 0.8) { | ||||
|             const double offset = floor_offset(noisy_data, s->bin_count, den); | ||||
|             const double new_floor = av_clipd(10.0 * log10(den) - 100.0 + offset, -90., -20.); | ||||
|  | ||||
|         if (k < i1 - 1) { | ||||
|             double sum = 0.0, min, max; | ||||
|             int i; | ||||
|  | ||||
|             for (i = i1 - 1; i > k; i--) { | ||||
|                 min = log(dnch->noise_band_sample[i] / dnch->noise_band_auto_var[i]); | ||||
|                 sum += min; | ||||
|             } | ||||
|  | ||||
|             i = i1 - k - 1; | ||||
|             if (i < 5) { | ||||
|                 min = 3.0E-4 * i * i; | ||||
|             } else { | ||||
|                 min = 3.0E-4 * (8 * i - 16); | ||||
|             } | ||||
|             if (i < 3) { | ||||
|                 max = 2.0E-4 * i * i; | ||||
|             } else { | ||||
|                 max = 2.0E-4 * (4 * i - 4); | ||||
|             } | ||||
|  | ||||
|             if (s->track_residual) { | ||||
|                 if (dnch->last_noise_floor > dnch->last_residual_floor + 9) { | ||||
|                     min *= 0.5; | ||||
|                     max *= 0.75; | ||||
|                 } else if (dnch->last_noise_floor > dnch->last_residual_floor + 6) { | ||||
|                     min *= 0.4; | ||||
|                     max *= 1.0; | ||||
|                 } else if (dnch->last_noise_floor > dnch->last_residual_floor + 4) { | ||||
|                     min *= 0.3; | ||||
|                     max *= 1.3; | ||||
|                 } else if (dnch->last_noise_floor > dnch->last_residual_floor + 2) { | ||||
|                     min *= 0.2; | ||||
|                     max *= 1.6; | ||||
|                 } else if (dnch->last_noise_floor > dnch->last_residual_floor) { | ||||
|                     min *= 0.1; | ||||
|                     max *= 2.0; | ||||
|                 } else { | ||||
|                     min = 0.0; | ||||
|                     max *= 2.5; | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             sum = av_clipd(sum, -min, max); | ||||
|             sum = exp(sum); | ||||
|             for (int i = 0; i < NB_PROFILE_BANDS; i++) | ||||
|                 dnch->noise_band_auto_var[i] *= sum; | ||||
|         } else if (dnch->sfm_results[2] >= dnch->sfm_threshold) { | ||||
|             dnch->sfm_fail_flags[s->block_count & SFM_FLAGS_MASK] = 1; | ||||
|             dnch->sfm_fail_total += 1; | ||||
|             dnch->noise_floor = 0.1 * new_floor + dnch->noise_floor * 0.9; | ||||
|             set_parameters(s, dnch, 1, 0); | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -495,7 +425,7 @@ static void process_frame(AVFilterContext *ctx, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     for (int i = 0; i < s->fft_length2; i++) { | ||||
|     for (int i = 0; i < s->bin_count; i++) { | ||||
|         const double new_gain = gain[i]; | ||||
|  | ||||
|         fft_data[i].re *= new_gain; | ||||
| @@ -556,7 +486,6 @@ static void set_band_parameters(AudioFFTDeNoiseContext *s, | ||||
|         d4 = (m - i) / d2; | ||||
|         dnch->rel_var[m] = exp((d5 * d3 + band_noise * d4) * C); | ||||
|     } | ||||
|     dnch->rel_var[s->fft_length2] = exp(band_noise * C); | ||||
|  | ||||
|     for (i = 0; i < NB_PROFILE_BANDS; i++) | ||||
|         dnch->noise_band_auto_var[i] = dnch->max_var * exp((process_get_band_noise(s, dnch, i) - 2.0) * C); | ||||
| @@ -667,7 +596,7 @@ static int config_input(AVFilterLink *inlink) | ||||
|     s->fft_length2 = 1 << (32 - ff_clz(s->window_length)); | ||||
|     s->fft_length = s->fft_length2; | ||||
|     s->buffer_length = s->fft_length * 2; | ||||
|     s->bin_count = s->fft_length2 + 1; | ||||
|     s->bin_count = s->fft_length2 / 2 + 1; | ||||
|  | ||||
|     s->band_centre[0] = 80; | ||||
|     for (i = 1; i < NB_PROFILE_BANDS; i++) { | ||||
| @@ -712,7 +641,7 @@ static int config_input(AVFilterLink *inlink) | ||||
|     for (i = 0; i < s->bin_count; i++) | ||||
|         s->bin2band[i] = lrint(sdiv * freq2bark((0.5 * i * s->sample_rate) / s->fft_length2)); | ||||
|  | ||||
|     s->number_of_bands = s->bin2band[s->fft_length2] + 1; | ||||
|     s->number_of_bands = s->bin2band[s->bin_count - 1] + 1; | ||||
|  | ||||
|     s->band_alpha = av_calloc(s->number_of_bands, sizeof(*s->band_alpha)); | ||||
|     s->band_beta = av_calloc(s->number_of_bands, sizeof(*s->band_beta)); | ||||
| @@ -745,19 +674,6 @@ static int config_input(AVFilterLink *inlink) | ||||
|  | ||||
|         reduce_mean(dnch->band_noise); | ||||
|  | ||||
|         dnch->sfm_threshold = 0.8; | ||||
|         dnch->sfm_alpha = 0.05; | ||||
|         for (i = 0; i < SFM_FLAGS_SIZE; i++) | ||||
|             dnch->sfm_fail_flags[i] = 0; | ||||
|  | ||||
|         dnch->sfm_fail_total = 0; | ||||
|         j = FFMAX((int)(10.0 * (1.3 - dnch->sfm_threshold)), 1); | ||||
|  | ||||
|         for (i = 0; i < SFM_FLAGS_SIZE; i += j) { | ||||
|             dnch->sfm_fail_flags[i] = 1; | ||||
|             dnch->sfm_fail_total += 1; | ||||
|         } | ||||
|  | ||||
|         dnch->amt = av_calloc(s->bin_count, sizeof(*dnch->amt)); | ||||
|         dnch->band_amt = av_calloc(s->number_of_bands, sizeof(*dnch->band_amt)); | ||||
|         dnch->band_excit = av_calloc(s->number_of_bands, sizeof(*dnch->band_excit)); | ||||
| @@ -884,7 +800,6 @@ static int config_input(AVFilterLink *inlink) | ||||
|     s->window_weight = 0.5 * sum; | ||||
|     s->floor = (1LL << 48) * exp(-23.025558369790467) * s->window_weight; | ||||
|     s->sample_floor = s->floor * exp(4.144600506562284); | ||||
|     s->auto_floor = s->floor * exp(6.907667510937141); | ||||
|  | ||||
|     for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { | ||||
|         DeNoiseChannel *dnch = &s->dnch[ch]; | ||||
| @@ -965,7 +880,6 @@ static void sample_noise_block(AudioFFTDeNoiseContext *s, | ||||
|  | ||||
|         mag2 = fmax(mag2, s->sample_floor); | ||||
|  | ||||
|         dnch->noisy_data[i] = mag2; | ||||
|         var += mag2; | ||||
|         n++; | ||||
|     } | ||||
| @@ -1034,7 +948,7 @@ static void set_noise_profile(AudioFFTDeNoiseContext *s, | ||||
|     for (int m = 0; m < NB_PROFILE_BANDS; m++) | ||||
|         temp[m] -= d1; | ||||
|  | ||||
|     new_noise_floor = round(d1) + 2.5; | ||||
|     new_noise_floor = d1; | ||||
|  | ||||
|     if (new_profile) { | ||||
|         av_log(s, AV_LOG_INFO, "bn="); | ||||
| @@ -1066,16 +980,6 @@ static int filter_channel(AVFilterContext *ctx, void *arg, int jobnr, int nb_job | ||||
|         double *dst = dnch->out_samples; | ||||
|         float *fft_in = dnch->fft_in; | ||||
|  | ||||
|         if (s->track_noise) { | ||||
|             int i = s->block_count & SFM_FLAGS_MASK; | ||||
|  | ||||
|             if (dnch->sfm_fail_flags[i]) | ||||
|                 dnch->sfm_fail_total--; | ||||
|             dnch->sfm_fail_flags[i] = 0; | ||||
|             dnch->sfm_threshold *= 1.0 - dnch->sfm_alpha; | ||||
|             dnch->sfm_threshold += dnch->sfm_alpha * ((1.0 / SFM_FLAGS_SIZE) * dnch->sfm_fail_total); | ||||
|         } | ||||
|  | ||||
|         for (int m = 0; m < window_length; m++) | ||||
|             fft_in[m] = window[m] * src[m] * (1LL << 23); | ||||
|  | ||||
| @@ -1098,25 +1002,6 @@ static int filter_channel(AVFilterContext *ctx, void *arg, int jobnr, int nb_job | ||||
|     return 0; | ||||
| } | ||||
|  | ||||
| static void get_auto_noise_levels(AudioFFTDeNoiseContext *s, | ||||
|                                   DeNoiseChannel *dnch, | ||||
|                                   double *levels) | ||||
| { | ||||
|     if (s->noise_band_count > 0) { | ||||
|         for (int i = 0; i < s->noise_band_count; i++) { | ||||
|             levels[i] = 10.0 * log10(dnch->noise_band_auto_var[i] / s->floor) - 100.0; | ||||
|         } | ||||
|         if (s->noise_band_count < NB_PROFILE_BANDS) { | ||||
|             for (int i = s->noise_band_count; i < NB_PROFILE_BANDS; i++) | ||||
|                 levels[i] = levels[i - 1]; | ||||
|         } | ||||
|     } else { | ||||
|         for (int i = 0; i < NB_PROFILE_BANDS; i++) { | ||||
|             levels[i] = -100.0; | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| static int output_frame(AVFilterLink *inlink, AVFrame *in) | ||||
| { | ||||
|     AVFilterContext *ctx = inlink->dst; | ||||
| @@ -1139,10 +1024,7 @@ static int output_frame(AVFilterLink *inlink, AVFrame *in) | ||||
|  | ||||
|         for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { | ||||
|             DeNoiseChannel *dnch = &s->dnch[ch]; | ||||
|             double levels[NB_PROFILE_BANDS]; | ||||
|  | ||||
|             get_auto_noise_levels(s, dnch, levels); | ||||
|             set_noise_profile(s, dnch, levels, 0); | ||||
|             average += dnch->noise_floor; | ||||
|             max = fmax(max, dnch->noise_floor); | ||||
|             min = fmin(min, dnch->noise_floor); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user