From 3879555cd540f7df02ef527fcbc0fda4c68fbfa9 Mon Sep 17 00:00:00 2001 From: Paul B Mahol Date: Sat, 31 Dec 2022 23:31:31 +0100 Subject: [PATCH] avfilter/afir_template: make IR transitions artifacts free --- libavfilter/af_afir.c | 84 +++++++++++++++++----- libavfilter/af_afir.h | 9 ++- libavfilter/afir_template.c | 135 ++++++++++++++++++++++++------------ 3 files changed, 160 insertions(+), 68 deletions(-) diff --git a/libavfilter/af_afir.c b/libavfilter/af_afir.c index 2d09b2a0e1..11fa5074d0 100644 --- a/libavfilter/af_afir.c +++ b/libavfilter/af_afir.c @@ -155,7 +155,7 @@ static int fir_frame(AudioFIRContext *s, AVFrame *in, AVFilterLink *outlink) return ff_filter_frame(outlink, out); } -static int init_segment(AVFilterContext *ctx, AudioFIRSegment *seg, +static int init_segment(AVFilterContext *ctx, AudioFIRSegment *seg, int selir, int offset, int nb_partitions, int part_size, int index) { AudioFIRContext *s = ctx->priv; @@ -221,12 +221,10 @@ static int init_segment(AVFilterContext *ctx, AudioFIRSegment *seg, seg->tempin = ff_get_audio_buffer(ctx->inputs[0], seg->block_size); seg->tempout = ff_get_audio_buffer(ctx->inputs[0], seg->block_size); seg->buffer = ff_get_audio_buffer(ctx->inputs[0], seg->part_size); - seg->coeff = ff_get_audio_buffer(ctx->inputs[0], seg->nb_partitions * seg->coeff_size * 2); seg->input = ff_get_audio_buffer(ctx->inputs[0], seg->input_size); - seg->output = ff_get_audio_buffer(ctx->inputs[0], seg->part_size); - seg->loaded = ff_get_audio_buffer(ctx->inputs[0], seg->nb_partitions); + seg->output = ff_get_audio_buffer(ctx->inputs[0], seg->part_size * 5); if (!seg->buffer || !seg->sumin || !seg->sumout || !seg->blockout || - !seg->coeff || !seg->input || !seg->output || !seg->loaded || !seg->tempin || !seg->tempout) + !seg->input || !seg->output || !seg->tempin || !seg->tempout) return AVERROR(ENOMEM); return 0; @@ -264,18 +262,18 @@ static void uninit_segment(AVFilterContext *ctx, AudioFIRSegment *seg) av_frame_free(&seg->sumin); av_frame_free(&seg->sumout); av_frame_free(&seg->buffer); - av_frame_free(&seg->coeff); av_frame_free(&seg->input); av_frame_free(&seg->output); - av_frame_free(&seg->loaded); seg->input_size = 0; + + for (int i = 0; i < MAX_IR_STREAMS; i++) + av_frame_free(&seg->coeff[i]); } static int convert_coeffs(AVFilterContext *ctx, int selir) { AudioFIRContext *s = ctx->priv; - const int prev_selir = s->prev_selir; - int ret, nb_taps, cur_nb_taps, prev_nb_taps; + int ret, nb_taps, cur_nb_taps; if (!s->nb_taps[selir]) { int part_size, max_part_size; @@ -302,7 +300,7 @@ static int convert_coeffs(AVFilterContext *ctx, int selir) int nb_partitions = FFMIN(step, (left + part_size - 1) / part_size); s->nb_segments = i + 1; - ret = init_segment(ctx, &s->seg[i], offset, nb_partitions, part_size, i); + ret = init_segment(ctx, &s->seg[i], selir, offset, nb_partitions, part_size, i); if (ret < 0) return ret; offset += nb_partitions * part_size; @@ -333,19 +331,68 @@ skip: } cur_nb_taps = s->ir[selir]->nb_samples; - prev_nb_taps = s->ir[prev_selir]->nb_samples; - nb_taps = FFMAX(cur_nb_taps, prev_nb_taps); + nb_taps = cur_nb_taps; - if (!s->norm_ir || s->norm_ir->nb_samples < nb_taps) { - av_frame_free(&s->norm_ir); - s->norm_ir = ff_get_audio_buffer(ctx->inputs[0], FFALIGN(nb_taps, 8)); - if (!s->norm_ir) + if (!s->norm_ir[selir] || s->norm_ir[selir]->nb_samples < nb_taps) { + av_frame_free(&s->norm_ir[selir]); + s->norm_ir[selir] = ff_get_audio_buffer(ctx->inputs[0], FFALIGN(nb_taps, 8)); + if (!s->norm_ir[selir]) return AVERROR(ENOMEM); } av_log(ctx, AV_LOG_DEBUG, "nb_taps: %d\n", cur_nb_taps); av_log(ctx, AV_LOG_DEBUG, "nb_segments: %d\n", s->nb_segments); + switch (s->format) { + case AV_SAMPLE_FMT_FLTP: + for (int ch = 0; ch < s->nb_channels; ch++) { + const float *tsrc = (const float *)s->ir[selir]->extended_data[!s->one2many * ch]; + float *time = (float *)s->norm_ir[selir]->extended_data[ch]; + + memcpy(time, tsrc, sizeof(*time) * nb_taps); + for (int i = FFMAX(1, s->length * nb_taps); i < nb_taps; i++) + time[i] = 0; + + get_power_float(ctx, s, nb_taps, ch, time); + + for (int n = 0; n < s->nb_segments; n++) { + AudioFIRSegment *seg = &s->seg[n]; + + if (!seg->coeff[selir]) + seg->coeff[selir] = ff_get_audio_buffer(ctx->inputs[0], seg->nb_partitions * seg->coeff_size * 2); + if (!seg->coeff[selir]) + return AVERROR(ENOMEM); + + for (int i = 0; i < seg->nb_partitions; i++) + convert_channel_float(ctx, s, ch, seg, i, selir); + } + } + break; + case AV_SAMPLE_FMT_DBLP: + for (int ch = 0; ch < s->nb_channels; ch++) { + const double *tsrc = (const double *)s->ir[selir]->extended_data[!s->one2many * ch]; + double *time = (double *)s->norm_ir[selir]->extended_data[ch]; + + memcpy(time, tsrc, sizeof(*time) * nb_taps); + for (int i = FFMAX(1, s->length * nb_taps); i < nb_taps; i++) + time[i] = 0; + + get_power_double(ctx, s, nb_taps, ch, time); + for (int n = 0; n < s->nb_segments; n++) { + AudioFIRSegment *seg = &s->seg[n]; + + if (!seg->coeff[selir]) + seg->coeff[selir] = ff_get_audio_buffer(ctx->inputs[0], seg->nb_partitions * seg->coeff_size * 2); + if (!seg->coeff[selir]) + return AVERROR(ENOMEM); + + for (int i = 0; i < seg->nb_partitions; i++) + convert_channel_double(ctx, s, ch, seg, i, selir); + } + } + break; + } + s->have_coeffs[selir] = 1; return 0; @@ -536,9 +583,10 @@ static av_cold void uninit(AVFilterContext *ctx) av_freep(&s->fdsp); - av_frame_free(&s->norm_ir); - for (int i = 0; i < s->nb_irs; i++) + for (int i = 0; i < s->nb_irs; i++) { av_frame_free(&s->ir[i]); + av_frame_free(&s->norm_ir[i]); + } av_frame_free(&s->video); } diff --git a/libavfilter/af_afir.h b/libavfilter/af_afir.h index 21b0cf729e..a9f6d217f4 100644 --- a/libavfilter/af_afir.h +++ b/libavfilter/af_afir.h @@ -28,6 +28,8 @@ #include "avfilter.h" #include "af_afirdsp.h" +#define MAX_IR_STREAMS 32 + typedef struct AudioFIRSegment { int nb_partitions; int part_size; @@ -47,17 +49,14 @@ typedef struct AudioFIRSegment { AVFrame *tempin; AVFrame *tempout; AVFrame *buffer; - AVFrame *coeff; + AVFrame *coeff[MAX_IR_STREAMS]; AVFrame *input; AVFrame *output; - AVFrame *loaded; AVTXContext **ctx, **tx, **itx; av_tx_fn ctx_fn, tx_fn, itx_fn; } AudioFIRSegment; -#define MAX_IR_STREAMS 32 - typedef struct AudioFIRContext { const AVClass *class; @@ -91,7 +90,7 @@ typedef struct AudioFIRContext { AVFrame *in; AVFrame *ir[MAX_IR_STREAMS]; - AVFrame *norm_ir; + AVFrame *norm_ir[MAX_IR_STREAMS]; AVFrame *video; int min_part_size; int64_t pts; diff --git a/libavfilter/afir_template.c b/libavfilter/afir_template.c index d42ff882d3..3f3778c675 100644 --- a/libavfilter/afir_template.c +++ b/libavfilter/afir_template.c @@ -141,7 +141,8 @@ end: } static int fn(get_power)(AVFilterContext *ctx, AudioFIRContext *s, - int cur_nb_taps, int ch) + int cur_nb_taps, int ch, + ftype *time) { ftype ch_gain = 1; @@ -151,7 +152,6 @@ static int fn(get_power)(AVFilterContext *ctx, AudioFIRContext *s, break; case 0: { - ftype *time = (ftype *)s->norm_ir->extended_data[ch]; ftype sum = 0; for (int i = 0; i < cur_nb_taps; i++) @@ -161,7 +161,6 @@ static int fn(get_power)(AVFilterContext *ctx, AudioFIRContext *s, break; case 1: { - ftype *time = (ftype *)s->norm_ir->extended_data[ch]; ftype sum = 0; for (int i = 0; i < cur_nb_taps; i++) @@ -171,7 +170,6 @@ static int fn(get_power)(AVFilterContext *ctx, AudioFIRContext *s, break; case 2: { - ftype *time = (ftype *)s->norm_ir->extended_data[ch]; ftype sum = 0; for (int i = 0; i < cur_nb_taps; i++) @@ -182,7 +180,7 @@ static int fn(get_power)(AVFilterContext *ctx, AudioFIRContext *s, case 3: case 4: { - ftype *inc, *outc, scale; + ftype *inc, *outc, scale, power; AVTXContext *tx; av_tx_fn tx_fn; int ret, size; @@ -205,7 +203,6 @@ static int fn(get_power)(AVFilterContext *ctx, AudioFIRContext *s, } { - ftype power, *time = (ftype *)s->norm_ir->extended_data[ch]; memcpy(inc, time, cur_nb_taps * sizeof(SAMPLE_FORMAT)); tx_fn(tx, outc, inc, sizeof(SAMPLE_FORMAT)); @@ -233,7 +230,6 @@ static int fn(get_power)(AVFilterContext *ctx, AudioFIRContext *s, } if (ch_gain != 1. || s->ir_gain != 1.) { - ftype *time = (ftype *)s->norm_ir->extended_data[ch]; ftype gain = ch_gain * s->ir_gain; av_log(ctx, AV_LOG_DEBUG, "ch%d gain %f\n", ch, gain); @@ -248,45 +244,24 @@ static int fn(get_power)(AVFilterContext *ctx, AudioFIRContext *s, } static void fn(convert_channel)(AVFilterContext *ctx, AudioFIRContext *s, int ch, - AudioFIRSegment *seg) + AudioFIRSegment *seg, int coeff_partition, int selir) { - const int coeff_partition = seg->loading[ch]; const int coffset = coeff_partition * seg->coeff_size; - const int selir = s->selir; const int nb_taps = s->nb_taps[selir]; - ftype *tsrc = (ftype *)s->ir[selir]->extended_data[!s->one2many * ch]; - ftype *time = (ftype *)s->norm_ir->extended_data[ch]; + ftype *time = (ftype *)s->norm_ir[selir]->extended_data[ch]; ftype *tempin = (ftype *)seg->tempin->extended_data[ch]; ftype *tempout = (ftype *)seg->tempout->extended_data[ch]; - ctype *coeff = (ctype *)seg->coeff->extended_data[ch]; - int *loaded = (int *)seg->loaded->extended_data[ch]; + ctype *coeff = (ctype *)seg->coeff[selir]->extended_data[ch]; const int remaining = nb_taps - (seg->input_offset + coeff_partition * seg->part_size); const int size = remaining >= seg->part_size ? seg->part_size : remaining; - if (loaded[coeff_partition] == selir + 1) - return; - loaded[coeff_partition] = selir + 1; - - memcpy(time, tsrc, sizeof(*time) * nb_taps); - for (int i = FFMAX(1, s->length * nb_taps); i < nb_taps; i++) - time[i] = 0; - -#if DEPTH == 32 - get_power_float(ctx, s, nb_taps, ch); -#else - get_power_double(ctx, s, nb_taps, ch); -#endif - - av_log(ctx, AV_LOG_DEBUG, "channel: %d\n", ch); - memset(tempin + size, 0, sizeof(*tempin) * (seg->block_size - size)); memcpy(tempin, time + seg->input_offset + coeff_partition * seg->part_size, size * sizeof(*tempin)); - seg->ctx_fn(seg->ctx[ch], tempout, tempin, sizeof(*tempin)); - memcpy(coeff + coffset, tempout, seg->coeff_size * sizeof(*coeff)); + av_log(ctx, AV_LOG_DEBUG, "channel: %d\n", ch); av_log(ctx, AV_LOG_DEBUG, "nb_partitions: %d\n", seg->nb_partitions); av_log(ctx, AV_LOG_DEBUG, "partition size: %d\n", seg->part_size); av_log(ctx, AV_LOG_DEBUG, "block size: %d\n", seg->block_size); @@ -314,11 +289,12 @@ static int fn(fir_quantum)(AVFilterContext *ctx, AVFrame *out, int ch, int offse { AudioFIRContext *s = ctx->priv; const ftype *in = (const ftype *)s->in->extended_data[ch] + offset; - ftype *blockout, *buf, *ptr = (ftype *)out->extended_data[ch] + offset; + ftype *blockout, *ptr = (ftype *)out->extended_data[ch] + offset; const int min_part_size = s->min_part_size; const int nb_samples = FFMIN(min_part_size, out->nb_samples - offset); const int nb_segments = s->nb_segments; const float dry_gain = s->dry_gain; + const int selir = s->selir; for (int segment = 0; segment < nb_segments; segment++) { AudioFIRSegment *seg = &s->seg[segment]; @@ -327,6 +303,7 @@ static int fn(fir_quantum)(AVFilterContext *ctx, AVFrame *out, int ch, int offse ftype *sumin = (ftype *)seg->sumin->extended_data[ch]; ftype *sumout = (ftype *)seg->sumout->extended_data[ch]; ftype *tempin = (ftype *)seg->tempin->extended_data[ch]; + ftype *buf = (ftype *)seg->buffer->extended_data[ch]; int *output_offset = &seg->output_offset[ch]; const int nb_partitions = seg->nb_partitions; const int input_offset = seg->input_offset; @@ -359,28 +336,71 @@ static int fn(fir_quantum)(AVFilterContext *ctx, AVFrame *out, int ch, int offse } memset(sumin, 0, sizeof(*sumin) * seg->fft_length); + + if (seg->loading[ch] < nb_partitions) { + j = seg->part_index[ch] <= 0 ? nb_partitions - 1 : seg->part_index[ch] - 1; + for (int i = 0; i < nb_partitions; i++) { + const int input_partition = j; + const int coeff_partition = i; + const int coffset = coeff_partition * seg->coeff_size; + const ftype *blockout = (const ftype *)seg->blockout->extended_data[ch] + input_partition * seg->block_size; + const ctype *coeff = ((const ctype *)seg->coeff[selir]->extended_data[ch]) + coffset; + + if (j == 0) + j = nb_partitions; + j--; + +#if DEPTH == 32 + s->afirdsp.fcmul_add(sumin, blockout, (const ftype *)coeff, part_size); +#else + s->afirdsp.dcmul_add(sumin, blockout, (const ftype *)coeff, part_size); +#endif + } + + seg->itx_fn(seg->itx[ch], sumout, sumin, sizeof(ctype)); + memcpy(dst + part_size, sumout + part_size, part_size * sizeof(*buf)); + memset(sumin, 0, sizeof(*sumin) * seg->fft_length); + } + blockout = (ftype *)seg->blockout->extended_data[ch] + seg->part_index[ch] * seg->block_size; memset(tempin + part_size, 0, sizeof(*tempin) * (seg->block_size - part_size)); memcpy(tempin, src, sizeof(*src) * part_size); - seg->tx_fn(seg->tx[ch], blockout, tempin, sizeof(ftype)); - j = seg->part_index[ch]; if (seg->loading[ch] < nb_partitions) { + const int selir = s->prev_selir; + + j = seg->part_index[ch]; + for (int i = 0; i < nb_partitions; i++) { + const int input_partition = j; + const int coeff_partition = i; + const int coffset = coeff_partition * seg->coeff_size; + const ftype *blockout = (const ftype *)seg->blockout->extended_data[ch] + input_partition * seg->block_size; + const ctype *coeff = ((const ctype *)seg->coeff[selir]->extended_data[ch]) + coffset; + + if (j == 0) + j = nb_partitions; + j--; + #if DEPTH == 32 - convert_channel_float(ctx, s, ch, seg); + s->afirdsp.fcmul_add(sumin, blockout, (const ftype *)coeff, part_size); #else - convert_channel_double(ctx, s, ch, seg); + s->afirdsp.dcmul_add(sumin, blockout, (const ftype *)coeff, part_size); #endif - seg->loading[ch]++; + } + + seg->itx_fn(seg->itx[ch], sumout, sumin, sizeof(ctype)); + memcpy(dst + 2 * part_size, sumout, 2 * part_size * sizeof(*dst)); + memset(sumin, 0, sizeof(*sumin) * seg->fft_length); } + j = seg->part_index[ch]; for (int i = 0; i < nb_partitions; i++) { const int input_partition = j; const int coeff_partition = i; const int coffset = coeff_partition * seg->coeff_size; const ftype *blockout = (const ftype *)seg->blockout->extended_data[ch] + input_partition * seg->block_size; - const ctype *coeff = ((const ctype *)seg->coeff->extended_data[ch]) + coffset; + const ctype *coeff = ((const ctype *)seg->coeff[selir]->extended_data[ch]) + coffset; if (j == 0) j = nb_partitions; @@ -395,18 +415,43 @@ static int fn(fir_quantum)(AVFilterContext *ctx, AVFrame *out, int ch, int offse seg->itx_fn(seg->itx[ch], sumout, sumin, sizeof(ctype)); - buf = (ftype *)seg->buffer->extended_data[ch]; - fn(fir_fadd)(s, buf, sumout, part_size); + if (seg->loading[ch] < nb_partitions) { + ftype *ptr1 = dst + part_size; + ftype *ptr2 = dst + part_size * 2; + ftype *ptr3 = dst + part_size * 3; + ftype *ptr4 = dst + part_size * 4; + if (seg->loading[ch] == 0) + memcpy(ptr4, buf, sizeof(*ptr4) * part_size); + for (int n = 0; n < part_size; n++) + ptr2[n] += ptr4[n]; - memcpy(dst, buf, part_size * sizeof(*dst)); - memcpy(buf, sumout + part_size, part_size * sizeof(*buf)); + if (seg->loading[ch] < nb_partitions - 1) + memcpy(ptr4, ptr3, part_size * sizeof(*dst)); + for (int n = 0; n < part_size; n++) + ptr1[n] += sumout[n]; - seg->part_index[ch] = (seg->part_index[ch] + 1) % nb_partitions; + if (seg->loading[ch] == nb_partitions - 1) + memcpy(buf, sumout + part_size, part_size * sizeof(*buf)); + + for (int i = 0; i < part_size; i++) { + const ftype factor = (part_size * seg->loading[ch] + i) / (ftype)(part_size * nb_partitions); + const ftype ifactor = 1 - factor; + dst[i] = ptr1[i] * factor + ptr2[i] * ifactor; + } + } else { + fn(fir_fadd)(s, buf, sumout, part_size); + memcpy(dst, buf, part_size * sizeof(*dst)); + memcpy(buf, sumout + part_size, part_size * sizeof(*buf)); + } + + fn(fir_fadd)(s, ptr, dst, nb_samples); if (part_size != min_part_size) memmove(src, src + min_part_size, (seg->input_size - min_part_size) * sizeof(*src)); - fn(fir_fadd)(s, ptr, dst, nb_samples); + seg->part_index[ch] = (seg->part_index[ch] + 1) % nb_partitions; + if (seg->loading[ch] < nb_partitions) + seg->loading[ch]++; } if (s->wet_gain == 1.f)