1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-01-29 22:00:58 +02:00

avfilter/af_afir: add irnorm and irlink options

Deprecate gtype option.
This commit is contained in:
Paul B Mahol 2023-11-18 00:36:18 +01:00
parent 07c303b708
commit 5452cbdc15
4 changed files with 90 additions and 118 deletions

View File

@ -1794,33 +1794,24 @@ Set wet gain. This sets final output gain.
Set Impulse Response filter length. Default is 1, which means whole IR is processed.
@item gtype
Enable applying gain measured from power of IR.
This option is deprecated, and does nothing.
Set which approach to use for auto gain measurement.
@item irnorm
Set norm to be applied to IR coefficients before filtering.
Allowed range is from @var{-1} to @var{2}.
IR coefficients are normalized with calculated vector norm set by this option.
For negative values, no norm is calculated, and IR coefficients are not modified at all.
Default is @var{1}.
@table @option
@item none
Do not apply any gain.
@item peak
select peak gain, a very conservative approach. This is the default value.
@item dc
select DC gain, limited application.
@item gn
select the gain-to-noise approach; this is the most popular one.
@item ac
select AC gain.
@item rms
select RMS gain.
@end table
@item irlink
For multichannel IR if this option is set to @var{true}, all IR channels will be
normalized with the maximal measured gain of all IR channels' coefficients as set by the @code{irnorm} option.
When disabled, all IR coefficients in each IR channel will be normalized independently.
Default is @var{true}.
@item irgain
Set gain to be applied to IR coefficients before filtering.
Allowed range is 0 to 1. This gain is applied after any gain applied with @var{gtype} option.
Allowed range is 0 to 1. This gain is applied after any gain applied with @var{irnorm} option.
@item irfmt
Set format of IR stream. Can be @code{mono} or @code{input}.
@ -1899,7 +1890,7 @@ ffmpeg -i input.wav -i middle_tunnel_1way_mono.wav -lavfi afir output.wav
Apply true stereo processing given input stereo stream, and two stereo impulse responses for left and right channel,
the impulse response files are files with names l_ir.wav and r_ir.wav:
@example
"pan=4C|c0=FL|c1=FL|c2=FR|c3=FR[a];amovie=l_ir.wav[LIR];amovie=r_ir.wav[RIR];[LIR][RIR]amerge[ir];[a][ir]afir=irfmt=input:gtype=gn:irgain=-5dB,pan=stereo|FL<c0+c2|FR<c1+c3"
"pan=4C|c0=FL|c1=FL|c2=FR|c3=FR[a];amovie=l_ir.wav[LIR];amovie=r_ir.wav[RIR];[LIR][RIR]amerge[ir];[a][ir]afir=irfmt=input:irgain=-5dB,pan=stereo|FL<c0+c2|FR<c1+c3"
@end example
@end itemize

View File

@ -393,6 +393,22 @@ skip:
switch (s->format) {
case AV_SAMPLE_FMT_FLTP:
for (int ch = 0; ch < s->nb_channels; ch++) {
const float *tsrc = (const float *)s->ir[selir]->extended_data[!s->one2many * ch];
s->ch_gain[ch] = ir_gain_float(ctx, s, nb_taps, tsrc);
}
if (s->ir_link) {
float gain = +INFINITY;
for (int ch = 0; ch < s->nb_channels; ch++)
gain = fminf(gain, s->ch_gain[ch]);
for (int ch = 0; ch < s->nb_channels; ch++)
s->ch_gain[ch] = gain;
}
for (int ch = 0; ch < s->nb_channels; ch++) {
const float *tsrc = (const float *)s->ir[selir]->extended_data[!s->one2many * ch];
float *time = (float *)s->norm_ir[selir]->extended_data[ch];
@ -401,7 +417,7 @@ skip:
for (int i = FFMAX(1, s->length * nb_taps); i < nb_taps; i++)
time[i] = 0;
get_power_float(ctx, s, nb_taps, ch, time);
ir_scale_float(ctx, s, nb_taps, ch, time, s->ch_gain[ch]);
for (int n = 0; n < s->nb_segments[selir]; n++) {
AudioFIRSegment *seg = &s->seg[selir][n];
@ -417,6 +433,22 @@ skip:
}
break;
case AV_SAMPLE_FMT_DBLP:
for (int ch = 0; ch < s->nb_channels; ch++) {
const double *tsrc = (const double *)s->ir[selir]->extended_data[!s->one2many * ch];
s->ch_gain[ch] = ir_gain_double(ctx, s, nb_taps, tsrc);
}
if (s->ir_link) {
double gain = +INFINITY;
for (int ch = 0; ch < s->nb_channels; ch++)
gain = fmin(gain, s->ch_gain[ch]);
for (int ch = 0; ch < s->nb_channels; ch++)
s->ch_gain[ch] = gain;
}
for (int ch = 0; ch < s->nb_channels; ch++) {
const double *tsrc = (const double *)s->ir[selir]->extended_data[!s->one2many * ch];
double *time = (double *)s->norm_ir[selir]->extended_data[ch];
@ -425,7 +457,8 @@ skip:
for (int i = FFMAX(1, s->length * nb_taps); i < nb_taps; i++)
time[i] = 0;
get_power_double(ctx, s, nb_taps, ch, time);
ir_scale_double(ctx, s, nb_taps, ch, time, s->ch_gain[ch]);
for (int n = 0; n < s->nb_segments[selir]; n++) {
AudioFIRSegment *seg = &s->seg[selir][n];
@ -627,8 +660,9 @@ FF_ENABLE_DEPRECATION_WARNINGS
s->format = outlink->format;
s->nb_channels = outlink->ch_layout.nb_channels;
s->ch_gain = av_calloc(ctx->inputs[0]->ch_layout.nb_channels, sizeof(*s->ch_gain));
s->loading = av_calloc(ctx->inputs[0]->ch_layout.nb_channels, sizeof(*s->loading));
if (!s->loading)
if (!s->loading || !s->ch_gain)
return AVERROR(ENOMEM);
s->fadein[0] = ff_get_audio_buffer(outlink, s->min_part_size);
@ -674,6 +708,7 @@ static av_cold void uninit(AVFilterContext *ctx)
AudioFIRContext *s = ctx->priv;
av_freep(&s->fdsp);
av_freep(&s->ch_gain);
av_freep(&s->loading);
for (int i = 0; i < s->nb_irs; i++) {
@ -812,13 +847,15 @@ static const AVOption afir_options[] = {
{ "dry", "set dry gain", OFFSET(dry_gain), AV_OPT_TYPE_FLOAT, {.dbl=1}, 0, 10, AFR },
{ "wet", "set wet gain", OFFSET(wet_gain), AV_OPT_TYPE_FLOAT, {.dbl=1}, 0, 10, AFR },
{ "length", "set IR length", OFFSET(length), AV_OPT_TYPE_FLOAT, {.dbl=1}, 0, 1, AF },
{ "gtype", "set IR auto gain type",OFFSET(gtype), AV_OPT_TYPE_INT, {.i64=0}, -1, 4, AF, "gtype" },
{ "none", "without auto gain", 0, AV_OPT_TYPE_CONST, {.i64=-1}, 0, 0, AF, "gtype" },
{ "peak", "peak gain", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, AF, "gtype" },
{ "dc", "DC gain", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, AF, "gtype" },
{ "gn", "gain to noise", 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, AF, "gtype" },
{ "ac", "AC gain", 0, AV_OPT_TYPE_CONST, {.i64=3}, 0, 0, AF, "gtype" },
{ "rms", "RMS gain", 0, AV_OPT_TYPE_CONST, {.i64=4}, 0, 0, AF, "gtype" },
{ "gtype", "set IR auto gain type",OFFSET(gtype), AV_OPT_TYPE_INT, {.i64=0}, -1, 4, AF|AV_OPT_FLAG_DEPRECATED, "gtype" },
{ "none", "without auto gain", 0, AV_OPT_TYPE_CONST, {.i64=-1}, 0, 0, AF|AV_OPT_FLAG_DEPRECATED, "gtype" },
{ "peak", "peak gain", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, AF|AV_OPT_FLAG_DEPRECATED, "gtype" },
{ "dc", "DC gain", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, AF|AV_OPT_FLAG_DEPRECATED, "gtype" },
{ "gn", "gain to noise", 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, AF|AV_OPT_FLAG_DEPRECATED, "gtype" },
{ "ac", "AC gain", 0, AV_OPT_TYPE_CONST, {.i64=3}, 0, 0, AF|AV_OPT_FLAG_DEPRECATED, "gtype" },
{ "rms", "RMS gain", 0, AV_OPT_TYPE_CONST, {.i64=4}, 0, 0, AF|AV_OPT_FLAG_DEPRECATED, "gtype" },
{ "irnorm", "set IR norm", OFFSET(ir_norm), AV_OPT_TYPE_FLOAT, {.dbl=1}, -1, 2, AF },
{ "irlink", "set IR link", OFFSET(ir_link), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, AF },
{ "irgain", "set IR gain", OFFSET(ir_gain), AV_OPT_TYPE_FLOAT, {.dbl=1}, 0, 1, AF },
{ "irfmt", "set IR format", OFFSET(ir_format), AV_OPT_TYPE_INT, {.i64=1}, 0, 1, AF, "irfmt" },
{ "mono", "single channel", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, AF, "irfmt" },

View File

@ -63,6 +63,8 @@ typedef struct AudioFIRContext {
float dry_gain;
float length;
int gtype;
float ir_norm;
float ir_link;
float ir_gain;
int ir_format;
int ir_load;
@ -87,6 +89,7 @@ typedef struct AudioFIRContext {
int nb_channels;
int one2many;
int *loading;
double *ch_gain;
AudioFIRSegment seg[MAX_IR_STREAMS][1024];

View File

@ -29,6 +29,8 @@
#undef HYPOT
#undef SAMPLE_FORMAT
#undef TX_TYPE
#undef FABS
#undef POW
#if DEPTH == 32
#define SAMPLE_FORMAT float
#define SQRT sqrtf
@ -36,6 +38,8 @@
#define ctype AVComplexFloat
#define ftype float
#define TX_TYPE AV_TX_FLOAT_RDFT
#define FABS fabsf
#define POW powf
#else
#define SAMPLE_FORMAT double
#define SQRT sqrt
@ -43,6 +47,8 @@
#define ctype AVComplexDouble
#define ftype double
#define TX_TYPE AV_TX_DOUBLE_RDFT
#define FABS fabs
#define POW pow
#endif
#define fn3(a,b) a##_##b
@ -139,95 +145,32 @@ end:
av_free(mag);
}
static int fn(get_power)(AVFilterContext *ctx, AudioFIRContext *s,
int cur_nb_taps, int ch,
ftype *time)
static ftype fn(ir_gain)(AVFilterContext *ctx, AudioFIRContext *s,
int cur_nb_taps, const ftype *time)
{
ftype ch_gain = 1;
ftype ch_gain, sum = 0;
switch (s->gtype) {
case -1:
if (s->ir_norm < 0.f) {
ch_gain = 1;
break;
case 0:
{
ftype sum = 0;
} else if (s->ir_norm == 0.f) {
for (int i = 0; i < cur_nb_taps; i++)
sum += time[i];
ch_gain = 1. / sum;
} else {
ftype ir_norm = s->ir_norm;
for (int i = 0; i < cur_nb_taps; i++)
sum += FFABS(time[i]);
ch_gain = 1. / sum;
}
break;
case 1:
{
ftype sum = 0;
for (int i = 0; i < cur_nb_taps; i++)
sum += time[i];
ch_gain = 1. / sum;
}
break;
case 2:
{
ftype sum = 0;
for (int i = 0; i < cur_nb_taps; i++)
sum += time[i] * time[i];
ch_gain = 1. / SQRT(sum);
}
break;
case 3:
case 4:
{
ftype *inc, *outc, scale, power;
AVTXContext *tx;
av_tx_fn tx_fn;
int ret, size;
size = 1 << av_ceil_log2_c(cur_nb_taps);
inc = av_calloc(size + 2, sizeof(SAMPLE_FORMAT));
outc = av_calloc(size + 2, sizeof(SAMPLE_FORMAT));
if (!inc || !outc) {
av_free(outc);
av_free(inc);
break;
}
scale = 1.;
ret = av_tx_init(&tx, &tx_fn, TX_TYPE, 0, size, &scale, 0);
if (ret < 0) {
av_free(outc);
av_free(inc);
break;
}
{
memcpy(inc, time, cur_nb_taps * sizeof(SAMPLE_FORMAT));
tx_fn(tx, outc, inc, sizeof(SAMPLE_FORMAT));
power = 0;
if (s->gtype == 3) {
for (int i = 0; i < size / 2 + 1; i++)
power = FFMAX(power, HYPOT(outc[i * 2], outc[i * 2 + 1]));
} else {
ftype sum = 0;
for (int i = 0; i < size / 2 + 1; i++)
sum += HYPOT(outc[i * 2], outc[i * 2 + 1]);
power = SQRT(sum / (size / 2 + 1));
}
ch_gain = 1. / power;
}
av_tx_uninit(&tx);
av_free(outc);
av_free(inc);
}
break;
default:
return AVERROR_BUG;
for (int i = 0; i < cur_nb_taps; i++)
sum += POW(FABS(time[i]), ir_norm);
ch_gain = 1. / POW(sum, 1. / ir_norm);
}
return ch_gain;
}
static void fn(ir_scale)(AVFilterContext *ctx, AudioFIRContext *s,
int cur_nb_taps, int ch,
ftype *time, ftype ch_gain)
{
if (ch_gain != 1. || s->ir_gain != 1.) {
ftype gain = ch_gain * s->ir_gain;
@ -238,8 +181,6 @@ static int fn(get_power)(AVFilterContext *ctx, AudioFIRContext *s,
s->fdsp->vector_dmul_scalar(time, time, gain, FFALIGN(cur_nb_taps, 8));
#endif
}
return 0;
}
static void fn(convert_channel)(AVFilterContext *ctx, AudioFIRContext *s, int ch,