From 1309867022a5edf74e80ee6114049d7d053296fc Mon Sep 17 00:00:00 2001
From: Paul B Mahol <onemda@gmail.com>
Date: Mon, 9 May 2022 01:33:50 +0200
Subject: [PATCH] avfilter/af_biquads: add option for block based linear phase
 processing

---
 doc/filters.texi         |  56 ++++++++++++++++
 libavfilter/af_biquads.c | 134 +++++++++++++++++++++++++++++++++++++--
 2 files changed, 186 insertions(+), 4 deletions(-)

diff --git a/doc/filters.texi b/doc/filters.texi
index dc8bb54994..468c277798 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -3500,6 +3500,13 @@ Always use float 32-bit.
 @item f64
 Always use float 64-bit.
 @end table
+
+@item block_size, b
+Set block size used for reverse IIR processing. If this value is set to high enough
+value (higher than impulse response length truncated when reaches near zero values) filtering
+will become linear phase otherwise if not big enough it will just produce nasty artifacts.
+
+Note that filter delay will be exactly this many samples when set to non-zero value.
 @end table
 
 @subsection Commands
@@ -3589,6 +3596,13 @@ Always use float 32-bit.
 @item f64
 Always use float 64-bit.
 @end table
+
+@item block_size, b
+Set block size used for reverse IIR processing. If this value is set to high enough
+value (higher than impulse response length truncated when reaches near zero values) filtering
+will become linear phase otherwise if not big enough it will just produce nasty artifacts.
+
+Note that filter delay will be exactly this many samples when set to non-zero value.
 @end table
 
 @subsection Commands
@@ -3688,6 +3702,13 @@ Always use float 32-bit.
 @item f64
 Always use float 64-bit.
 @end table
+
+@item block_size, b
+Set block size used for reverse IIR processing. If this value is set to high enough
+value (higher than impulse response length truncated when reaches near zero values) filtering
+will become linear phase otherwise if not big enough it will just produce nasty artifacts.
+
+Note that filter delay will be exactly this many samples when set to non-zero value.
 @end table
 
 @subsection Commands
@@ -3772,6 +3793,13 @@ Always use float 32-bit.
 @item f64
 Always use float 64-bit.
 @end table
+
+@item block_size, b
+Set block size used for reverse IIR processing. If this value is set to high enough
+value (higher than impulse response length truncated when reaches near zero values) filtering
+will become linear phase otherwise if not big enough it will just produce nasty artifacts.
+
+Note that filter delay will be exactly this many samples when set to non-zero value.
 @end table
 
 @section bs2b
@@ -4566,6 +4594,13 @@ Always use float 32-bit.
 @item f64
 Always use float 64-bit.
 @end table
+
+@item block_size, b
+Set block size used for reverse IIR processing. If this value is set to high enough
+value (higher than impulse response length truncated when reaches near zero values) filtering
+will become linear phase otherwise if not big enough it will just produce nasty artifacts.
+
+Note that filter delay will be exactly this many samples when set to non-zero value.
 @end table
 
 @subsection Examples
@@ -5069,6 +5104,13 @@ Always use float 32-bit.
 @item f64
 Always use float 64-bit.
 @end table
+
+@item block_size, b
+Set block size used for reverse IIR processing. If this value is set to high enough
+value (higher than impulse response length truncated when reaches near zero values) filtering
+will become linear phase otherwise if not big enough it will just produce nasty artifacts.
+
+Note that filter delay will be exactly this many samples when set to non-zero value.
 @end table
 
 @subsection Commands
@@ -5421,6 +5463,13 @@ Always use float 32-bit.
 @item f64
 Always use float 64-bit.
 @end table
+
+@item block_size, b
+Set block size used for reverse IIR processing. If this value is set to high enough
+value (higher than impulse response length truncated when reaches near zero values) filtering
+will become linear phase otherwise if not big enough it will just produce nasty artifacts.
+
+Note that filter delay will be exactly this many samples when set to non-zero value.
 @end table
 
 @subsection Examples
@@ -6659,6 +6708,13 @@ Always use float 32-bit.
 @item f64
 Always use float 64-bit.
 @end table
+
+@item block_size, b
+Set block size used for reverse IIR processing. If this value is set to high enough
+value (higher than impulse response length truncated when reaches near zero values) filtering
+will become linear phase otherwise if not big enough it will just produce nasty artifacts.
+
+Note that filter delay will be exactly this many samples when set to non-zero value.
 @end table
 
 @subsection Commands
diff --git a/libavfilter/af_biquads.c b/libavfilter/af_biquads.c
index 8caf169d50..2ec32e915d 100644
--- a/libavfilter/af_biquads.c
+++ b/libavfilter/af_biquads.c
@@ -70,6 +70,7 @@
 #include "libavutil/opt.h"
 #include "audio.h"
 #include "avfilter.h"
+#include "filters.h"
 #include "internal.h"
 
 enum FilterType {
@@ -109,6 +110,8 @@ enum TransformType {
 typedef struct ChanCache {
     double i1, i2;
     double o1, o2;
+    double ri1, ri2;
+    double ro1, ro2;
     int clippings;
 } ChanCache;
 
@@ -121,6 +124,7 @@ typedef struct BiquadsContext {
     int csg;
     int transform_type;
     int precision;
+    int block_samples;
 
     int bypass;
 
@@ -139,6 +143,8 @@ typedef struct BiquadsContext {
     double oa0, oa1, oa2;
     double ob0, ob1, ob2;
 
+    AVFrame *block[3];
+
     ChanCache *cache;
     int block_align;
 
@@ -782,6 +788,14 @@ static int config_filter(AVFilterLink *outlink, int reset)
     if (reset)
         memset(s->cache, 0, sizeof(ChanCache) * inlink->ch_layout.nb_channels);
 
+    if (reset && s->block_samples > 0) {
+        for (int i = 0; i < 3; i++) {
+            s->block[i] = ff_get_audio_buffer(outlink, s->block_samples * 2);
+            if (!s->block[i])
+                return AVERROR(ENOMEM);
+        }
+    }
+
     switch (s->transform_type) {
     case DI:
         switch (inlink->format) {
@@ -908,6 +922,41 @@ typedef struct ThreadData {
     AVFrame *in, *out;
 } ThreadData;
 
+static void reverse_samples(AVFrame *out, AVFrame *in, int p,
+                            int oo, int io, int nb_samples)
+{
+    switch (out->format) {
+    case AV_SAMPLE_FMT_S16P: {
+        const int16_t *src = ((const int16_t *)out->extended_data[p]) + io;
+        int16_t *dst = ((int16_t *)out->extended_data[p]) + oo;
+        for (int i = 0, j = nb_samples - 1; i < nb_samples; i++, j--)
+            dst[i] = src[j];
+    }
+        break;
+    case AV_SAMPLE_FMT_S32P: {
+        const int32_t *src = ((const int32_t *)out->extended_data[p]) + io;
+        int32_t *dst = ((int32_t *)out->extended_data[p]) + oo;
+        for (int i = 0, j = nb_samples - 1; i < nb_samples; i++, j--)
+            dst[i] = src[j];
+    }
+        break;
+    case AV_SAMPLE_FMT_FLTP: {
+        const float *src = ((const float *)in->extended_data[p]) + io;
+        float *dst = ((float *)out->extended_data[p]) + oo;
+        for (int i = 0, j = nb_samples - 1; i < nb_samples; i++, j--)
+            dst[i] = src[j];
+    }
+        break;
+    case AV_SAMPLE_FMT_DBLP: {
+        const double *src = ((const double *)in->extended_data[p]) + io;
+        double *dst = ((double *)out->extended_data[p]) + oo;
+        for (int i = 0, j = nb_samples - 1; i < nb_samples; i++, j--)
+            dst[i] = src[j];
+    }
+        break;
+    }
+}
+
 static int filter_channel(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
 {
     AVFilterLink *inlink = ctx->inputs[0];
@@ -930,9 +979,37 @@ static int filter_channel(AVFilterContext *ctx, void *arg, int jobnr, int nb_job
             continue;
         }
 
-        s->filter(s, buf->extended_data[ch], out_buf->extended_data[ch], buf->nb_samples,
-                  &s->cache[ch].i1, &s->cache[ch].i2, &s->cache[ch].o1, &s->cache[ch].o2,
-                  s->b0, s->b1, s->b2, s->a1, s->a2, &s->cache[ch].clippings, ctx->is_disabled);
+        if (!s->block_samples) {
+            s->filter(s, buf->extended_data[ch], out_buf->extended_data[ch], buf->nb_samples,
+                      &s->cache[ch].i1, &s->cache[ch].i2, &s->cache[ch].o1, &s->cache[ch].o2,
+                      s->b0, s->b1, s->b2, s->a1, s->a2, &s->cache[ch].clippings, ctx->is_disabled);
+        } else {
+            memcpy(s->block[0]->extended_data[ch] + s->block_align * s->block_samples, buf->extended_data[ch],
+                   buf->nb_samples * s->block_align);
+            s->filter(s, s->block[0]->extended_data[ch], s->block[1]->extended_data[ch], s->block_samples,
+                      &s->cache[ch].i1, &s->cache[ch].i2, &s->cache[ch].o1, &s->cache[ch].o2,
+                      s->b0, s->b1, s->b2, s->a1, s->a2, &s->cache[ch].clippings, ctx->is_disabled);
+            s->cache[ch].ri1 = s->cache[ch].i1;
+            s->cache[ch].ri2 = s->cache[ch].i2;
+            s->cache[ch].ro1 = s->cache[ch].o1;
+            s->cache[ch].ro2 = s->cache[ch].o2;
+            s->filter(s, s->block[0]->extended_data[ch] + s->block_samples * s->block_align,
+                      s->block[1]->extended_data[ch] + s->block_samples * s->block_align,
+                      s->block_samples,
+                      &s->cache[ch].ri1, &s->cache[ch].ri2, &s->cache[ch].ro1, &s->cache[ch].ro2,
+                      s->b0, s->b1, s->b2, s->a1, s->a2, &s->cache[ch].clippings, ctx->is_disabled);
+            reverse_samples(s->block[2], s->block[1], ch, 0, 0, s->block_samples * 2);
+            s->cache[ch].ri1 = 0.;
+            s->cache[ch].ri2 = 0.;
+            s->cache[ch].ro1 = 0.;
+            s->cache[ch].ro2 = 0.;
+            s->filter(s, s->block[2]->extended_data[ch], s->block[2]->extended_data[ch], s->block[2]->nb_samples,
+                      &s->cache[ch].ri1, &s->cache[ch].ri2, &s->cache[ch].ro1, &s->cache[ch].ro2,
+                      s->b0, s->b1, s->b2, s->a1, s->a2, &s->cache[ch].clippings, ctx->is_disabled);
+            reverse_samples(out_buf, s->block[2], ch, 0, s->block_samples, out_buf->nb_samples);
+            memmove(s->block[0]->extended_data[ch], s->block[0]->extended_data[ch] + s->block_align * s->block_samples,
+                    s->block_samples * s->block_align);
+        }
     }
 
     return 0;
@@ -988,6 +1065,37 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *buf)
     return ff_filter_frame(outlink, out_buf);
 }
 
+static int activate(AVFilterContext *ctx)
+{
+    AVFilterLink *inlink = ctx->inputs[0];
+    AVFilterLink *outlink = ctx->outputs[0];
+    BiquadsContext *s = ctx->priv;
+    AVFrame *in = NULL;
+    int ret;
+
+    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
+
+    if (s->block_samples > 0) {
+        ret = ff_inlink_consume_samples(inlink, s->block_samples, s->block_samples, &in);
+    } else {
+        ret = ff_inlink_consume_frame(inlink, &in);
+    }
+    if (ret < 0)
+        return ret;
+    if (ret > 0)
+        return filter_frame(inlink, in);
+
+    if (s->block_samples > 0 && ff_inlink_queued_samples(inlink) >= s->block_samples) {
+        ff_filter_set_ready(ctx, 10);
+        return 0;
+    }
+
+    FF_FILTER_FORWARD_STATUS(inlink, outlink);
+    FF_FILTER_FORWARD_WANTED(outlink, inlink);
+
+    return FFERROR_NOT_READY;
+}
+
 static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
                            char *res, int res_len, int flags)
 {
@@ -1005,6 +1113,8 @@ static av_cold void uninit(AVFilterContext *ctx)
 {
     BiquadsContext *s = ctx->priv;
 
+    for (int i = 0; i < 3; i++)
+        av_frame_free(&s->block[i]);
     av_freep(&s->cache);
     av_channel_layout_uninit(&s->ch_layout);
 }
@@ -1013,7 +1123,6 @@ static const AVFilterPad inputs[] = {
     {
         .name         = "default",
         .type         = AVMEDIA_TYPE_AUDIO,
-        .filter_frame = filter_frame,
     },
 };
 
@@ -1043,6 +1152,7 @@ const AVFilter ff_af_##name_ = {                         \
     .priv_class    = &priv_class_##_class,               \
     .priv_size     = sizeof(BiquadsContext),             \
     .init          = name_##_init,                       \
+    .activate      = activate,                           \
     .uninit        = uninit,                             \
     FILTER_INPUTS(inputs),                               \
     FILTER_OUTPUTS(outputs),                             \
@@ -1091,6 +1201,8 @@ static const AVOption equalizer_options[] = {
     {"s32", "signed 32-bit",         0, AV_OPT_TYPE_CONST, {.i64=1},  0, 0, AF, "precision"},
     {"f32", "floating-point single", 0, AV_OPT_TYPE_CONST, {.i64=2},  0, 0, AF, "precision"},
     {"f64", "floating-point double", 0, AV_OPT_TYPE_CONST, {.i64=3},  0, 0, AF, "precision"},
+    {"blocksize", "set the block size", OFFSET(block_samples), AV_OPT_TYPE_INT, {.i64=0}, 0, 32768, AF},
+    {"b",         "set the block size", OFFSET(block_samples), AV_OPT_TYPE_INT, {.i64=0}, 0, 32768, AF},
     {NULL}
 };
 
@@ -1134,6 +1246,8 @@ static const AVOption bass_lowshelf_options[] = {
     {"s32", "signed 32-bit",         0, AV_OPT_TYPE_CONST, {.i64=1},  0, 0, AF, "precision"},
     {"f32", "floating-point single", 0, AV_OPT_TYPE_CONST, {.i64=2},  0, 0, AF, "precision"},
     {"f64", "floating-point double", 0, AV_OPT_TYPE_CONST, {.i64=3},  0, 0, AF, "precision"},
+    {"blocksize", "set the block size", OFFSET(block_samples), AV_OPT_TYPE_INT, {.i64=0}, 0, 32768, AF},
+    {"b",         "set the block size", OFFSET(block_samples), AV_OPT_TYPE_INT, {.i64=0}, 0, 32768, AF},
     {NULL}
 };
 
@@ -1184,6 +1298,8 @@ static const AVOption treble_highshelf_options[] = {
     {"s32", "signed 32-bit",         0, AV_OPT_TYPE_CONST, {.i64=1},  0, 0, AF, "precision"},
     {"f32", "floating-point single", 0, AV_OPT_TYPE_CONST, {.i64=2},  0, 0, AF, "precision"},
     {"f64", "floating-point double", 0, AV_OPT_TYPE_CONST, {.i64=3},  0, 0, AF, "precision"},
+    {"blocksize", "set the block size", OFFSET(block_samples), AV_OPT_TYPE_INT, {.i64=0}, 0, 32768, AF},
+    {"b",         "set the block size", OFFSET(block_samples), AV_OPT_TYPE_INT, {.i64=0}, 0, 32768, AF},
     {NULL}
 };
 
@@ -1233,6 +1349,8 @@ static const AVOption bandpass_options[] = {
     {"s32", "signed 32-bit",         0, AV_OPT_TYPE_CONST, {.i64=1},  0, 0, AF, "precision"},
     {"f32", "floating-point single", 0, AV_OPT_TYPE_CONST, {.i64=2},  0, 0, AF, "precision"},
     {"f64", "floating-point double", 0, AV_OPT_TYPE_CONST, {.i64=3},  0, 0, AF, "precision"},
+    {"blocksize", "set the block size", OFFSET(block_samples), AV_OPT_TYPE_INT, {.i64=0}, 0, 32768, AF},
+    {"b",         "set the block size", OFFSET(block_samples), AV_OPT_TYPE_INT, {.i64=0}, 0, 32768, AF},
     {NULL}
 };
 
@@ -1272,6 +1390,8 @@ static const AVOption bandreject_options[] = {
     {"s32", "signed 32-bit",         0, AV_OPT_TYPE_CONST, {.i64=1},  0, 0, AF, "precision"},
     {"f32", "floating-point single", 0, AV_OPT_TYPE_CONST, {.i64=2},  0, 0, AF, "precision"},
     {"f64", "floating-point double", 0, AV_OPT_TYPE_CONST, {.i64=3},  0, 0, AF, "precision"},
+    {"blocksize", "set the block size", OFFSET(block_samples), AV_OPT_TYPE_INT, {.i64=0}, 0, 32768, AF},
+    {"b",         "set the block size", OFFSET(block_samples), AV_OPT_TYPE_INT, {.i64=0}, 0, 32768, AF},
     {NULL}
 };
 
@@ -1313,6 +1433,8 @@ static const AVOption lowpass_options[] = {
     {"s32", "signed 32-bit",         0, AV_OPT_TYPE_CONST, {.i64=1},  0, 0, AF, "precision"},
     {"f32", "floating-point single", 0, AV_OPT_TYPE_CONST, {.i64=2},  0, 0, AF, "precision"},
     {"f64", "floating-point double", 0, AV_OPT_TYPE_CONST, {.i64=3},  0, 0, AF, "precision"},
+    {"blocksize", "set the block size", OFFSET(block_samples), AV_OPT_TYPE_INT, {.i64=0}, 0, 32768, AF},
+    {"b",         "set the block size", OFFSET(block_samples), AV_OPT_TYPE_INT, {.i64=0}, 0, 32768, AF},
     {NULL}
 };
 
@@ -1354,6 +1476,8 @@ static const AVOption highpass_options[] = {
     {"s32", "signed 32-bit",         0, AV_OPT_TYPE_CONST, {.i64=1},  0, 0, AF, "precision"},
     {"f32", "floating-point single", 0, AV_OPT_TYPE_CONST, {.i64=2},  0, 0, AF, "precision"},
     {"f64", "floating-point double", 0, AV_OPT_TYPE_CONST, {.i64=3},  0, 0, AF, "precision"},
+    {"blocksize", "set the block size", OFFSET(block_samples), AV_OPT_TYPE_INT, {.i64=0}, 0, 32768, AF},
+    {"b",         "set the block size", OFFSET(block_samples), AV_OPT_TYPE_INT, {.i64=0}, 0, 32768, AF},
     {NULL}
 };
 
@@ -1429,6 +1553,8 @@ static const AVOption biquad_options[] = {
     {"s32", "signed 32-bit",         0, AV_OPT_TYPE_CONST, {.i64=1},  0, 0, AF, "precision"},
     {"f32", "floating-point single", 0, AV_OPT_TYPE_CONST, {.i64=2},  0, 0, AF, "precision"},
     {"f64", "floating-point double", 0, AV_OPT_TYPE_CONST, {.i64=3},  0, 0, AF, "precision"},
+    {"blocksize", "set the block size", OFFSET(block_samples), AV_OPT_TYPE_INT, {.i64=0}, 0, 32768, AF},
+    {"b",         "set the block size", OFFSET(block_samples), AV_OPT_TYPE_INT, {.i64=0}, 0, 32768, AF},
     {NULL}
 };