diff --git a/Changelog b/Changelog index a18dcfbd42..1e9476b24d 100644 --- a/Changelog +++ b/Changelog @@ -24,6 +24,7 @@ version : - AV1 encoding support via librav1e - AV1 frame merge bitstream filter - AV1 Annex B demuxer +- axcorrelate filter version 4.2: diff --git a/doc/filters.texi b/doc/filters.texi index 39570d893b..16bf2df6c2 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -2531,6 +2531,39 @@ ffmpeg -i INPUT -af atrim=end_sample=1000 @end itemize +@section axcorrelate +Calculate normalized cross-correlation between two input audio streams. + +Resulted samples are always between -1 and 1 inclusive. +If result is 1 it means two input samples are highly correlated in that selected segment. +Result 0 means they are not correlated at all. +If result is -1 it means two input samples are out of phase, which means they cancel each +other. + +The filter accepts the following options: + +@table @option +@item size +Set size of segment over which cross-correlation is calculated. +Default is 256. Allowed range is from 2 to 131072. + +@item algo +Set algorithm for cross-correlation. Can be @code{slow} or @code{fast}. +Default is @code{slow}. Fast algorithm assumes mean values over any given segment +are always zero and thus need much less calculations to make. +This is generally not true, but is valid for typical audio streams. +@end table + +@subsection Examples + +@itemize +@item +Calculate correlation between channels in stereo audio stream: +@example +ffmpeg -i stereo.wav -af channelsplit,axcorrelate=size=1024:algo=fast correlation.wav +@end example +@end itemize + @section bandpass Apply a two-pole Butterworth band-pass filter with central diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 8434983b7d..46e3eecf9a 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -88,6 +88,7 @@ OBJS-$(CONFIG_ASTATS_FILTER) += af_astats.o OBJS-$(CONFIG_ASTREAMSELECT_FILTER) += f_streamselect.o framesync.o OBJS-$(CONFIG_ATEMPO_FILTER) += af_atempo.o OBJS-$(CONFIG_ATRIM_FILTER) += trim.o +OBJS-$(CONFIG_AXCORRELATE_FILTER) += af_axcorrelate.o OBJS-$(CONFIG_AZMQ_FILTER) += f_zmq.o OBJS-$(CONFIG_BANDPASS_FILTER) += af_biquads.o OBJS-$(CONFIG_BANDREJECT_FILTER) += af_biquads.o diff --git a/libavfilter/af_axcorrelate.c b/libavfilter/af_axcorrelate.c new file mode 100644 index 0000000000..861903b0f1 --- /dev/null +++ b/libavfilter/af_axcorrelate.c @@ -0,0 +1,378 @@ +/* + * Copyright (c) 2019 Paul B Mahol + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/avassert.h" +#include "libavutil/audio_fifo.h" +#include "libavutil/channel_layout.h" +#include "libavutil/common.h" +#include "libavutil/opt.h" + +#include "audio.h" +#include "avfilter.h" +#include "formats.h" +#include "filters.h" +#include "internal.h" + +typedef struct AudioXCorrelateContext { + const AVClass *class; + + int size; + int algo; + int64_t pts; + + AVAudioFifo *fifo[2]; + AVFrame *cache[2]; + AVFrame *mean_sum[2]; + AVFrame *num_sum; + AVFrame *den_sum[2]; + int used; + + int (*xcorrelate)(AVFilterContext *ctx, AVFrame *out); +} AudioXCorrelateContext; + +static int query_formats(AVFilterContext *ctx) +{ + AVFilterFormats *formats; + AVFilterChannelLayouts *layouts; + static const enum AVSampleFormat sample_fmts[] = { + AV_SAMPLE_FMT_FLTP, + AV_SAMPLE_FMT_NONE + }; + int ret; + + layouts = ff_all_channel_counts(); + if (!layouts) + return AVERROR(ENOMEM); + ret = ff_set_common_channel_layouts(ctx, layouts); + if (ret < 0) + return ret; + + formats = ff_make_format_list(sample_fmts); + if (!formats) + return AVERROR(ENOMEM); + ret = ff_set_common_formats(ctx, formats); + if (ret < 0) + return ret; + + formats = ff_all_samplerates(); + if (!formats) + return AVERROR(ENOMEM); + return ff_set_common_samplerates(ctx, formats); +} + +static float mean_sum(const float *in, int size) +{ + float mean_sum = 0.f; + + for (int i = 0; i < size; i++) + mean_sum += in[i]; + + return mean_sum; +} + +static float square_sum(const float *x, const float *y, int size) +{ + float square_sum = 0.f; + + for (int i = 0; i < size; i++) + square_sum += x[i] * y[i]; + + return square_sum; +} + +static float xcorrelate(const float *x, const float *y, float sumx, float sumy, int size) +{ + const float xm = sumx / size, ym = sumy / size; + float num = 0.f, den, den0 = 0.f, den1 = 0.f; + + for (int i = 0; i < size; i++) { + float xd = x[i] - xm; + float yd = y[i] - ym; + + num += xd * yd; + den0 += xd * xd; + den1 += yd * yd; + } + + num /= size; + den = sqrtf((den0 * den1) / (size * size)); + + return den <= 1e-6f ? 0.f : num / den; +} + +static int xcorrelate_slow(AVFilterContext *ctx, AVFrame *out) +{ + AudioXCorrelateContext *s = ctx->priv; + const int size = s->size; + int used; + + for (int ch = 0; ch < out->channels; ch++) { + const float *x = (const float *)s->cache[0]->extended_data[ch]; + const float *y = (const float *)s->cache[1]->extended_data[ch]; + float *sumx = (float *)s->mean_sum[0]->extended_data[ch]; + float *sumy = (float *)s->mean_sum[1]->extended_data[ch]; + float *dst = (float *)out->extended_data[ch]; + + used = s->used; + if (!used) { + sumx[0] = mean_sum(x, size); + sumy[0] = mean_sum(y, size); + used = 1; + } + + for (int n = 0; n < out->nb_samples; n++) { + dst[n] = xcorrelate(x + n, y + n, sumx[0], sumy[0], size); + + sumx[0] -= x[n]; + sumx[0] += x[n + size]; + sumy[0] -= y[n]; + sumy[0] += y[n + size]; + } + } + + return used; +} + +static int xcorrelate_fast(AVFilterContext *ctx, AVFrame *out) +{ + AudioXCorrelateContext *s = ctx->priv; + const int size = s->size; + int used; + + for (int ch = 0; ch < out->channels; ch++) { + const float *x = (const float *)s->cache[0]->extended_data[ch]; + const float *y = (const float *)s->cache[1]->extended_data[ch]; + float *num_sum = (float *)s->num_sum->extended_data[ch]; + float *den_sumx = (float *)s->den_sum[0]->extended_data[ch]; + float *den_sumy = (float *)s->den_sum[1]->extended_data[ch]; + float *dst = (float *)out->extended_data[ch]; + + used = s->used; + if (!used) { + num_sum[0] = square_sum(x, y, size); + den_sumx[0] = square_sum(x, x, size); + den_sumy[0] = square_sum(y, y, size); + used = 1; + } + + for (int n = 0; n < out->nb_samples; n++) { + float num, den; + + num = num_sum[0] / size; + den = sqrtf((den_sumx[0] * den_sumy[0]) / (size * size)); + + dst[n] = den <= 1e-6f ? 0.f : num / den; + + num_sum[0] -= x[n] * y[n]; + num_sum[0] += x[n + size] * y[n + size]; + den_sumx[0] -= x[n] * x[n]; + den_sumx[0] = FFMAX(den_sumx[0], 0.f); + den_sumx[0] += x[n + size] * x[n + size]; + den_sumy[0] -= y[n] * y[n]; + den_sumy[0] = FFMAX(den_sumy[0], 0.f); + den_sumy[0] += y[n + size] * y[n + size]; + } + } + + return used; +} + +static int activate(AVFilterContext *ctx) +{ + AudioXCorrelateContext *s = ctx->priv; + AVFrame *frame = NULL; + int ret, status; + int available; + int64_t pts; + + FF_FILTER_FORWARD_STATUS_BACK_ALL(ctx->outputs[0], ctx); + + for (int i = 0; i < 2; i++) { + ret = ff_inlink_consume_frame(ctx->inputs[i], &frame); + if (ret > 0) { + if (s->pts == AV_NOPTS_VALUE) + s->pts = frame->pts; + ret = av_audio_fifo_write(s->fifo[i], (void **)frame->extended_data, + frame->nb_samples); + av_frame_free(&frame); + if (ret < 0) + return ret; + } + } + + available = FFMIN(av_audio_fifo_size(s->fifo[0]), av_audio_fifo_size(s->fifo[1])); + if (available > s->size) { + const int out_samples = available - s->size; + AVFrame *out; + + if (!s->cache[0] || s->cache[0]->nb_samples < available) { + av_frame_free(&s->cache[0]); + s->cache[0] = ff_get_audio_buffer(ctx->outputs[0], available); + if (!s->cache[0]) + return AVERROR(ENOMEM); + } + + if (!s->cache[1] || s->cache[1]->nb_samples < available) { + av_frame_free(&s->cache[1]); + s->cache[1] = ff_get_audio_buffer(ctx->outputs[0], available); + if (!s->cache[1]) + return AVERROR(ENOMEM); + } + + ret = av_audio_fifo_peek(s->fifo[0], (void **)s->cache[0]->extended_data, available); + if (ret < 0) + return ret;; + + ret = av_audio_fifo_peek(s->fifo[1], (void **)s->cache[1]->extended_data, available); + if (ret < 0) + return ret;; + + out = ff_get_audio_buffer(ctx->outputs[0], out_samples); + if (!out) + return AVERROR(ENOMEM); + + s->used = s->xcorrelate(ctx, out); + + out->pts = s->pts; + s->pts += out_samples; + + av_audio_fifo_drain(s->fifo[0], out_samples); + av_audio_fifo_drain(s->fifo[1], out_samples); + + return ff_filter_frame(ctx->outputs[0], out); + } + + if (av_audio_fifo_size(s->fifo[0]) > s->size && + av_audio_fifo_size(s->fifo[1]) > s->size) { + ff_filter_set_ready(ctx, 10); + return 0; + } + + for (int i = 0; i < 2; i++) { + if (ff_inlink_acknowledge_status(ctx->inputs[i], &status, &pts)) { + ff_outlink_set_status(ctx->outputs[0], status, pts); + return 0; + } + } + + if (ff_outlink_frame_wanted(ctx->outputs[0])) { + for (int i = 0; i < 2; i++) { + if (av_audio_fifo_size(s->fifo[i]) > s->size) + continue; + ff_inlink_request_frame(ctx->inputs[i]); + return 0; + } + } + + return FFERROR_NOT_READY; +} + +static int config_output(AVFilterLink *outlink) +{ + AVFilterContext *ctx = outlink->src; + AVFilterLink *inlink = ctx->inputs[0]; + AudioXCorrelateContext *s = ctx->priv; + + s->pts = AV_NOPTS_VALUE; + + outlink->format = inlink->format; + outlink->channels = inlink->channels; + s->fifo[0] = av_audio_fifo_alloc(outlink->format, outlink->channels, s->size); + s->fifo[1] = av_audio_fifo_alloc(outlink->format, outlink->channels, s->size); + if (!s->fifo[0] || !s->fifo[1]) + return AVERROR(ENOMEM); + + s->mean_sum[0] = ff_get_audio_buffer(outlink, 1); + s->mean_sum[1] = ff_get_audio_buffer(outlink, 1); + s->num_sum = ff_get_audio_buffer(outlink, 1); + s->den_sum[0] = ff_get_audio_buffer(outlink, 1); + s->den_sum[1] = ff_get_audio_buffer(outlink, 1); + if (!s->mean_sum[0] || !s->mean_sum[1] || !s->num_sum || + !s->den_sum[0] || !s->den_sum[1]) + return AVERROR(ENOMEM); + + switch (s->algo) { + case 0: s->xcorrelate = xcorrelate_slow; break; + case 1: s->xcorrelate = xcorrelate_fast; break; + } + + return 0; +} + +static av_cold void uninit(AVFilterContext *ctx) +{ + AudioXCorrelateContext *s = ctx->priv; + + av_audio_fifo_free(s->fifo[0]); + av_audio_fifo_free(s->fifo[1]); + av_frame_free(&s->cache[0]); + av_frame_free(&s->cache[1]); + av_frame_free(&s->mean_sum[0]); + av_frame_free(&s->mean_sum[1]); + av_frame_free(&s->num_sum); + av_frame_free(&s->den_sum[0]); + av_frame_free(&s->den_sum[1]); +} + +static const AVFilterPad inputs[] = { + { + .name = "axcorrelate0", + .type = AVMEDIA_TYPE_AUDIO, + }, + { + .name = "axcorrelate1", + .type = AVMEDIA_TYPE_AUDIO, + }, + { NULL } +}; + +static const AVFilterPad outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_AUDIO, + .config_props = config_output, + }, + { NULL } +}; + +#define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM +#define OFFSET(x) offsetof(AudioXCorrelateContext, x) + +static const AVOption axcorrelate_options[] = { + { "size", "set segment size", OFFSET(size), AV_OPT_TYPE_INT, {.i64=256}, 2, 131072, AF }, + { "algo", "set alghorithm", OFFSET(algo), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, AF, "algo" }, + { "slow", "slow algorithm", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, AF, "algo" }, + { "fast", "fast algorithm", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, AF, "algo" }, + { NULL } +}; + +AVFILTER_DEFINE_CLASS(axcorrelate); + +AVFilter ff_af_axcorrelate = { + .name = "axcorrelate", + .description = NULL_IF_CONFIG_SMALL("Cross-correlate two audio streams."), + .priv_size = sizeof(AudioXCorrelateContext), + .priv_class = &axcorrelate_class, + .query_formats = query_formats, + .activate = activate, + .uninit = uninit, + .inputs = inputs, + .outputs = outputs, +}; diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index 7c1e19e1da..2a69227476 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -81,6 +81,7 @@ extern AVFilter ff_af_astats; extern AVFilter ff_af_astreamselect; extern AVFilter ff_af_atempo; extern AVFilter ff_af_atrim; +extern AVFilter ff_af_axcorrelate; extern AVFilter ff_af_azmq; extern AVFilter ff_af_bandpass; extern AVFilter ff_af_bandreject; diff --git a/libavfilter/version.h b/libavfilter/version.h index 2e3ff53b20..7e8d849e0c 100644 --- a/libavfilter/version.h +++ b/libavfilter/version.h @@ -30,7 +30,7 @@ #include "libavutil/version.h" #define LIBAVFILTER_VERSION_MAJOR 7 -#define LIBAVFILTER_VERSION_MINOR 66 +#define LIBAVFILTER_VERSION_MINOR 67 #define LIBAVFILTER_VERSION_MICRO 100