2021-07-18 10:20:33 +02:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2014 - 2021 Jason Jang
|
|
|
|
* Copyright (c) 2021 Paul B Mahol
|
|
|
|
*
|
|
|
|
* This file is part of FFmpeg.
|
|
|
|
*
|
|
|
|
* FFmpeg is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public License
|
|
|
|
* as published by the Free Software Foundation; either
|
|
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* FFmpeg is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU Lesser General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU Lesser General Public License
|
|
|
|
* along with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "libavutil/opt.h"
|
|
|
|
#include "libavutil/tx.h"
|
|
|
|
#include "audio.h"
|
|
|
|
#include "avfilter.h"
|
|
|
|
#include "filters.h"
|
|
|
|
#include "internal.h"
|
|
|
|
|
|
|
|
typedef struct AudioPsyClipContext {
|
|
|
|
const AVClass *class;
|
|
|
|
|
|
|
|
double level_in;
|
|
|
|
double level_out;
|
|
|
|
double clip_level;
|
|
|
|
double adaptive;
|
|
|
|
int auto_level;
|
|
|
|
int diff_only;
|
|
|
|
int iterations;
|
|
|
|
char *protections_str;
|
|
|
|
double *protections;
|
|
|
|
|
|
|
|
int num_psy_bins;
|
|
|
|
int fft_size;
|
|
|
|
int overlap;
|
|
|
|
int channels;
|
|
|
|
|
|
|
|
int spread_table_rows;
|
|
|
|
int *spread_table_index;
|
|
|
|
int (*spread_table_range)[2];
|
|
|
|
float *window, *inv_window, *spread_table, *margin_curve;
|
|
|
|
|
|
|
|
AVFrame *in;
|
|
|
|
AVFrame *in_buffer;
|
|
|
|
AVFrame *in_frame;
|
|
|
|
AVFrame *out_dist_frame;
|
|
|
|
AVFrame *windowed_frame;
|
|
|
|
AVFrame *clipping_delta;
|
|
|
|
AVFrame *spectrum_buf;
|
|
|
|
AVFrame *mask_curve;
|
|
|
|
|
|
|
|
AVTXContext **tx_ctx;
|
|
|
|
av_tx_fn tx_fn;
|
|
|
|
AVTXContext **itx_ctx;
|
|
|
|
av_tx_fn itx_fn;
|
|
|
|
} AudioPsyClipContext;
|
|
|
|
|
|
|
|
#define OFFSET(x) offsetof(AudioPsyClipContext, x)
|
|
|
|
#define FLAGS AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_RUNTIME_PARAM
|
|
|
|
|
|
|
|
static const AVOption apsyclip_options[] = {
|
|
|
|
{ "level_in", "set input level", OFFSET(level_in), AV_OPT_TYPE_DOUBLE, {.dbl=1},.015625, 64, FLAGS },
|
|
|
|
{ "level_out", "set output level", OFFSET(level_out), AV_OPT_TYPE_DOUBLE, {.dbl=1},.015625, 64, FLAGS },
|
|
|
|
{ "clip", "set clip level", OFFSET(clip_level), AV_OPT_TYPE_DOUBLE, {.dbl=1},.015625, 1, FLAGS },
|
|
|
|
{ "diff", "enable difference", OFFSET(diff_only), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
|
|
|
|
{ "adaptive", "set adaptive distortion", OFFSET(adaptive), AV_OPT_TYPE_DOUBLE, {.dbl=0.5}, 0, 1, FLAGS },
|
|
|
|
{ "iterations", "set iterations", OFFSET(iterations), AV_OPT_TYPE_INT, {.i64=10}, 1, 20, FLAGS },
|
|
|
|
{ "level", "set auto level", OFFSET(auto_level), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
|
|
|
|
{NULL}
|
|
|
|
};
|
|
|
|
|
|
|
|
AVFILTER_DEFINE_CLASS(apsyclip);
|
|
|
|
|
|
|
|
static void generate_hann_window(float *window, float *inv_window, int size)
|
|
|
|
{
|
|
|
|
for (int i = 0; i < size; i++) {
|
|
|
|
float value = 0.5f * (1.f - cosf(2.f * M_PI * i / size));
|
|
|
|
|
|
|
|
window[i] = value;
|
|
|
|
// 1/window to calculate unwindowed peak.
|
2022-01-29 03:12:41 +02:00
|
|
|
inv_window[i] = value > 0.1f ? 1.f / value : 0.f;
|
2021-07-18 10:20:33 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void set_margin_curve(AudioPsyClipContext *s,
|
|
|
|
const int (*points)[2], int num_points, int sample_rate)
|
|
|
|
{
|
|
|
|
int j = 0;
|
|
|
|
|
|
|
|
s->margin_curve[0] = points[0][1];
|
|
|
|
|
|
|
|
for (int i = 0; i < num_points - 1; i++) {
|
|
|
|
while (j < s->fft_size / 2 + 1 && j * sample_rate / s->fft_size < points[i + 1][0]) {
|
|
|
|
// linearly interpolate between points
|
|
|
|
int binHz = j * sample_rate / s->fft_size;
|
|
|
|
s->margin_curve[j] = points[i][1] + (binHz - points[i][0]) * (points[i + 1][1] - points[i][1]) / (points[i + 1][0] - points[i][0]);
|
|
|
|
j++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// handle bins after the last point
|
|
|
|
while (j < s->fft_size / 2 + 1) {
|
|
|
|
s->margin_curve[j] = points[num_points - 1][1];
|
|
|
|
j++;
|
|
|
|
}
|
|
|
|
|
|
|
|
// convert margin curve to linear amplitude scale
|
|
|
|
for (j = 0; j < s->fft_size / 2 + 1; j++)
|
|
|
|
s->margin_curve[j] = powf(10.f, s->margin_curve[j] / 20.f);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void generate_spread_table(AudioPsyClipContext *s)
|
|
|
|
{
|
|
|
|
// Calculate tent-shape function in log-log scale.
|
|
|
|
|
|
|
|
// As an optimization, only consider bins close to "bin"
|
|
|
|
// This reduces the number of multiplications needed in calculate_mask_curve
|
|
|
|
// The masking contribution at faraway bins is negligeable
|
|
|
|
|
|
|
|
// Another optimization to save memory and speed up the calculation of the
|
|
|
|
// spread table is to calculate and store only 2 spread functions per
|
|
|
|
// octave, and reuse the same spread function for multiple bins.
|
|
|
|
int table_index = 0;
|
|
|
|
int bin = 0;
|
|
|
|
int increment = 1;
|
|
|
|
|
|
|
|
while (bin < s->num_psy_bins) {
|
|
|
|
float sum = 0;
|
|
|
|
int base_idx = table_index * s->num_psy_bins;
|
|
|
|
int start_bin = bin * 3 / 4;
|
|
|
|
int end_bin = FFMIN(s->num_psy_bins, ((bin + 1) * 4 + 2) / 3);
|
|
|
|
int next_bin;
|
|
|
|
|
|
|
|
for (int j = start_bin; j < end_bin; j++) {
|
|
|
|
// add 0.5 so i=0 doesn't get log(0)
|
|
|
|
float rel_idx_log = FFABS(logf((j + 0.5f) / (bin + 0.5f)));
|
|
|
|
float value;
|
|
|
|
if (j >= bin) {
|
|
|
|
// mask up
|
|
|
|
value = expf(-rel_idx_log * 40.f);
|
|
|
|
} else {
|
|
|
|
// mask down
|
|
|
|
value = expf(-rel_idx_log * 80.f);
|
|
|
|
}
|
|
|
|
// the spreading function is centred in the row
|
|
|
|
sum += value;
|
|
|
|
s->spread_table[base_idx + s->num_psy_bins / 2 + j - bin] = value;
|
|
|
|
}
|
|
|
|
// now normalize it
|
|
|
|
for (int j = start_bin; j < end_bin; j++) {
|
|
|
|
s->spread_table[base_idx + s->num_psy_bins / 2 + j - bin] /= sum;
|
|
|
|
}
|
|
|
|
|
|
|
|
s->spread_table_range[table_index][0] = start_bin - bin;
|
|
|
|
s->spread_table_range[table_index][1] = end_bin - bin;
|
|
|
|
|
|
|
|
if (bin <= 1) {
|
|
|
|
next_bin = bin + 1;
|
|
|
|
} else {
|
|
|
|
if ((bin & (bin - 1)) == 0) {
|
|
|
|
// power of 2
|
|
|
|
increment = bin / 2;
|
|
|
|
}
|
|
|
|
|
|
|
|
next_bin = bin + increment;
|
|
|
|
}
|
|
|
|
|
|
|
|
// set bins between "bin" and "next_bin" to use this table_index
|
|
|
|
for (int i = bin; i < next_bin; i++)
|
|
|
|
s->spread_table_index[i] = table_index;
|
|
|
|
|
|
|
|
bin = next_bin;
|
|
|
|
table_index++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int config_input(AVFilterLink *inlink)
|
|
|
|
{
|
|
|
|
AVFilterContext *ctx = inlink->dst;
|
|
|
|
AudioPsyClipContext *s = ctx->priv;
|
2022-01-29 03:15:54 +02:00
|
|
|
static const int points[][2] = { {0,14}, {125,14}, {250,16}, {500,18}, {1000,20}, {2000,20}, {4000,20}, {8000,17}, {16000,14}, {20000,-10} };
|
2021-07-18 10:20:33 +02:00
|
|
|
static const int num_points = 10;
|
2022-11-19 01:04:06 +02:00
|
|
|
float scale = 1.f;
|
2021-07-18 10:20:33 +02:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
s->fft_size = inlink->sample_rate > 100000 ? 1024 : inlink->sample_rate > 50000 ? 512 : 256;
|
|
|
|
s->overlap = s->fft_size / 4;
|
|
|
|
|
|
|
|
// The psy masking calculation is O(n^2),
|
|
|
|
// so skip it for frequencies not covered by base sampling rantes (i.e. 44k)
|
|
|
|
if (inlink->sample_rate <= 50000) {
|
|
|
|
s->num_psy_bins = s->fft_size / 2;
|
|
|
|
} else if (inlink->sample_rate <= 100000) {
|
|
|
|
s->num_psy_bins = s->fft_size / 4;
|
|
|
|
} else {
|
|
|
|
s->num_psy_bins = s->fft_size / 8;
|
|
|
|
}
|
|
|
|
|
|
|
|
s->window = av_calloc(s->fft_size, sizeof(*s->window));
|
|
|
|
s->inv_window = av_calloc(s->fft_size, sizeof(*s->inv_window));
|
|
|
|
if (!s->window || !s->inv_window)
|
|
|
|
return AVERROR(ENOMEM);
|
|
|
|
|
|
|
|
s->in_buffer = ff_get_audio_buffer(inlink, s->fft_size * 2);
|
|
|
|
s->in_frame = ff_get_audio_buffer(inlink, s->fft_size * 2);
|
|
|
|
s->out_dist_frame = ff_get_audio_buffer(inlink, s->fft_size * 2);
|
|
|
|
s->windowed_frame = ff_get_audio_buffer(inlink, s->fft_size * 2);
|
|
|
|
s->clipping_delta = ff_get_audio_buffer(inlink, s->fft_size * 2);
|
|
|
|
s->spectrum_buf = ff_get_audio_buffer(inlink, s->fft_size * 2);
|
|
|
|
s->mask_curve = ff_get_audio_buffer(inlink, s->fft_size / 2 + 1);
|
|
|
|
if (!s->in_buffer || !s->in_frame ||
|
|
|
|
!s->out_dist_frame || !s->windowed_frame ||
|
|
|
|
!s->clipping_delta || !s->spectrum_buf || !s->mask_curve)
|
|
|
|
return AVERROR(ENOMEM);
|
|
|
|
|
|
|
|
generate_hann_window(s->window, s->inv_window, s->fft_size);
|
|
|
|
|
|
|
|
s->margin_curve = av_calloc(s->fft_size / 2 + 1, sizeof(*s->margin_curve));
|
|
|
|
if (!s->margin_curve)
|
|
|
|
return AVERROR(ENOMEM);
|
|
|
|
|
|
|
|
s->spread_table_rows = av_log2(s->num_psy_bins) * 2;
|
|
|
|
s->spread_table = av_calloc(s->spread_table_rows * s->num_psy_bins, sizeof(*s->spread_table));
|
|
|
|
if (!s->spread_table)
|
|
|
|
return AVERROR(ENOMEM);
|
|
|
|
|
|
|
|
s->spread_table_range = av_calloc(s->spread_table_rows * 2, sizeof(*s->spread_table_range));
|
|
|
|
if (!s->spread_table_range)
|
|
|
|
return AVERROR(ENOMEM);
|
|
|
|
|
|
|
|
s->spread_table_index = av_calloc(s->num_psy_bins, sizeof(*s->spread_table_index));
|
|
|
|
if (!s->spread_table_index)
|
|
|
|
return AVERROR(ENOMEM);
|
|
|
|
|
|
|
|
set_margin_curve(s, points, num_points, inlink->sample_rate);
|
|
|
|
|
|
|
|
generate_spread_table(s);
|
|
|
|
|
2021-08-31 16:03:14 +02:00
|
|
|
s->channels = inlink->ch_layout.nb_channels;
|
2021-07-18 10:20:33 +02:00
|
|
|
|
|
|
|
s->tx_ctx = av_calloc(s->channels, sizeof(*s->tx_ctx));
|
|
|
|
s->itx_ctx = av_calloc(s->channels, sizeof(*s->itx_ctx));
|
|
|
|
if (!s->tx_ctx || !s->itx_ctx)
|
|
|
|
return AVERROR(ENOMEM);
|
|
|
|
|
|
|
|
for (int ch = 0; ch < s->channels; ch++) {
|
|
|
|
ret = av_tx_init(&s->tx_ctx[ch], &s->tx_fn, AV_TX_FLOAT_FFT, 0, s->fft_size, &scale, 0);
|
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
ret = av_tx_init(&s->itx_ctx[ch], &s->itx_fn, AV_TX_FLOAT_FFT, 1, s->fft_size, &scale, 0);
|
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void apply_window(AudioPsyClipContext *s,
|
|
|
|
const float *in_frame, float *out_frame, const int add_to_out_frame)
|
|
|
|
{
|
|
|
|
const float *window = s->window;
|
|
|
|
|
|
|
|
for (int i = 0; i < s->fft_size; i++) {
|
|
|
|
if (add_to_out_frame) {
|
|
|
|
out_frame[i] += in_frame[i] * window[i];
|
|
|
|
} else {
|
|
|
|
out_frame[i] = in_frame[i] * window[i];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void calculate_mask_curve(AudioPsyClipContext *s,
|
|
|
|
const float *spectrum, float *mask_curve)
|
|
|
|
{
|
|
|
|
for (int i = 0; i < s->fft_size / 2 + 1; i++)
|
|
|
|
mask_curve[i] = 0;
|
|
|
|
|
|
|
|
for (int i = 0; i < s->num_psy_bins; i++) {
|
2021-09-11 21:45:08 +02:00
|
|
|
int base_idx, start_bin, end_bin, table_idx;
|
2021-07-18 10:20:33 +02:00
|
|
|
float magnitude;
|
|
|
|
int range[2];
|
|
|
|
|
|
|
|
if (i == 0) {
|
|
|
|
magnitude = FFABS(spectrum[0]);
|
|
|
|
} else if (i == s->fft_size / 2) {
|
2022-01-29 03:08:27 +02:00
|
|
|
magnitude = FFABS(spectrum[s->fft_size]);
|
2021-07-18 10:20:33 +02:00
|
|
|
} else {
|
2022-01-29 03:08:27 +02:00
|
|
|
// Because the input signal is real, the + and - frequencies are redundant.
|
2021-07-18 10:20:33 +02:00
|
|
|
// Multiply the magnitude by 2 to simulate adding up the + and - frequencies.
|
|
|
|
magnitude = hypotf(spectrum[2 * i], spectrum[2 * i + 1]) * 2;
|
|
|
|
}
|
|
|
|
|
|
|
|
table_idx = s->spread_table_index[i];
|
|
|
|
range[0] = s->spread_table_range[table_idx][0];
|
|
|
|
range[1] = s->spread_table_range[table_idx][1];
|
2021-09-11 21:45:08 +02:00
|
|
|
base_idx = table_idx * s->num_psy_bins;
|
|
|
|
start_bin = FFMAX(0, i + range[0]);
|
|
|
|
end_bin = FFMIN(s->num_psy_bins, i + range[1]);
|
2021-07-18 10:20:33 +02:00
|
|
|
|
|
|
|
for (int j = start_bin; j < end_bin; j++)
|
|
|
|
mask_curve[j] += s->spread_table[base_idx + s->num_psy_bins / 2 + j - i] * magnitude;
|
|
|
|
}
|
|
|
|
|
|
|
|
// for ultrasonic frequencies, skip the O(n^2) spread calculation and just copy the magnitude
|
|
|
|
for (int i = s->num_psy_bins; i < s->fft_size / 2 + 1; i++) {
|
|
|
|
float magnitude;
|
|
|
|
if (i == s->fft_size / 2) {
|
2022-01-29 03:08:27 +02:00
|
|
|
magnitude = FFABS(spectrum[s->fft_size]);
|
2021-07-18 10:20:33 +02:00
|
|
|
} else {
|
2022-01-29 03:08:27 +02:00
|
|
|
// Because the input signal is real, the + and - frequencies are redundant.
|
2021-07-18 10:20:33 +02:00
|
|
|
// Multiply the magnitude by 2 to simulate adding up the + and - frequencies.
|
|
|
|
magnitude = hypotf(spectrum[2 * i], spectrum[2 * i + 1]) * 2;
|
|
|
|
}
|
|
|
|
|
|
|
|
mask_curve[i] = magnitude;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (int i = 0; i < s->fft_size / 2 + 1; i++)
|
|
|
|
mask_curve[i] = mask_curve[i] / s->margin_curve[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
static void clip_to_window(AudioPsyClipContext *s,
|
|
|
|
const float *windowed_frame, float *clipping_delta, float delta_boost)
|
|
|
|
{
|
|
|
|
const float *window = s->window;
|
|
|
|
|
|
|
|
for (int i = 0; i < s->fft_size; i++) {
|
|
|
|
const float limit = s->clip_level * window[i];
|
|
|
|
const float effective_value = windowed_frame[i] + clipping_delta[i];
|
|
|
|
|
|
|
|
if (effective_value > limit) {
|
|
|
|
clipping_delta[i] += (limit - effective_value) * delta_boost;
|
|
|
|
} else if (effective_value < -limit) {
|
|
|
|
clipping_delta[i] += (-limit - effective_value) * delta_boost;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void limit_clip_spectrum(AudioPsyClipContext *s,
|
|
|
|
float *clip_spectrum, const float *mask_curve)
|
|
|
|
{
|
|
|
|
// bin 0
|
|
|
|
float relative_distortion_level = FFABS(clip_spectrum[0]) / mask_curve[0];
|
|
|
|
|
|
|
|
if (relative_distortion_level > 1.f)
|
|
|
|
clip_spectrum[0] /= relative_distortion_level;
|
|
|
|
|
|
|
|
// bin 1..N/2-1
|
|
|
|
for (int i = 1; i < s->fft_size / 2; i++) {
|
|
|
|
float real = clip_spectrum[i * 2];
|
|
|
|
float imag = clip_spectrum[i * 2 + 1];
|
2022-01-29 03:08:27 +02:00
|
|
|
// Because the input signal is real, the + and - frequencies are redundant.
|
2021-07-18 10:20:33 +02:00
|
|
|
// Multiply the magnitude by 2 to simulate adding up the + and - frequencies.
|
|
|
|
relative_distortion_level = hypotf(real, imag) * 2 / mask_curve[i];
|
|
|
|
if (relative_distortion_level > 1.0) {
|
|
|
|
clip_spectrum[i * 2] /= relative_distortion_level;
|
|
|
|
clip_spectrum[i * 2 + 1] /= relative_distortion_level;
|
2022-01-29 03:08:27 +02:00
|
|
|
clip_spectrum[s->fft_size * 2 - i * 2] /= relative_distortion_level;
|
|
|
|
clip_spectrum[s->fft_size * 2 - i * 2 + 1] /= relative_distortion_level;
|
2021-07-18 10:20:33 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
// bin N/2
|
2022-01-29 03:08:27 +02:00
|
|
|
relative_distortion_level = FFABS(clip_spectrum[s->fft_size]) / mask_curve[s->fft_size / 2];
|
2021-07-18 10:20:33 +02:00
|
|
|
if (relative_distortion_level > 1.f)
|
2022-01-29 03:08:27 +02:00
|
|
|
clip_spectrum[s->fft_size] /= relative_distortion_level;
|
2021-07-18 10:20:33 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static void r2c(float *buffer, int size)
|
|
|
|
{
|
|
|
|
for (int i = size - 1; i >= 0; i--)
|
|
|
|
buffer[2 * i] = buffer[i];
|
|
|
|
|
|
|
|
for (int i = size - 1; i >= 0; i--)
|
|
|
|
buffer[2 * i + 1] = 0.f;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void c2r(float *buffer, int size)
|
|
|
|
{
|
|
|
|
for (int i = 0; i < size; i++)
|
|
|
|
buffer[i] = buffer[2 * i];
|
|
|
|
|
|
|
|
for (int i = 0; i < size; i++)
|
|
|
|
buffer[i + size] = 0.f;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void feed(AVFilterContext *ctx, int ch,
|
|
|
|
const float *in_samples, float *out_samples, int diff_only,
|
|
|
|
float *in_frame, float *out_dist_frame,
|
|
|
|
float *windowed_frame, float *clipping_delta,
|
|
|
|
float *spectrum_buf, float *mask_curve)
|
|
|
|
{
|
|
|
|
AudioPsyClipContext *s = ctx->priv;
|
|
|
|
const float clip_level_inv = 1.f / s->clip_level;
|
|
|
|
const float level_out = s->level_out;
|
|
|
|
float orig_peak = 0;
|
|
|
|
float peak;
|
|
|
|
|
|
|
|
// shift in/out buffers
|
|
|
|
for (int i = 0; i < s->fft_size - s->overlap; i++) {
|
|
|
|
in_frame[i] = in_frame[i + s->overlap];
|
|
|
|
out_dist_frame[i] = out_dist_frame[i + s->overlap];
|
|
|
|
}
|
|
|
|
|
|
|
|
for (int i = 0; i < s->overlap; i++) {
|
|
|
|
in_frame[i + s->fft_size - s->overlap] = in_samples[i];
|
|
|
|
out_dist_frame[i + s->fft_size - s->overlap] = 0.f;
|
|
|
|
}
|
|
|
|
|
|
|
|
apply_window(s, in_frame, windowed_frame, 0);
|
|
|
|
r2c(windowed_frame, s->fft_size);
|
2022-11-19 01:04:06 +02:00
|
|
|
s->tx_fn(s->tx_ctx[ch], spectrum_buf, windowed_frame, sizeof(AVComplexFloat));
|
2021-07-18 10:20:33 +02:00
|
|
|
c2r(windowed_frame, s->fft_size);
|
|
|
|
calculate_mask_curve(s, spectrum_buf, mask_curve);
|
|
|
|
|
|
|
|
// It would be easier to calculate the peak from the unwindowed input.
|
|
|
|
// This is just for consistency with the clipped peak calculateion
|
|
|
|
// because the inv_window zeros out samples on the edge of the window.
|
|
|
|
for (int i = 0; i < s->fft_size; i++)
|
|
|
|
orig_peak = FFMAX(orig_peak, FFABS(windowed_frame[i] * s->inv_window[i]));
|
|
|
|
orig_peak *= clip_level_inv;
|
|
|
|
peak = orig_peak;
|
|
|
|
|
|
|
|
// clear clipping_delta
|
|
|
|
for (int i = 0; i < s->fft_size * 2; i++)
|
|
|
|
clipping_delta[i] = 0.f;
|
|
|
|
|
|
|
|
// repeat clipping-filtering process a few times to control both the peaks and the spectrum
|
|
|
|
for (int i = 0; i < s->iterations; i++) {
|
|
|
|
float mask_curve_shift = 1.122f; // 1.122 is 1dB
|
|
|
|
// The last 1/3 of rounds have boosted delta to help reach the peak target faster
|
|
|
|
float delta_boost = 1.f;
|
|
|
|
if (i >= s->iterations - s->iterations / 3) {
|
|
|
|
// boosting the delta when largs peaks are still present is dangerous
|
|
|
|
if (peak < 2.f)
|
|
|
|
delta_boost = 2.f;
|
|
|
|
}
|
|
|
|
|
|
|
|
clip_to_window(s, windowed_frame, clipping_delta, delta_boost);
|
|
|
|
|
|
|
|
r2c(clipping_delta, s->fft_size);
|
2022-11-19 01:04:06 +02:00
|
|
|
s->tx_fn(s->tx_ctx[ch], spectrum_buf, clipping_delta, sizeof(AVComplexFloat));
|
2021-07-18 10:20:33 +02:00
|
|
|
|
|
|
|
limit_clip_spectrum(s, spectrum_buf, mask_curve);
|
|
|
|
|
2022-11-19 01:04:06 +02:00
|
|
|
s->itx_fn(s->itx_ctx[ch], clipping_delta, spectrum_buf, sizeof(AVComplexFloat));
|
2021-07-18 10:20:33 +02:00
|
|
|
c2r(clipping_delta, s->fft_size);
|
|
|
|
|
|
|
|
for (int i = 0; i < s->fft_size; i++)
|
|
|
|
clipping_delta[i] /= s->fft_size;
|
|
|
|
|
|
|
|
peak = 0;
|
|
|
|
for (int i = 0; i < s->fft_size; i++)
|
|
|
|
peak = FFMAX(peak, FFABS((windowed_frame[i] + clipping_delta[i]) * s->inv_window[i]));
|
|
|
|
peak *= clip_level_inv;
|
|
|
|
|
|
|
|
// Automatically adjust mask_curve as necessary to reach peak target
|
|
|
|
if (orig_peak > 1.f && peak > 1.f) {
|
|
|
|
float diff_achieved = orig_peak - peak;
|
|
|
|
if (i + 1 < s->iterations - s->iterations / 3 && diff_achieved > 0) {
|
|
|
|
float diff_needed = orig_peak - 1.f;
|
|
|
|
float diff_ratio = diff_needed / diff_achieved;
|
|
|
|
// If a good amount of peak reduction was already achieved,
|
|
|
|
// don't shift the mask_curve by the full peak value
|
|
|
|
// On the other hand, if only a little peak reduction was achieved,
|
|
|
|
// don't shift the mask_curve by the enormous diff_ratio.
|
|
|
|
diff_ratio = FFMIN(diff_ratio, peak);
|
|
|
|
mask_curve_shift = FFMAX(mask_curve_shift, diff_ratio);
|
|
|
|
} else {
|
|
|
|
// If the peak got higher than the input or we are in the last 1/3 rounds,
|
|
|
|
// go back to the heavy-handed peak heuristic.
|
|
|
|
mask_curve_shift = FFMAX(mask_curve_shift, peak);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
mask_curve_shift = 1.f + (mask_curve_shift - 1.f) * s->adaptive;
|
|
|
|
|
|
|
|
// Be less strict in the next iteration.
|
|
|
|
// This helps with peak control.
|
|
|
|
for (int i = 0; i < s->fft_size / 2 + 1; i++)
|
|
|
|
mask_curve[i] *= mask_curve_shift;
|
|
|
|
}
|
|
|
|
|
|
|
|
// do overlap & add
|
|
|
|
apply_window(s, clipping_delta, out_dist_frame, 1);
|
|
|
|
|
|
|
|
for (int i = 0; i < s->overlap; i++) {
|
|
|
|
// 4 times overlap with squared hanning window results in 1.5 time increase in amplitude
|
|
|
|
if (!ctx->is_disabled) {
|
|
|
|
out_samples[i] = out_dist_frame[i] / 1.5f;
|
|
|
|
if (!diff_only)
|
|
|
|
out_samples[i] += in_frame[i];
|
|
|
|
if (s->auto_level)
|
|
|
|
out_samples[i] *= clip_level_inv;
|
|
|
|
out_samples[i] *= level_out;
|
|
|
|
} else {
|
|
|
|
out_samples[i] = in_frame[i];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int psy_channel(AVFilterContext *ctx, AVFrame *in, AVFrame *out, int ch)
|
|
|
|
{
|
|
|
|
AudioPsyClipContext *s = ctx->priv;
|
|
|
|
const float *src = (const float *)in->extended_data[ch];
|
|
|
|
float *in_buffer = (float *)s->in_buffer->extended_data[ch];
|
|
|
|
float *dst = (float *)out->extended_data[ch];
|
|
|
|
|
|
|
|
for (int n = 0; n < s->overlap; n++)
|
|
|
|
in_buffer[n] = src[n] * s->level_in;
|
|
|
|
|
|
|
|
feed(ctx, ch, in_buffer, dst, s->diff_only,
|
|
|
|
(float *)(s->in_frame->extended_data[ch]),
|
|
|
|
(float *)(s->out_dist_frame->extended_data[ch]),
|
|
|
|
(float *)(s->windowed_frame->extended_data[ch]),
|
|
|
|
(float *)(s->clipping_delta->extended_data[ch]),
|
|
|
|
(float *)(s->spectrum_buf->extended_data[ch]),
|
|
|
|
(float *)(s->mask_curve->extended_data[ch]));
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int psy_channels(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
|
|
|
|
{
|
|
|
|
AudioPsyClipContext *s = ctx->priv;
|
|
|
|
AVFrame *out = arg;
|
2021-08-31 16:03:14 +02:00
|
|
|
const int start = (out->ch_layout.nb_channels * jobnr) / nb_jobs;
|
|
|
|
const int end = (out->ch_layout.nb_channels * (jobnr+1)) / nb_jobs;
|
2021-07-18 10:20:33 +02:00
|
|
|
|
|
|
|
for (int ch = start; ch < end; ch++)
|
|
|
|
psy_channel(ctx, s->in, out, ch);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
|
|
|
|
{
|
|
|
|
AVFilterContext *ctx = inlink->dst;
|
|
|
|
AVFilterLink *outlink = ctx->outputs[0];
|
|
|
|
AudioPsyClipContext *s = ctx->priv;
|
|
|
|
AVFrame *out;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
out = ff_get_audio_buffer(outlink, s->overlap);
|
|
|
|
if (!out) {
|
|
|
|
ret = AVERROR(ENOMEM);
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
s->in = in;
|
2022-12-01 20:47:24 +02:00
|
|
|
av_frame_copy_props(out, in);
|
2021-07-18 10:20:33 +02:00
|
|
|
ff_filter_execute(ctx, psy_channels, out, NULL,
|
2021-08-31 16:03:14 +02:00
|
|
|
FFMIN(outlink->ch_layout.nb_channels, ff_filter_get_nb_threads(ctx)));
|
2021-07-18 10:20:33 +02:00
|
|
|
|
|
|
|
out->pts = in->pts;
|
|
|
|
out->nb_samples = in->nb_samples;
|
|
|
|
ret = ff_filter_frame(outlink, out);
|
|
|
|
fail:
|
|
|
|
av_frame_free(&in);
|
|
|
|
s->in = NULL;
|
|
|
|
return ret < 0 ? ret : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int activate(AVFilterContext *ctx)
|
|
|
|
{
|
|
|
|
AVFilterLink *inlink = ctx->inputs[0];
|
|
|
|
AVFilterLink *outlink = ctx->outputs[0];
|
|
|
|
AudioPsyClipContext *s = ctx->priv;
|
|
|
|
AVFrame *in = NULL;
|
|
|
|
int ret = 0, status;
|
|
|
|
int64_t pts;
|
|
|
|
|
|
|
|
FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
|
|
|
|
|
|
|
|
ret = ff_inlink_consume_samples(inlink, s->overlap, s->overlap, &in);
|
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
if (ret > 0) {
|
|
|
|
return filter_frame(inlink, in);
|
|
|
|
} else if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
|
|
|
|
ff_outlink_set_status(outlink, status, pts);
|
|
|
|
return 0;
|
|
|
|
} else {
|
|
|
|
if (ff_inlink_queued_samples(inlink) >= s->overlap) {
|
|
|
|
ff_filter_set_ready(ctx, 10);
|
|
|
|
} else if (ff_outlink_frame_wanted(outlink)) {
|
|
|
|
ff_inlink_request_frame(inlink);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static av_cold void uninit(AVFilterContext *ctx)
|
|
|
|
{
|
|
|
|
AudioPsyClipContext *s = ctx->priv;
|
|
|
|
|
|
|
|
av_freep(&s->window);
|
|
|
|
av_freep(&s->inv_window);
|
|
|
|
av_freep(&s->spread_table);
|
|
|
|
av_freep(&s->spread_table_range);
|
|
|
|
av_freep(&s->spread_table_index);
|
|
|
|
av_freep(&s->margin_curve);
|
|
|
|
|
|
|
|
av_frame_free(&s->in_buffer);
|
|
|
|
av_frame_free(&s->in_frame);
|
|
|
|
av_frame_free(&s->out_dist_frame);
|
|
|
|
av_frame_free(&s->windowed_frame);
|
|
|
|
av_frame_free(&s->clipping_delta);
|
|
|
|
av_frame_free(&s->spectrum_buf);
|
|
|
|
av_frame_free(&s->mask_curve);
|
|
|
|
|
|
|
|
for (int ch = 0; ch < s->channels; ch++) {
|
|
|
|
if (s->tx_ctx)
|
|
|
|
av_tx_uninit(&s->tx_ctx[ch]);
|
|
|
|
if (s->itx_ctx)
|
|
|
|
av_tx_uninit(&s->itx_ctx[ch]);
|
|
|
|
}
|
|
|
|
|
|
|
|
av_freep(&s->tx_ctx);
|
|
|
|
av_freep(&s->itx_ctx);
|
|
|
|
}
|
|
|
|
|
|
|
|
static const AVFilterPad inputs[] = {
|
|
|
|
{
|
|
|
|
.name = "default",
|
|
|
|
.type = AVMEDIA_TYPE_AUDIO,
|
|
|
|
.config_props = config_input,
|
|
|
|
},
|
|
|
|
};
|
|
|
|
|
|
|
|
static const AVFilterPad outputs[] = {
|
|
|
|
{
|
|
|
|
.name = "default",
|
|
|
|
.type = AVMEDIA_TYPE_AUDIO,
|
|
|
|
},
|
|
|
|
};
|
|
|
|
|
|
|
|
const AVFilter ff_af_apsyclip = {
|
|
|
|
.name = "apsyclip",
|
|
|
|
.description = NULL_IF_CONFIG_SMALL("Audio Psychoacoustic Clipper."),
|
|
|
|
.priv_size = sizeof(AudioPsyClipContext),
|
|
|
|
.priv_class = &apsyclip_class,
|
|
|
|
.uninit = uninit,
|
|
|
|
FILTER_INPUTS(inputs),
|
|
|
|
FILTER_OUTPUTS(outputs),
|
2021-09-27 13:10:11 +02:00
|
|
|
FILTER_SINGLE_SAMPLEFMT(AV_SAMPLE_FMT_FLTP),
|
2021-07-18 10:20:33 +02:00
|
|
|
.flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL |
|
|
|
|
AVFILTER_FLAG_SLICE_THREADS,
|
|
|
|
.activate = activate,
|
|
|
|
.process_command = ff_filter_process_command,
|
|
|
|
};
|