mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
avfilter: add loudnorm
Signed-off-by: Kyle Swanson <k@ylo.ph>
This commit is contained in:
parent
42ee137a0a
commit
c0c378009b
@ -35,6 +35,7 @@ version <next>:
|
||||
- Generic OpenMAX IL encoder with support for Raspberry Pi
|
||||
- IFF ANIM demuxer & decoder
|
||||
- Direct Stream Transfer (DST) decoder
|
||||
- loudnorm filter
|
||||
|
||||
version 3.0:
|
||||
- Common Encryption (CENC) MP4 encoding and decoding support
|
||||
|
@ -358,6 +358,7 @@ Filters:
|
||||
af_compand.c Paul B Mahol
|
||||
af_firequalizer.c Muhammad Faiz
|
||||
af_ladspa.c Paul B Mahol
|
||||
af_loudnorm.c Kyle Swanson
|
||||
af_pan.c Nicolas George
|
||||
af_sidechaincompress.c Paul B Mahol
|
||||
af_silenceremove.c Paul B Mahol
|
||||
|
5
configure
vendored
5
configure
vendored
@ -226,6 +226,8 @@ External library support:
|
||||
--enable-libcdio enable audio CD grabbing with libcdio [no]
|
||||
--enable-libdc1394 enable IIDC-1394 grabbing using libdc1394
|
||||
and libraw1394 [no]
|
||||
--enable-libebur128 enable libebur128 for EBU R128 measurement,
|
||||
needed for loudnorm filter [no]
|
||||
--enable-libfaac enable AAC encoding via libfaac [no]
|
||||
--enable-libfdk-aac enable AAC de/encoding via libfdk-aac [no]
|
||||
--enable-libflite enable flite (voice synthesis) support via libflite [no]
|
||||
@ -1472,6 +1474,7 @@ EXTERNAL_LIBRARY_LIST="
|
||||
libcdio
|
||||
libcelt
|
||||
libdc1394
|
||||
libebur128
|
||||
libfaac
|
||||
libfdk_aac
|
||||
libflite
|
||||
@ -2987,6 +2990,7 @@ hqdn3d_filter_deps="gpl"
|
||||
interlace_filter_deps="gpl"
|
||||
kerndeint_filter_deps="gpl"
|
||||
ladspa_filter_deps="ladspa dlopen"
|
||||
loudnorm_filter_deps="libebur128"
|
||||
mcdeint_filter_deps="avcodec gpl"
|
||||
movie_filter_deps="avcodec avformat"
|
||||
mpdecimate_filter_deps="gpl"
|
||||
@ -5593,6 +5597,7 @@ enabled libcelt && require libcelt celt/celt.h celt_decode -lcelt0 &&
|
||||
{ check_lib celt/celt.h celt_decoder_create_custom -lcelt0 ||
|
||||
die "ERROR: libcelt must be installed and version must be >= 0.11.0."; }
|
||||
enabled libcaca && require_pkg_config caca caca.h caca_create_canvas
|
||||
enabled libebur128 && require ebur128 ebur128.h ebur128_relative_threshold -lebur128
|
||||
enabled libfaac && require2 libfaac "stdint.h faac.h" faacEncGetVersion -lfaac
|
||||
enabled libfdk_aac && { use_pkg_config fdk-aac "fdk-aac/aacenc_lib.h" aacEncOpen ||
|
||||
{ require libfdk_aac fdk-aac/aacenc_lib.h aacEncOpen -lfdk-aac &&
|
||||
|
@ -2711,6 +2711,61 @@ Modify the @var{N}-th control value.
|
||||
If the specified value is not valid, it is ignored and prior one is kept.
|
||||
@end table
|
||||
|
||||
@section loudnorm
|
||||
|
||||
EBU R128 loudness normalization. Includes both dynamic and linear normalization modes.
|
||||
Support for both single pass (livestreams, files) and double pass (files) modes.
|
||||
This algorithm can target IL, LRA, and maximum true peak.
|
||||
|
||||
To enable compilation of this filter you need to configure FFmpeg with
|
||||
@code{--enable-libebur128}.
|
||||
|
||||
The filter accepts the following options:
|
||||
|
||||
@table @option
|
||||
@item I, i
|
||||
Set integrated loudness target.
|
||||
Range is -70.0 - -5.0. Default value is -24.0.
|
||||
|
||||
@item LRA, lra
|
||||
Set loudness range target.
|
||||
Range is 1.0 - 20.0. Default value is 7.0.
|
||||
|
||||
@item TP, tp
|
||||
Set maximum true peak.
|
||||
Range is -9.0 - +0.0. Default value is -2.0.
|
||||
|
||||
@item measured_I, measured_i
|
||||
Measured IL of input file.
|
||||
Range is -99.0 - +0.0.
|
||||
|
||||
@item measured_LRA, measured_lra
|
||||
Measured LRA of input file.
|
||||
Range is 0.0 - 99.0.
|
||||
|
||||
@item measured_TP, measured_tp
|
||||
Measured true peak of input file.
|
||||
Range is -99.0 - +99.0.
|
||||
|
||||
@item measured_thresh
|
||||
Measured threshold of input file.
|
||||
Range is -99.0 - +0.0.
|
||||
|
||||
@item offset
|
||||
Set offset gain. Gain is applied before the true-peak limiter.
|
||||
Range is -99.0 - +99.0. Default is +0.0.
|
||||
|
||||
@item linear
|
||||
Normalize linearly if possible.
|
||||
measured_I, measured_LRA, measured_TP, and measured_thresh must also
|
||||
be specified in order to use this mode.
|
||||
Options are true or false. Default is true.
|
||||
|
||||
@item print_format
|
||||
Set print format for stats. Options are summary, json, or none.
|
||||
Default value is none.
|
||||
@end table
|
||||
|
||||
@section lowpass
|
||||
|
||||
Apply a low-pass filter with 3dB point frequency.
|
||||
|
@ -89,6 +89,7 @@ OBJS-$(CONFIG_FLANGER_FILTER) += af_flanger.o generate_wave_table
|
||||
OBJS-$(CONFIG_HIGHPASS_FILTER) += af_biquads.o
|
||||
OBJS-$(CONFIG_JOIN_FILTER) += af_join.o
|
||||
OBJS-$(CONFIG_LADSPA_FILTER) += af_ladspa.o
|
||||
OBJS-$(CONFIG_LOUDNORM_FILTER) += af_loudnorm.o
|
||||
OBJS-$(CONFIG_LOWPASS_FILTER) += af_biquads.o
|
||||
OBJS-$(CONFIG_PAN_FILTER) += af_pan.o
|
||||
OBJS-$(CONFIG_REPLAYGAIN_FILTER) += af_replaygain.o
|
||||
|
907
libavfilter/af_loudnorm.c
Normal file
907
libavfilter/af_loudnorm.c
Normal file
@ -0,0 +1,907 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Kyle Swanson <k@ylo.ph>.
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/* http://k.ylo.ph/2016/04/04/loudnorm.html */
|
||||
|
||||
#include "libavutil/opt.h"
|
||||
#include "avfilter.h"
|
||||
#include "internal.h"
|
||||
#include "audio.h"
|
||||
#include <ebur128.h>
|
||||
|
||||
/* Selects which branch of filter_frame() processes the next input frame. */
enum FrameType {
    FIRST_FRAME = 0, /* state assigned by config_input() before any audio is seen */
    INNER_FRAME = 1, /* entered once the first frame has been processed */
    FINAL_FRAME = 2, /* set by request_frame() when the input signals EOF */
    LINEAR_MODE = 3, /* a single constant gain is applied to every sample */
    FRAME_NB    = 4  /* number of frame types */
};
|
||||
|
||||
/* States of the envelope state machine driven by true_peak_limiter(). */
enum LimiterState {
    OUT      = 0, /* no gain reduction active; scanning for a peak */
    ATTACK   = 1, /* gain ramps from gain_reduction[0] to gain_reduction[1] */
    SUSTAIN  = 2, /* gain_reduction[1] is held */
    RELEASE  = 3, /* gain ramps from gain_reduction[0] to gain_reduction[1] (1.0) */
    STATE_NB = 4  /* number of limiter states */
};
|
||||
|
||||
/* Output format of the statistics logged by uninit(). */
enum PrintFormat {
    NONE    = 0, /* log nothing */
    JSON    = 1, /* log a JSON object */
    SUMMARY = 2, /* log a human-readable table */
    PF_NB   = 3  /* number of print formats; bounds the "print_format" option */
};
|
||||
|
||||
typedef struct LoudNormContext {
|
||||
const AVClass *class;
|
||||
double target_i;
|
||||
double target_lra;
|
||||
double target_tp;
|
||||
double measured_i;
|
||||
double measured_lra;
|
||||
double measured_tp;
|
||||
double measured_thresh;
|
||||
double offset;
|
||||
int linear;
|
||||
enum PrintFormat print_format;
|
||||
|
||||
double *buf;
|
||||
int buf_size;
|
||||
int buf_index;
|
||||
int prev_buf_index;
|
||||
|
||||
double delta[30];
|
||||
double weights[21];
|
||||
double prev_delta;
|
||||
int index;
|
||||
|
||||
double gain_reduction[2];
|
||||
double *limiter_buf;
|
||||
double *prev_smp;
|
||||
int limiter_buf_index;
|
||||
int limiter_buf_size;
|
||||
enum LimiterState limiter_state;
|
||||
int peak_index;
|
||||
int env_index;
|
||||
int env_cnt;
|
||||
int attack_length;
|
||||
int release_length;
|
||||
|
||||
int64_t pts;
|
||||
enum FrameType frame_type;
|
||||
int above_threshold;
|
||||
int prev_nb_samples;
|
||||
int channels;
|
||||
|
||||
ebur128_state *r128_in;
|
||||
ebur128_state *r128_out;
|
||||
} LoudNormContext;
|
||||
|
||||
#define OFFSET(x) offsetof(LoudNormContext, x)
|
||||
#define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
|
||||
|
||||
static const AVOption loudnorm_options[] = {
|
||||
{ "I", "set integrated loudness target", OFFSET(target_i), AV_OPT_TYPE_DOUBLE, {.dbl = -24.}, -70., -5., FLAGS },
|
||||
{ "i", "set integrated loudness target", OFFSET(target_i), AV_OPT_TYPE_DOUBLE, {.dbl = -24.}, -70., -5., FLAGS },
|
||||
{ "LRA", "set loudness range target", OFFSET(target_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 7.}, 1., 20., FLAGS },
|
||||
{ "lra", "set loudness range target", OFFSET(target_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 7.}, 1., 20., FLAGS },
|
||||
{ "TP", "set maximum true peak", OFFSET(target_tp), AV_OPT_TYPE_DOUBLE, {.dbl = -2.}, -9., 0., FLAGS },
|
||||
{ "tp", "set maximum true peak", OFFSET(target_tp), AV_OPT_TYPE_DOUBLE, {.dbl = -2.}, -9., 0., FLAGS },
|
||||
{ "measured_I", "measured IL of input file", OFFSET(measured_i), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, -99., 0., FLAGS },
|
||||
{ "measured_i", "measured IL of input file", OFFSET(measured_i), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, -99., 0., FLAGS },
|
||||
{ "measured_LRA", "measured LRA of input file", OFFSET(measured_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, 0., 99., FLAGS },
|
||||
{ "measured_lra", "measured LRA of input file", OFFSET(measured_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, 0., 99., FLAGS },
|
||||
{ "measured_TP", "measured true peak of input file", OFFSET(measured_tp), AV_OPT_TYPE_DOUBLE, {.dbl = 99.}, -99., 99., FLAGS },
|
||||
{ "measured_tp", "measured true peak of input file", OFFSET(measured_tp), AV_OPT_TYPE_DOUBLE, {.dbl = 99.}, -99., 99., FLAGS },
|
||||
{ "measured_thresh", "measured threshold of input file", OFFSET(measured_thresh), AV_OPT_TYPE_DOUBLE, {.dbl = -70.}, -99., 0., FLAGS },
|
||||
{ "offset", "set offset gain", OFFSET(offset), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, -99., 99., FLAGS },
|
||||
{ "linear", "normalize linearly if possible", OFFSET(linear), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS },
|
||||
{ "print_format", "set print format for stats", OFFSET(print_format), AV_OPT_TYPE_INT, {.i64 = NONE}, NONE, PF_NB -1, FLAGS, "print_format" },
|
||||
{ "none", 0, 0, AV_OPT_TYPE_CONST, {.i64 = NONE}, 0, 0, FLAGS, "print_format" },
|
||||
{ "json", 0, 0, AV_OPT_TYPE_CONST, {.i64 = JSON}, 0, 0, FLAGS, "print_format" },
|
||||
{ "summary", 0, 0, AV_OPT_TYPE_CONST, {.i64 = SUMMARY}, 0, 0, FLAGS, "print_format" },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
AVFILTER_DEFINE_CLASS(loudnorm);
|
||||
|
||||
static inline int frame_size(int sample_rate, int frame_len_msec)
|
||||
{
|
||||
const int frame_size = round((double)sample_rate * (frame_len_msec / 1000.0));
|
||||
return frame_size + (frame_size % 2);
|
||||
}
|
||||
|
||||
/**
 * Fill s->weights with a 21-tap Gaussian kernel (sigma = 3.5) centred on
 * tap 10 and scaled so that the taps sum to 1.
 */
static void init_gaussian_filter(LoudNormContext *s)
{
    const double sigma  = 3.5;
    const int    center = 21 / 2;
    const double norm   = 1.0 / (sigma * sqrt(2.0 * M_PI));
    const double denom  = 2.0 * pow(sigma, 2.0);
    double sum = 0.0;
    double scale;
    int k;

    /* Unnormalised Gaussian samples and their running sum. */
    for (k = 0; k < 21; k++) {
        const int dist = k - center;

        s->weights[k] = norm * exp(-(pow(dist, 2.0) / denom));
        sum += s->weights[k];
    }

    /* Rescale so the kernel has unit gain. */
    scale = 1.0 / sum;
    for (k = 0; k < 21; k++)
        s->weights[k] *= scale;
}
|
||||
|
||||
/**
 * Weighted sum of 21 consecutive entries of the circular s->delta history.
 *
 * @param s     filter context providing delta[30] and weights[21]
 * @param index slot in delta[] the 21-tap window is centred on
 * @return sum of delta[] entries multiplied by the matching kernel weights
 */
static double gaussian_filter(LoudNormContext *s, int index)
{
    double acc = 0.;
    int start, k;

    /* Step back 10 slots to the first tap, wrapping within the 30-slot ring. */
    if (index - 10 > 0)
        start = index - 10;
    else
        start = index + 20;

    for (k = 0; k < 21; k++) {
        int slot = start + k;

        if (!(slot < 30))
            slot -= 30;
        acc += s->delta[slot] * s->weights[k];
    }

    return acc;
}
|
||||
|
||||
/* Fold a position that is less than one ring length past the end back into the ring. */
static inline int peak_wrap_index(int pos, int size)
{
    return pos < size ? pos : pos - size;
}

/**
 * Scan the limiter ring buffer for the next local maximum above the ceiling.
 *
 * The scan starts 1920 sample frames (plus @p offset) ahead of
 * s->limiter_buf_index. A sample qualifies when it is not smaller than its
 * predecessor and successor, exceeds s->target_tp, is not the first sample
 * scanned, and none of the following samples at distances 2..11 in the same
 * channel is larger.
 *
 * @param s           filter context; prev_smp[] and peak_index are updated
 * @param offset      sample frames to skip before scanning
 * @param nb_samples  number of sample frames to scan
 * @param channels    number of interleaved channels
 * @param peak_delta  set to the scan position of the peak, or -1 if none was found
 * @param peak_value  set to the largest absolute value across channels at the
 *                    peak position; untouched when no peak was found
 */
static void detect_peak(LoudNormContext *s, int offset, int nb_samples, int channels, int *peak_delta, double *peak_value)
{
    double *const ring   = s->limiter_buf;
    const int ring_size  = s->limiter_buf_size;
    const double limit   = s->target_tp;
    int pos, smp, ch, k;

    *peak_delta = -1;

    pos = s->limiter_buf_index + (offset * channels) + (1920 * channels);
    if (pos >= ring_size)
        pos -= ring_size;

    /* Seed the per-channel history from the sample frame preceding the scan start. */
    if (s->frame_type == FIRST_FRAME) {
        for (ch = 0; ch < channels; ch++)
            s->prev_smp[ch] = fabs(ring[pos + ch - channels]);
    }

    for (smp = 0; smp < nb_samples; smp++) {
        for (ch = 0; ch < channels; ch++) {
            const double cur   = fabs(ring[peak_wrap_index(pos + ch, ring_size)]);
            const double after = fabs(ring[peak_wrap_index(pos + ch + channels, ring_size)]);
            double loudest = 0.;
            int j;

            /* Not a candidate: remember the sample and move to the next channel. */
            if (!((s->prev_smp[ch] <= cur) && (after <= cur) && (cur > limit) && (smp > 0))) {
                s->prev_smp[ch] = cur;
                continue;
            }

            /* Reject the candidate if a larger sample follows shortly after. */
            for (k = 2; k < 12; k++) {
                if (fabs(ring[peak_wrap_index(pos + ch + (k * channels), ring_size)]) > cur)
                    break;
            }
            if (k < 12)
                continue; /* prev_smp[ch] is deliberately left untouched here */

            /* Confirmed: report the largest magnitude of this sample frame. */
            for (j = 0; j < channels; j++) {
                if (j == 0 || fabs(ring[pos + j]) > loudest)
                    loudest = fabs(ring[pos + j]);

                s->prev_smp[j] = fabs(ring[peak_wrap_index(pos + j, ring_size)]);
            }

            *peak_delta   = smp;
            s->peak_index = pos;
            *peak_value   = loudest;
            return;
        }

        pos += channels;
        if (pos >= ring_size)
            pos -= ring_size;
    }
}
|
||||
|
||||
/* Multiply one interleaved sample frame by a gain factor. */
static inline void limiter_scale_frame(double *frame, int channels, double gain)
{
    int ch;

    for (ch = 0; ch < channels; ch++)
        frame[ch] *= gain;
}

/* Advance a ring position by one sample frame, wrapping at the ring size. */
static inline int limiter_next_index(int pos, int channels, int size)
{
    pos += channels;
    if (pos >= size)
        pos -= size;
    return pos;
}

/**
 * Apply the limiter envelope to s->limiter_buf and copy the result out.
 *
 * The envelope is a four-state machine (OUT, ATTACK, SUSTAIN, RELEASE) that
 * is advanced until nb_samples sample frames are accounted for. Afterwards
 * nb_samples frames starting at s->limiter_buf_index are copied to @p out,
 * hard-clipping anything whose magnitude still exceeds s->target_tp.
 *
 * @param s          filter context holding the limiter state
 * @param out        destination for nb_samples interleaved sample frames
 * @param nb_samples number of sample frames to produce
 * @param channels   number of interleaved channels
 */
static void true_peak_limiter(LoudNormContext *s, double *out, int nb_samples, int channels)
{
    double *const ring  = s->limiter_buf;
    const int ring_size = s->limiter_buf_size;
    const double limit  = s->target_tp;
    int read_pos        = s->limiter_buf_index;
    int done            = 0;
    int peak_delta, n, ch, i;
    double peak_value, gain;

    /* On the very first call, pre-scale the leading 1920 frames if they exceed the ceiling. */
    if (s->frame_type == FIRST_FRAME) {
        const int lead = 1920 * channels;
        double loudest = 0.;

        for (i = 0; i < lead; i++) {
            if (fabs(ring[i]) > loudest)
                loudest = fabs(ring[i]);
        }

        if (loudest > limit) {
            s->gain_reduction[1] = limit / loudest;
            s->limiter_state     = SUSTAIN;

            for (i = 0; i < lead; i++)
                ring[i] *= s->gain_reduction[1];
        }
    }

    do {
        switch (s->limiter_state) {
        case OUT:
            detect_peak(s, done, nb_samples - done, channels, &peak_delta, &peak_value);
            if (peak_delta == -1) {
                done = nb_samples;
                break;
            }

            s->env_cnt = 0;
            done += (peak_delta - s->attack_length);
            s->gain_reduction[0] = 1.;
            s->gain_reduction[1] = limit / peak_value;
            s->limiter_state = ATTACK;

            /* The attack ramp starts attack_length frames before the peak. */
            s->env_index = s->peak_index - (s->attack_length * channels);
            if (s->env_index < 0)
                s->env_index += ring_size;

            s->env_index += (s->env_cnt * channels);
            /* NOTE(review): this wrap uses '>' while every other wrap uses '>='. */
            if (s->env_index > ring_size)
                s->env_index -= ring_size;
            break;

        case ATTACK:
            while (s->env_cnt < s->attack_length) {
                gain = s->gain_reduction[0] - ((double) s->env_cnt / (s->attack_length - 1) * (s->gain_reduction[0] - s->gain_reduction[1]));
                limiter_scale_frame(ring + s->env_index, channels, gain);
                s->env_index = limiter_next_index(s->env_index, channels, ring_size);

                s->env_cnt++;
                done++;
                if (done >= nb_samples)
                    break;
            }

            if (done < nb_samples) {
                s->env_cnt = 0;
                s->attack_length = 1920;
                s->limiter_state = SUSTAIN;
            }
            break;

        case SUSTAIN:
            detect_peak(s, done, nb_samples, channels, &peak_delta, &peak_value);
            if (peak_delta == -1) {
                /* No further peak: ramp back to unity gain. */
                s->limiter_state = RELEASE;
                s->gain_reduction[0] = s->gain_reduction[1];
                s->gain_reduction[1] = 1.;
                s->env_cnt = 0;
                break;
            }

            gain = limit / peak_value;
            if (gain < s->gain_reduction[1]) {
                /* The next peak needs more reduction: ramp down towards it. */
                s->limiter_state = ATTACK;

                s->attack_length = peak_delta;
                if (s->attack_length <= 1)
                    s->attack_length = 2;

                s->gain_reduction[0] = s->gain_reduction[1];
                s->gain_reduction[1] = gain;
                s->env_cnt = 0;
                break;
            }

            /* The current reduction already covers the next peak: hold it up to there. */
            s->env_cnt = 0;
            while (s->env_cnt < peak_delta) {
                limiter_scale_frame(ring + s->env_index, channels, s->gain_reduction[1]);
                s->env_index = limiter_next_index(s->env_index, channels, ring_size);

                s->env_cnt++;
                done++;
                if (done >= nb_samples)
                    break;
            }
            break;

        case RELEASE:
            while (s->env_cnt < s->release_length) {
                gain = s->gain_reduction[0] + (((double) s->env_cnt / (s->release_length - 1)) * (s->gain_reduction[1] - s->gain_reduction[0]));
                limiter_scale_frame(ring + s->env_index, channels, gain);
                s->env_index = limiter_next_index(s->env_index, channels, ring_size);

                s->env_cnt++;
                done++;
                if (done >= nb_samples)
                    break;
            }

            if (done < nb_samples) {
                s->env_cnt = 0;
                s->limiter_state = OUT;
            }
            break;
        }
    } while (done < nb_samples);

    /* Copy the processed frames out, clipping whatever still exceeds the ceiling. */
    for (n = 0; n < nb_samples; n++) {
        for (ch = 0; ch < channels; ch++) {
            out[ch] = ring[read_pos + ch];
            if (fabs(out[ch]) > limit)
                out[ch] = limit * (out[ch] < 0 ? -1 : 1);
        }
        out += channels;
        read_pos = limiter_next_index(read_pos, channels, ring_size);
    }
}
|
||||
|
||||
/**
 * Process one input frame according to s->frame_type.
 *
 * - FIRST_FRAME: buffer the input, derive the initial gain from the
 *   short-term loudness, prime the limiter buffer and emit the first 100 ms.
 *   If the first frame is shorter than 3000 ms, switch to LINEAR_MODE with a
 *   gain derived from the measured integrated loudness and sample peak.
 * - INNER_FRAME: push new input into the ring buffer, apply the smoothed
 *   gain, run the limiter and compute the gain for a later frame.
 * - FINAL_FRAME: drain the samples supplied by request_frame() with a
 *   constant gain through the limiter.
 * - LINEAR_MODE: multiply every sample by s->offset.
 *
 * @param inlink input link the frame arrived on
 * @param in     input frame; ownership is taken
 * @return the result of ff_filter_frame(), or AVERROR(ENOMEM) if no output
 *         buffer could be allocated
 */
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
    AVFilterContext *ctx = inlink->dst;
    LoudNormContext *s = ctx->priv;
    AVFilterLink *outlink = ctx->outputs[0];
    AVFrame *out;
    const double *src;
    double *dst;
    double *buf;
    double *limiter_buf;
    int i, n, c, subframe_length, src_index;
    double gain, gain_next, env_global, env_shortterm,
           global, shortterm, lra, relative_threshold;

    if (av_frame_is_writable(in)) {
        out = in;
    } else {
        /* The buffer is sent downstream, so request it from the output link. */
        out = ff_get_audio_buffer(outlink, in->nb_samples);
        if (!out) {
            av_frame_free(&in);
            return AVERROR(ENOMEM);
        }
        av_frame_copy_props(out, in);
    }

    out->pts = s->pts;
    src = (const double *)in->data[0];
    dst = (double *)out->data[0];
    buf = s->buf;
    limiter_buf = s->limiter_buf;

    ebur128_add_frames_double(s->r128_in, src, in->nb_samples);

    /* Input shorter than the 3000 ms analysis window: fall back to a constant gain. */
    if (s->frame_type == FIRST_FRAME && in->nb_samples < frame_size(inlink->sample_rate, 3000)) {
        double offset, offset_tp, true_peak = 0.;

        ebur128_loudness_global(s->r128_in, &global);
        for (c = 0; c < inlink->channels; c++) {
            double tmp;
            ebur128_sample_peak(s->r128_in, c, &tmp);
            if (c == 0 || tmp > true_peak)
                true_peak = tmp;
        }

        /*
         * The sample peak and s->target_tp (converted in config_input()) are
         * linear amplitudes, so the gain is computed in the linear domain as
         * well instead of mixing dB and linear values.
         */
        if (true_peak > 0.) {
            offset    = pow(10., (s->target_i - global) / 20.);
            offset_tp = true_peak * offset;
            s->offset = offset_tp < s->target_tp ? offset : s->target_tp / true_peak;
        } else {
            /* All-zero input: any gain yields silence, avoid dividing by a zero peak. */
            s->offset = 1.;
        }
        s->frame_type = LINEAR_MODE;
    }

    switch (s->frame_type) {
    case FIRST_FRAME:
        for (n = 0; n < in->nb_samples; n++) {
            for (c = 0; c < inlink->channels; c++) {
                buf[s->buf_index + c] = src[c];
            }
            src += inlink->channels;
            s->buf_index += inlink->channels;
        }

        ebur128_loudness_shortterm(s->r128_in, &shortterm);

        if (shortterm < s->measured_thresh) {
            s->above_threshold = 0;
            env_shortterm = shortterm <= -70. ? 0. : s->target_i - s->measured_i;
        } else {
            s->above_threshold = 1;
            env_shortterm = shortterm <= -70. ? 0. : s->target_i - shortterm;
        }

        /* Start the gain history with the same value in every slot. */
        for (n = 0; n < 30; n++)
            s->delta[n] = pow(10., env_shortterm / 20.);
        s->prev_delta = s->delta[s->index];

        s->buf_index =
        s->limiter_buf_index = 0;

        /* Prime the limiter buffer with the gain-adjusted start of the input. */
        for (n = 0; n < (s->limiter_buf_size / inlink->channels); n++) {
            for (c = 0; c < inlink->channels; c++) {
                limiter_buf[s->limiter_buf_index + c] = buf[s->buf_index + c] * s->delta[s->index] * s->offset;
            }
            s->limiter_buf_index += inlink->channels;
            if (s->limiter_buf_index >= s->limiter_buf_size)
                s->limiter_buf_index -= s->limiter_buf_size;

            s->buf_index += inlink->channels;
        }

        subframe_length = frame_size(inlink->sample_rate, 100);
        true_peak_limiter(s, dst, subframe_length, inlink->channels);
        ebur128_add_frames_double(s->r128_out, dst, subframe_length);

        /* From now on the input is requested in 100 ms frames. */
        s->pts +=
        out->nb_samples =
        inlink->min_samples =
        inlink->max_samples =
        inlink->partial_buf_size = subframe_length;

        s->frame_type = INNER_FRAME;
        break;

    case INNER_FRAME:
        gain      = gaussian_filter(s, s->index + 10 < 30 ? s->index + 10 : s->index + 10 - 30);
        gain_next = gaussian_filter(s, s->index + 11 < 30 ? s->index + 11 : s->index + 11 - 30);

        for (n = 0; n < in->nb_samples; n++) {
            for (c = 0; c < inlink->channels; c++) {
                buf[s->prev_buf_index + c] = src[c];
                /* Interpolate linearly between this frame's gain and the next one. */
                limiter_buf[s->limiter_buf_index + c] = buf[s->buf_index + c] * (gain + (((double) n / in->nb_samples) * (gain_next - gain))) * s->offset;
            }
            src += inlink->channels;

            s->limiter_buf_index += inlink->channels;
            if (s->limiter_buf_index >= s->limiter_buf_size)
                s->limiter_buf_index -= s->limiter_buf_size;

            s->prev_buf_index += inlink->channels;
            if (s->prev_buf_index >= s->buf_size)
                s->prev_buf_index -= s->buf_size;

            s->buf_index += inlink->channels;
            if (s->buf_index >= s->buf_size)
                s->buf_index -= s->buf_size;
        }

        /* Skip the part of a 100 ms slot that a shorter input frame did not fill. */
        subframe_length = (frame_size(inlink->sample_rate, 100) - in->nb_samples) * inlink->channels;
        s->limiter_buf_index = s->limiter_buf_index + subframe_length < s->limiter_buf_size ? s->limiter_buf_index + subframe_length : s->limiter_buf_index + subframe_length - s->limiter_buf_size;

        true_peak_limiter(s, dst, in->nb_samples, inlink->channels);
        ebur128_add_frames_double(s->r128_out, dst, in->nb_samples);

        ebur128_loudness_range(s->r128_in, &lra);
        ebur128_loudness_global(s->r128_in, &global);
        ebur128_loudness_shortterm(s->r128_in, &shortterm);
        ebur128_relative_threshold(s->r128_in, &relative_threshold);

        if (s->above_threshold == 0) {
            double shortterm_out;

            if (shortterm > s->measured_thresh)
                s->prev_delta *= 1.0058;

            ebur128_loudness_shortterm(s->r128_out, &shortterm_out);
            if (shortterm_out >= s->target_i)
                s->above_threshold = 1;
        }

        if (shortterm < relative_threshold || shortterm <= -70. || s->above_threshold == 0) {
            s->delta[s->index] = s->prev_delta;
        } else {
            /* Clamp the deviation from the integrated loudness to half the target LRA. */
            env_global = fabs(shortterm - global) < (s->target_lra / 2.) ? shortterm - global : (s->target_lra / 2.) * ((shortterm - global) < 0 ? -1 : 1);
            env_shortterm = s->target_i - shortterm;
            s->delta[s->index] = pow(10., (env_global + env_shortterm) / 20.);
        }

        s->prev_delta = s->delta[s->index];
        s->index++;
        if (s->index >= 30)
            s->index -= 30;
        s->prev_nb_samples = in->nb_samples;
        s->pts += in->nb_samples;
        break;

    case FINAL_FRAME:
        gain = gaussian_filter(s, s->index + 10 < 30 ? s->index + 10 : s->index + 10 - 30);
        s->limiter_buf_index = 0;
        src_index = 0;

        for (n = 0; n < s->limiter_buf_size / inlink->channels; n++) {
            for (c = 0; c < inlink->channels; c++) {
                s->limiter_buf[s->limiter_buf_index + c] = src[src_index + c] * gain * s->offset;
            }
            src_index += inlink->channels;

            s->limiter_buf_index += inlink->channels;
            if (s->limiter_buf_index >= s->limiter_buf_size)
                s->limiter_buf_index -= s->limiter_buf_size;
        }

        subframe_length = frame_size(inlink->sample_rate, 100);
        for (i = 0; i < in->nb_samples / subframe_length; i++) {
            true_peak_limiter(s, dst, subframe_length, inlink->channels);

            /* Refill the limiter buffer; pad with zeros once the input is used up. */
            for (n = 0; n < subframe_length; n++) {
                for (c = 0; c < inlink->channels; c++) {
                    if (src_index < (in->nb_samples * inlink->channels)) {
                        limiter_buf[s->limiter_buf_index + c] = src[src_index + c] * gain * s->offset;
                    } else {
                        limiter_buf[s->limiter_buf_index + c] = 0.;
                    }
                }

                if (src_index < (in->nb_samples * inlink->channels))
                    src_index += inlink->channels;

                s->limiter_buf_index += inlink->channels;
                if (s->limiter_buf_index >= s->limiter_buf_size)
                    s->limiter_buf_index -= s->limiter_buf_size;
            }

            dst += (subframe_length * inlink->channels);
        }

        dst = (double *)out->data[0];
        ebur128_add_frames_double(s->r128_out, dst, in->nb_samples);
        break;

    case LINEAR_MODE:
        for (n = 0; n < in->nb_samples; n++) {
            for (c = 0; c < inlink->channels; c++) {
                dst[c] = src[c] * s->offset;
            }
            src += inlink->channels;
            dst += inlink->channels;
        }

        dst = (double *)out->data[0];
        ebur128_add_frames_double(s->r128_out, dst, in->nb_samples);
        s->pts += in->nb_samples;
        break;
    }

    if (in != out)
        av_frame_free(&in);

    return ff_filter_frame(outlink, out);
}
|
||||
|
||||
/**
 * Request a frame from the input; at EOF in dynamic mode, flush what is
 * still held in the ring buffer.
 *
 * When the input reports AVERROR_EOF while the filter is in INNER_FRAME
 * state, the remaining buffered samples are copied into a new frame, the
 * state is switched to FINAL_FRAME and the frame is run through
 * filter_frame().
 *
 * @return the result of ff_request_frame(), the result of filter_frame() for
 *         the flush frame, or AVERROR(ENOMEM)
 */
static int request_frame(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    AVFilterLink *inlink = ctx->inputs[0];
    LoudNormContext *s   = ctx->priv;
    AVFrame *flush;
    double *dst;
    int remaining, rewind, pos, n, c;
    int ret;

    ret = ff_request_frame(inlink);
    if (ret != AVERROR_EOF || s->frame_type != INNER_FRAME)
        return ret;

    /* Sample frames still buffered. */
    remaining  = (s->buf_size / inlink->channels) - s->prev_nb_samples;
    remaining -= (frame_size(inlink->sample_rate, 100) - s->prev_nb_samples);

    flush = ff_get_audio_buffer(outlink, remaining);
    if (!flush)
        return AVERROR(ENOMEM);
    flush->nb_samples = remaining;

    dst = (double *)flush->data[0];

    /* Move the read position back by the samples already sitting in the limiter buffer. */
    rewind  = ((s->limiter_buf_size / inlink->channels) - s->prev_nb_samples) * inlink->channels;
    rewind -= (frame_size(inlink->sample_rate, 100) - s->prev_nb_samples) * inlink->channels;

    pos = s->buf_index - rewind;
    if (pos < 0)
        pos += s->buf_size;
    s->buf_index = pos;

    for (n = 0; n < remaining; n++) {
        for (c = 0; c < inlink->channels; c++)
            dst[c] = s->buf[s->buf_index + c];

        dst          += inlink->channels;
        s->buf_index += inlink->channels;
        if (s->buf_index >= s->buf_size)
            s->buf_index -= s->buf_size;
    }

    s->frame_type = FINAL_FRAME;
    return filter_frame(inlink, flush);
}
|
||||
|
||||
/**
 * Declare the supported formats: any channel count, interleaved double
 * samples, and a fixed sample rate of 192000 Hz on both links.
 *
 * @return 0 on success, AVERROR(ENOMEM) or the error of the failing helper
 */
static int query_formats(AVFilterContext *ctx)
{
    static const enum AVSampleFormat sample_fmts[] = {
        AV_SAMPLE_FMT_DBL,
        AV_SAMPLE_FMT_NONE
    };
    static const int input_srate[] = {192000, -1};
    AVFilterLink *inlink  = ctx->inputs[0];
    AVFilterLink *outlink = ctx->outputs[0];
    AVFilterChannelLayouts *layouts;
    AVFilterFormats *formats;
    int ret;

    /* Channel layouts: everything is accepted. */
    layouts = ff_all_channel_counts();
    if (!layouts)
        return AVERROR(ENOMEM);
    if ((ret = ff_set_common_channel_layouts(ctx, layouts)) < 0)
        return ret;

    /* Sample format: double only. */
    formats = ff_make_format_list(sample_fmts);
    if (!formats)
        return AVERROR(ENOMEM);
    if ((ret = ff_set_common_formats(ctx, formats)) < 0)
        return ret;

    /* Sample rate: the same single-entry list is referenced by both links. */
    formats = ff_make_format_list(input_srate);
    if (!formats)
        return AVERROR(ENOMEM);
    if ((ret = ff_formats_ref(formats, &inlink->out_samplerates)) < 0)
        return ret;
    if ((ret = ff_formats_ref(formats, &outlink->in_samplerates)) < 0)
        return ret;

    return 0;
}
|
||||
|
||||
/**
 * Allocate the measurement states and buffers and reset the filter state
 * for the negotiated input format.
 *
 * Linear mode is selected when the "linear" option is set, all four
 * measured_* options differ from their defaults, and applying the plain
 * loudness offset neither exceeds the true-peak target nor requires an LRA
 * reduction. Otherwise the input is requested in 3000 ms frames for the
 * dynamic mode.
 *
 * @return 0 on success, AVERROR(ENOMEM) if an allocation fails
 */
static int config_input(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    LoudNormContext *s   = ctx->priv;
    const int nb_ch      = inlink->channels;
    const int rate       = inlink->sample_rate;
    const int r128_mode  = EBUR128_MODE_I | EBUR128_MODE_S | EBUR128_MODE_LRA | EBUR128_MODE_SAMPLE_PEAK;
    double linear_offset, linear_offset_tp;
    int window;

    s->r128_in = ebur128_init(nb_ch, rate, r128_mode);
    if (!s->r128_in)
        return AVERROR(ENOMEM);

    s->r128_out = ebur128_init(nb_ch, rate, r128_mode);
    if (!s->r128_out)
        return AVERROR(ENOMEM);

    s->buf_size = frame_size(rate, 3000) * nb_ch;
    s->buf      = av_malloc_array(s->buf_size, sizeof(*s->buf));
    if (!s->buf)
        return AVERROR(ENOMEM);

    s->limiter_buf_size = frame_size(rate, 210) * nb_ch;
    /* NOTE(review): allocated with buf_size elements, not limiter_buf_size - confirm this is intended. */
    s->limiter_buf      = av_malloc_array(s->buf_size, sizeof(*s->limiter_buf));
    if (!s->limiter_buf)
        return AVERROR(ENOMEM);

    s->prev_smp = av_malloc_array(nb_ch, sizeof(*s->prev_smp));
    if (!s->prev_smp)
        return AVERROR(ENOMEM);

    init_gaussian_filter(s);

    s->frame_type = FIRST_FRAME;

    /* target_tp is still in dB here; it is converted further below. */
    linear_offset    = s->target_i - s->measured_i;
    linear_offset_tp = s->measured_tp + linear_offset;
    if (s->linear &&
        s->measured_tp != 99 && s->measured_thresh != -70 && s->measured_lra != 0 && s->measured_i != 0 &&
        (linear_offset_tp <= s->target_tp) && (s->measured_lra <= s->target_lra)) {
        s->frame_type = LINEAR_MODE;
        s->offset     = linear_offset;
    }

    if (s->frame_type != LINEAR_MODE) {
        window = frame_size(rate, 3000);
        inlink->partial_buf_size = window;
        inlink->max_samples      = window;
        inlink->min_samples      = window;
    }

    s->limiter_buf_index = 0;
    s->prev_buf_index    = 0;
    s->buf_index         = 0;
    s->pts               = 0;
    s->channels          = nb_ch;
    s->index             = 1;
    s->limiter_state     = OUT;
    /* From here on offset and target_tp are linear factors instead of dB values. */
    s->offset            = pow(10., s->offset / 20.);
    s->target_tp         = pow(10., s->target_tp / 20.);
    s->attack_length     = frame_size(rate, 10);
    s->release_length    = frame_size(rate, 100);

    return 0;
}
|
||||
|
||||
/**
 * Log the measured input/output statistics in the requested format and
 * release everything allocated by config_input().
 *
 * The measurement states only exist after config_input() has succeeded, so
 * the statistics are skipped and each state is destroyed only if present.
 * This keeps uninit() safe when the filter is freed before being configured.
 */
static av_cold void uninit(AVFilterContext *ctx)
{
    LoudNormContext *s = ctx->priv;
    double i_in, i_out, lra_in, lra_out, thresh_in, thresh_out;
    double tp_in = 0., tp_out = 0.;
    int c;

    if (!s->r128_in || !s->r128_out)
        goto end;

    ebur128_loudness_range(s->r128_in, &lra_in);
    ebur128_loudness_global(s->r128_in, &i_in);
    ebur128_relative_threshold(s->r128_in, &thresh_in);
    /* Largest sample peak over all channels. */
    for (c = 0; c < s->channels; c++) {
        double tmp;
        ebur128_sample_peak(s->r128_in, c, &tmp);
        if ((c == 0) || (tmp > tp_in))
            tp_in = tmp;
    }

    ebur128_loudness_range(s->r128_out, &lra_out);
    ebur128_loudness_global(s->r128_out, &i_out);
    ebur128_relative_threshold(s->r128_out, &thresh_out);
    for (c = 0; c < s->channels; c++) {
        double tmp;
        ebur128_sample_peak(s->r128_out, c, &tmp);
        if ((c == 0) || (tmp > tp_out))
            tp_out = tmp;
    }

    switch(s->print_format) {
    case NONE:
        break;

    case JSON:
        av_log(ctx, AV_LOG_INFO,
            "\n{\n"
            "\t\"input_i\" : \"%.2f\",\n"
            "\t\"input_tp\" : \"%.2f\",\n"
            "\t\"input_lra\" : \"%.2f\",\n"
            "\t\"input_thresh\" : \"%.2f\",\n"
            "\t\"output_i\" : \"%.2f\",\n"
            "\t\"output_tp\" : \"%+.2f\",\n"
            "\t\"output_lra\" : \"%.2f\",\n"
            "\t\"output_thresh\" : \"%.2f\",\n"
            "\t\"normalization_type\" : \"%s\",\n"
            "\t\"target_offset\" : \"%.2f\"\n"
            "}\n",
            i_in,
            20. * log10(tp_in), /* peaks are linear amplitudes; report them in dB */
            lra_in,
            thresh_in,
            i_out,
            20. * log10(tp_out),
            lra_out,
            thresh_out,
            s->frame_type == LINEAR_MODE ? "linear" : "dynamic",
            s->target_i - i_out
        );
        break;

    case SUMMARY:
        av_log(ctx, AV_LOG_INFO,
            "\n"
            "Input Integrated: %+6.1f LUFS\n"
            "Input True Peak: %+6.1f dBTP\n"
            "Input LRA: %6.1f LU\n"
            "Input Threshold: %+6.1f LUFS\n"
            "\n"
            "Output Integrated: %+6.1f LUFS\n"
            "Output True Peak: %+6.1f dBTP\n"
            "Output LRA: %6.1f LU\n"
            "Output Threshold: %+6.1f LUFS\n"
            "\n"
            "Normalization Type: %s\n"
            "Target Offset: %+6.1f LU\n",
            i_in,
            20. * log10(tp_in),
            lra_in,
            thresh_in,
            i_out,
            20. * log10(tp_out),
            lra_out,
            thresh_out,
            s->frame_type == LINEAR_MODE ? "Linear" : "Dynamic",
            s->target_i - i_out
        );
        break;
    }

end:
    if (s->r128_in)
        ebur128_destroy(&s->r128_in);
    if (s->r128_out)
        ebur128_destroy(&s->r128_out);
    av_freep(&s->limiter_buf);
    av_freep(&s->prev_smp);
    av_freep(&s->buf);
}
|
||||
|
||||
static const AVFilterPad avfilter_af_loudnorm_inputs[] = {
|
||||
{
|
||||
.name = "default",
|
||||
.type = AVMEDIA_TYPE_AUDIO,
|
||||
.config_props = config_input,
|
||||
.filter_frame = filter_frame,
|
||||
},
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
static const AVFilterPad avfilter_af_loudnorm_outputs[] = {
|
||||
{
|
||||
.name = "default",
|
||||
.request_frame = request_frame,
|
||||
.type = AVMEDIA_TYPE_AUDIO,
|
||||
},
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
AVFilter ff_af_loudnorm = {
|
||||
.name = "loudnorm",
|
||||
.description = NULL_IF_CONFIG_SMALL("EBU R128 loudness normalization"),
|
||||
.priv_size = sizeof(LoudNormContext),
|
||||
.priv_class = &loudnorm_class,
|
||||
.query_formats = query_formats,
|
||||
.uninit = uninit,
|
||||
.inputs = avfilter_af_loudnorm_inputs,
|
||||
.outputs = avfilter_af_loudnorm_outputs,
|
||||
};
|
@ -108,6 +108,7 @@ void avfilter_register_all(void)
|
||||
REGISTER_FILTER(HIGHPASS, highpass, af);
|
||||
REGISTER_FILTER(JOIN, join, af);
|
||||
REGISTER_FILTER(LADSPA, ladspa, af);
|
||||
REGISTER_FILTER(LOUDNORM, loudnorm, af);
|
||||
REGISTER_FILTER(LOWPASS, lowpass, af);
|
||||
REGISTER_FILTER(PAN, pan, af);
|
||||
REGISTER_FILTER(REPLAYGAIN, replaygain, af);
|
||||
|
@ -30,7 +30,7 @@
|
||||
#include "libavutil/version.h"
|
||||
|
||||
#define LIBAVFILTER_VERSION_MAJOR 6
|
||||
#define LIBAVFILTER_VERSION_MINOR 45
|
||||
#define LIBAVFILTER_VERSION_MINOR 46
|
||||
#define LIBAVFILTER_VERSION_MICRO 100
|
||||
|
||||
#define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
|
||||
|
Loading…
Reference in New Issue
Block a user