You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-08-10 06:10:52 +02:00
avfilter/af_speechnorm: implement rms option
This commit is contained in:
@@ -6338,6 +6338,10 @@ option. When enabled any half-cycle of samples with their local peak value below
|
|||||||
Link channels when calculating the gain applied to each filtered channel sample. Disabled by default.
|
Link channels when calculating the gain applied to each filtered channel sample. Disabled by default.
|
||||||
When disabled each filtered channel gain calculation is independent, otherwise when this option
|
When disabled each filtered channel gain calculation is independent, otherwise when this option
|
||||||
is enabled the minimum of all possible gains for each filtered channel is used.
|
is enabled the minimum of all possible gains for each filtered channel is used.
|
||||||
|
|
||||||
|
@item rms, m
|
||||||
|
Set the expansion target RMS value. This specifies the highest allowed RMS level for the normalized
|
||||||
|
audio input. Default value is 0.0, which disables the RMS limit. Allowed range is from 0.0 to 1.0.
|
||||||
@end table
|
@end table
|
||||||
|
|
||||||
@subsection Commands
|
@subsection Commands
|
||||||
|
@@ -46,6 +46,7 @@ typedef struct PeriodItem {
|
|||||||
int size;
|
int size;
|
||||||
int type;
|
int type;
|
||||||
double max_peak;
|
double max_peak;
|
||||||
|
double rms_sum;
|
||||||
} PeriodItem;
|
} PeriodItem;
|
||||||
|
|
||||||
typedef struct ChannelContext {
|
typedef struct ChannelContext {
|
||||||
@@ -54,6 +55,7 @@ typedef struct ChannelContext {
|
|||||||
PeriodItem pi[MAX_ITEMS];
|
PeriodItem pi[MAX_ITEMS];
|
||||||
double gain_state;
|
double gain_state;
|
||||||
double pi_max_peak;
|
double pi_max_peak;
|
||||||
|
double pi_rms_sum;
|
||||||
int pi_start;
|
int pi_start;
|
||||||
int pi_end;
|
int pi_end;
|
||||||
int pi_size;
|
int pi_size;
|
||||||
@@ -62,6 +64,7 @@ typedef struct ChannelContext {
|
|||||||
typedef struct SpeechNormalizerContext {
|
typedef struct SpeechNormalizerContext {
|
||||||
const AVClass *class;
|
const AVClass *class;
|
||||||
|
|
||||||
|
double rms_value;
|
||||||
double peak_value;
|
double peak_value;
|
||||||
double max_expansion;
|
double max_expansion;
|
||||||
double max_compression;
|
double max_compression;
|
||||||
@@ -110,6 +113,8 @@ static const AVOption speechnorm_options[] = {
|
|||||||
{ "i", "set inverted filtering", OFFSET(invert), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
|
{ "i", "set inverted filtering", OFFSET(invert), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
|
||||||
{ "link", "set linked channels filtering", OFFSET(link), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
|
{ "link", "set linked channels filtering", OFFSET(link), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
|
||||||
{ "l", "set linked channels filtering", OFFSET(link), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
|
{ "l", "set linked channels filtering", OFFSET(link), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
|
||||||
|
{ "rms", "set the RMS value", OFFSET(rms_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.0}, 0.0, 1.0, FLAGS },
|
||||||
|
{ "m", "set the RMS value", OFFSET(rms_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.0}, 0.0, 1.0, FLAGS },
|
||||||
{ NULL }
|
{ NULL }
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -161,12 +166,16 @@ static void consume_pi(ChannelContext *cc, int nb_samples)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static double next_gain(AVFilterContext *ctx, double pi_max_peak, int bypass, double state)
|
static double next_gain(AVFilterContext *ctx, double pi_max_peak, int bypass, double state,
|
||||||
|
double pi_rms_sum, int pi_size)
|
||||||
{
|
{
|
||||||
SpeechNormalizerContext *s = ctx->priv;
|
SpeechNormalizerContext *s = ctx->priv;
|
||||||
const double expansion = FFMIN(s->max_expansion, s->peak_value / pi_max_peak);
|
|
||||||
const double compression = 1. / s->max_compression;
|
const double compression = 1. / s->max_compression;
|
||||||
const int type = s->invert ? pi_max_peak <= s->threshold_value : pi_max_peak >= s->threshold_value;
|
const int type = s->invert ? pi_max_peak <= s->threshold_value : pi_max_peak >= s->threshold_value;
|
||||||
|
double expansion = FFMIN(s->max_expansion, s->peak_value / pi_max_peak);
|
||||||
|
|
||||||
|
if (s->rms_value > DBL_EPSILON)
|
||||||
|
expansion = FFMIN(expansion, s->rms_value / sqrt(pi_rms_sum / pi_size));
|
||||||
|
|
||||||
if (bypass) {
|
if (bypass) {
|
||||||
return 1.;
|
return 1.;
|
||||||
@@ -187,13 +196,15 @@ static void next_pi(AVFilterContext *ctx, ChannelContext *cc, int bypass)
|
|||||||
av_assert1(cc->pi[start].size > 0);
|
av_assert1(cc->pi[start].size > 0);
|
||||||
av_assert0(cc->pi[start].type > 0 || s->eof);
|
av_assert0(cc->pi[start].type > 0 || s->eof);
|
||||||
cc->pi_size = cc->pi[start].size;
|
cc->pi_size = cc->pi[start].size;
|
||||||
|
cc->pi_rms_sum = cc->pi[start].rms_sum;
|
||||||
cc->pi_max_peak = cc->pi[start].max_peak;
|
cc->pi_max_peak = cc->pi[start].max_peak;
|
||||||
av_assert1(cc->pi_start != cc->pi_end || s->eof);
|
av_assert1(cc->pi_start != cc->pi_end || s->eof);
|
||||||
start++;
|
start++;
|
||||||
if (start >= MAX_ITEMS)
|
if (start >= MAX_ITEMS)
|
||||||
start = 0;
|
start = 0;
|
||||||
cc->pi_start = start;
|
cc->pi_start = start;
|
||||||
cc->gain_state = next_gain(ctx, cc->pi_max_peak, bypass, cc->gain_state);
|
cc->gain_state = next_gain(ctx, cc->pi_max_peak, bypass, cc->gain_state,
|
||||||
|
cc->pi_rms_sum, cc->pi_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -209,7 +220,8 @@ static double min_gain(AVFilterContext *ctx, ChannelContext *cc, int max_size)
|
|||||||
while (size <= max_size) {
|
while (size <= max_size) {
|
||||||
if (idx == cc->pi_end)
|
if (idx == cc->pi_end)
|
||||||
break;
|
break;
|
||||||
gain_state = next_gain(ctx, cc->pi[idx].max_peak, 0, gain_state);
|
gain_state = next_gain(ctx, cc->pi[idx].max_peak, 0, gain_state,
|
||||||
|
cc->pi[idx].rms_sum, cc->pi[idx].size);
|
||||||
min_gain = FFMIN(min_gain, gain_state);
|
min_gain = FFMIN(min_gain, gain_state);
|
||||||
size += cc->pi[idx].size;
|
size += cc->pi[idx].size;
|
||||||
idx++;
|
idx++;
|
||||||
@@ -236,11 +248,13 @@ static void analyze_channel_## name (AVFilterContext *ctx, ChannelContext *cc,
|
|||||||
\
|
\
|
||||||
while (n < nb_samples) { \
|
while (n < nb_samples) { \
|
||||||
ptype new_max_peak; \
|
ptype new_max_peak; \
|
||||||
|
ptype new_rms_sum; \
|
||||||
int new_size; \
|
int new_size; \
|
||||||
\
|
\
|
||||||
if ((cc->state != (src[n] >= zero)) || \
|
if ((cc->state != (src[n] >= zero)) || \
|
||||||
(pi[pi_end].size > max_period)) { \
|
(pi[pi_end].size > max_period)) { \
|
||||||
ptype max_peak = pi[pi_end].max_peak; \
|
ptype max_peak = pi[pi_end].max_peak; \
|
||||||
|
ptype rms_sum = pi[pi_end].rms_sum; \
|
||||||
int state = cc->state; \
|
int state = cc->state; \
|
||||||
\
|
\
|
||||||
cc->state = src[n] >= zero; \
|
cc->state = src[n] >= zero; \
|
||||||
@@ -251,10 +265,13 @@ static void analyze_channel_## name (AVFilterContext *ctx, ChannelContext *cc,
|
|||||||
pi_end++; \
|
pi_end++; \
|
||||||
if (pi_end >= MAX_ITEMS) \
|
if (pi_end >= MAX_ITEMS) \
|
||||||
pi_end = 0; \
|
pi_end = 0; \
|
||||||
if (cc->state != state) \
|
if (cc->state != state) { \
|
||||||
pi[pi_end].max_peak = DBL_MIN; \
|
pi[pi_end].max_peak = DBL_MIN; \
|
||||||
else \
|
pi[pi_end].rms_sum = 0.0; \
|
||||||
|
} else { \
|
||||||
pi[pi_end].max_peak = max_peak; \
|
pi[pi_end].max_peak = max_peak; \
|
||||||
|
pi[pi_end].rms_sum = rms_sum; \
|
||||||
|
} \
|
||||||
pi[pi_end].type = 0; \
|
pi[pi_end].type = 0; \
|
||||||
pi[pi_end].size = 0; \
|
pi[pi_end].size = 0; \
|
||||||
av_assert1(pi_end != cc->pi_start); \
|
av_assert1(pi_end != cc->pi_start); \
|
||||||
@@ -262,10 +279,12 @@ static void analyze_channel_## name (AVFilterContext *ctx, ChannelContext *cc,
|
|||||||
} \
|
} \
|
||||||
\
|
\
|
||||||
new_max_peak = pi[pi_end].max_peak; \
|
new_max_peak = pi[pi_end].max_peak; \
|
||||||
|
new_rms_sum = pi[pi_end].rms_sum; \
|
||||||
new_size = pi[pi_end].size; \
|
new_size = pi[pi_end].size; \
|
||||||
if (cc->state) { \
|
if (cc->state) { \
|
||||||
while (src[n] >= zero) { \
|
while (src[n] >= zero) { \
|
||||||
new_max_peak = FFMAX(new_max_peak, src[n]); \
|
new_max_peak = FFMAX(new_max_peak, src[n]); \
|
||||||
|
new_rms_sum += src[n] * src[n]; \
|
||||||
new_size++; \
|
new_size++; \
|
||||||
n++; \
|
n++; \
|
||||||
if (n >= nb_samples) \
|
if (n >= nb_samples) \
|
||||||
@@ -274,6 +293,7 @@ static void analyze_channel_## name (AVFilterContext *ctx, ChannelContext *cc,
|
|||||||
} else { \
|
} else { \
|
||||||
while (src[n] < zero) { \
|
while (src[n] < zero) { \
|
||||||
new_max_peak = FFMAX(new_max_peak, -src[n]); \
|
new_max_peak = FFMAX(new_max_peak, -src[n]); \
|
||||||
|
new_rms_sum += src[n] * src[n]; \
|
||||||
new_size++; \
|
new_size++; \
|
||||||
n++; \
|
n++; \
|
||||||
if (n >= nb_samples) \
|
if (n >= nb_samples) \
|
||||||
@@ -282,6 +302,7 @@ static void analyze_channel_## name (AVFilterContext *ctx, ChannelContext *cc,
|
|||||||
} \
|
} \
|
||||||
\
|
\
|
||||||
pi[pi_end].max_peak = new_max_peak; \
|
pi[pi_end].max_peak = new_max_peak; \
|
||||||
|
pi[pi_end].rms_sum = new_rms_sum; \
|
||||||
pi[pi_end].size = new_size; \
|
pi[pi_end].size = new_size; \
|
||||||
} \
|
} \
|
||||||
cc->pi_end = pi_end; \
|
cc->pi_end = pi_end; \
|
||||||
|
Reference in New Issue
Block a user