mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
avfilter/af_silenceremove: add real peak detector
Rename the old peak detector to a more accurate name.
This commit is contained in:
parent
3235de4883
commit
2b5166addf
@ -6461,8 +6461,7 @@ With @var{all}, only if all channels are detected as non-silence will cause
|
||||
stopped trimming of silence.
|
||||
|
||||
@item detection
|
||||
Set how is silence detected. Can be @code{rms} or @code{peak}. Second is faster
|
||||
and works better with digital silence which is exactly 0.
|
||||
Set how silence is detected. Can be @code{avg}, @code{rms} or @code{peak}.
|
||||
Default value is @code{rms}.
|
||||
|
||||
@item window
|
||||
|
@ -33,8 +33,10 @@
|
||||
#include "internal.h"
|
||||
|
||||
enum SilenceDetect {
|
||||
D_PEAK,
|
||||
D_AVG,
|
||||
D_RMS,
|
||||
D_PEAK,
|
||||
D_NB
|
||||
};
|
||||
|
||||
enum ThresholdMode {
|
||||
@ -75,6 +77,12 @@ typedef struct SilenceRemoveContext {
|
||||
AVFrame *start_window;
|
||||
AVFrame *stop_window;
|
||||
|
||||
int *start_front;
|
||||
int *start_back;
|
||||
|
||||
int *stop_front;
|
||||
int *stop_back;
|
||||
|
||||
int64_t window_duration;
|
||||
|
||||
int start_window_pos;
|
||||
@ -100,8 +108,8 @@ typedef struct SilenceRemoveContext {
|
||||
|
||||
int detection;
|
||||
|
||||
float (*compute_flt)(float *c, float s, float ws, int size);
|
||||
double (*compute_dbl)(double *c, double s, double ws, int size);
|
||||
float (*compute_flt)(float *c, float s, float ws, int size, int *front, int *back);
|
||||
double (*compute_dbl)(double *c, double s, double ws, int size, int *front, int *back);
|
||||
} SilenceRemoveContext;
|
||||
|
||||
#define OFFSET(x) offsetof(SilenceRemoveContext, x)
|
||||
@ -120,9 +128,10 @@ static const AVOption silenceremove_options[] = {
|
||||
{ "stop_threshold", "set threshold for stop silence detection", OFFSET(stop_threshold), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0, DBL_MAX, AF },
|
||||
{ "stop_silence", "set stop duration of silence part to keep", OFFSET(stop_silence_opt), AV_OPT_TYPE_DURATION, {.i64=0}, 0, INT32_MAX, AF },
|
||||
{ "stop_mode", "set which channel will trigger trimming from end", OFFSET(stop_mode), AV_OPT_TYPE_INT, {.i64=T_ANY}, T_ANY, T_ALL, AF, "mode" },
|
||||
{ "detection", "set how silence is detected", OFFSET(detection), AV_OPT_TYPE_INT, {.i64=D_RMS}, D_PEAK,D_RMS, AF, "detection" },
|
||||
{ "peak", "use absolute values of samples", 0, AV_OPT_TYPE_CONST, {.i64=D_PEAK},0, 0, AF, "detection" },
|
||||
{ "rms", "use squared values of samples", 0, AV_OPT_TYPE_CONST, {.i64=D_RMS}, 0, 0, AF, "detection" },
|
||||
{ "detection", "set how silence is detected", OFFSET(detection), AV_OPT_TYPE_INT, {.i64=D_RMS}, 0, D_NB-1, AF, "detection" },
|
||||
{ "avg", "use mean absolute values of samples", 0, AV_OPT_TYPE_CONST, {.i64=D_AVG}, 0, 0, AF, "detection" },
|
||||
{ "rms", "use root mean squared values of samples", 0, AV_OPT_TYPE_CONST, {.i64=D_RMS}, 0, 0, AF, "detection" },
|
||||
{ "peak", "use max absolute values of samples", 0, AV_OPT_TYPE_CONST, {.i64=D_PEAK},0, 0, AF, "detection" },
|
||||
{ "window", "set duration of window for silence detection", OFFSET(window_duration_opt), AV_OPT_TYPE_DURATION, {.i64=20000}, 0, 100000000, AF },
|
||||
{ NULL }
|
||||
};
|
||||
@ -201,7 +210,9 @@ static int config_output(AVFilterLink *outlink)
|
||||
|
||||
s->start_window = ff_get_audio_buffer(outlink, s->window_duration);
|
||||
s->stop_window = ff_get_audio_buffer(outlink, s->window_duration);
|
||||
if (!s->start_window || !s->stop_window)
|
||||
s->start_cache = av_calloc(outlink->ch_layout.nb_channels, s->window_duration * sizeof(*s->start_cache));
|
||||
s->stop_cache = av_calloc(outlink->ch_layout.nb_channels, s->window_duration * sizeof(*s->stop_cache));
|
||||
if (!s->start_window || !s->stop_window || !s->start_cache || !s->stop_cache)
|
||||
return AVERROR(ENOMEM);
|
||||
|
||||
s->start_queuef = ff_get_audio_buffer(outlink, s->start_silence + 1);
|
||||
@ -209,14 +220,20 @@ static int config_output(AVFilterLink *outlink)
|
||||
if (!s->start_queuef || !s->stop_queuef)
|
||||
return AVERROR(ENOMEM);
|
||||
|
||||
s->start_cache = av_calloc(outlink->ch_layout.nb_channels, sizeof(*s->start_cache));
|
||||
s->stop_cache = av_calloc(outlink->ch_layout.nb_channels, sizeof(*s->stop_cache));
|
||||
if (!s->start_cache || !s->stop_cache)
|
||||
s->start_front = av_calloc(outlink->ch_layout.nb_channels, sizeof(*s->start_front));
|
||||
s->start_back = av_calloc(outlink->ch_layout.nb_channels, sizeof(*s->start_back));
|
||||
s->stop_front = av_calloc(outlink->ch_layout.nb_channels, sizeof(*s->stop_front));
|
||||
s->stop_back = av_calloc(outlink->ch_layout.nb_channels, sizeof(*s->stop_back));
|
||||
if (!s->start_front || !s->start_back || !s->stop_front || !s->stop_back)
|
||||
return AVERROR(ENOMEM);
|
||||
|
||||
clear_windows(s);
|
||||
|
||||
switch (s->detection) {
|
||||
case D_AVG:
|
||||
s->compute_flt = compute_avg_flt;
|
||||
s->compute_dbl = compute_avg_dbl;
|
||||
break;
|
||||
case D_PEAK:
|
||||
s->compute_flt = compute_peak_flt;
|
||||
s->compute_dbl = compute_peak_dbl;
|
||||
@ -374,8 +391,13 @@ static av_cold void uninit(AVFilterContext *ctx)
|
||||
av_frame_free(&s->stop_window);
|
||||
av_frame_free(&s->start_queuef);
|
||||
av_frame_free(&s->stop_queuef);
|
||||
|
||||
av_freep(&s->start_cache);
|
||||
av_freep(&s->stop_cache);
|
||||
av_freep(&s->start_front);
|
||||
av_freep(&s->start_back);
|
||||
av_freep(&s->stop_front);
|
||||
av_freep(&s->stop_back);
|
||||
}
|
||||
|
||||
static const AVFilterPad silenceremove_inputs[] = {
|
||||
|
@ -99,8 +99,8 @@ static void fn(queue_sample)(AVFilterContext *ctx,
|
||||
*window_pos = 0;
|
||||
}
|
||||
|
||||
static ftype fn(compute_peak)(ftype *cache, ftype sample, ftype wsample,
|
||||
int window_size)
|
||||
static ftype fn(compute_avg)(ftype *cache, ftype sample, ftype wsample,
|
||||
int window_size, int *unused, int *unused2)
|
||||
{
|
||||
ftype r;
|
||||
|
||||
@ -111,8 +111,49 @@ static ftype fn(compute_peak)(ftype *cache, ftype sample, ftype wsample,
|
||||
return r / window_size;
|
||||
}
|
||||
|
||||
/*
 * Feed one new sample into a per-channel peak tracker and return the value
 * stored at the tracker's front index.
 *
 * peak    - array of `size` entries, indexed circularly by *ffront / *bback.
 * sample  - incoming sample; only its FABS() value is stored.
 * wsample - sample whose FABS() value is compared against peak[front];
 *           presumably the sample leaving the detection window - TODO confirm
 *           against the callers.
 * size    - number of entries in `peak`; indexes wrap at 0 and size - 1.
 * ffront, bback - in/out: current front and back indexes, updated on return.
 *
 * NOTE(review): this looks like a sliding-window maximum kept as a deque in a
 * ring buffer - confirm. FABS is defined elsewhere (presumably fabs/fabsf).
 */
static ftype fn(compute_peak)(ftype *peak, ftype sample, ftype wsample,
|
||||
int size, int *ffront, int *bback)
|
||||
{
|
||||
ftype r, abs_sample = FABS(sample);
|
||||
int front = *ffront;
|
||||
int back = *bback;
|
||||
|
||||
/* New sample exceeds the front entry: walk front backwards (wrapping to
 * size - 1) until it meets back, so front == back afterwards. */
if (front != back && abs_sample > peak[front]) {
|
||||
while (front != back) {
|
||||
front--;
|
||||
if (front < 0)
|
||||
front = size - 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Advance back (wrapping to 0) past entries smaller than the new sample,
 * stopping as soon as it reaches front. */
while (front != back && abs_sample > peak[back]) {
|
||||
back++;
|
||||
if (back >= size)
|
||||
back = 0;
|
||||
}
|
||||
|
||||
/* |wsample| equals the front entry: step front back by one slot (wrapping). */
if (front != back && FABS(wsample) == peak[front]) {
|
||||
front--;
|
||||
if (front < 0)
|
||||
front = size - 1;
|
||||
}
|
||||
|
||||
/* Step back by one slot (wrapping) and store the new magnitude there. */
back--;
|
||||
if (back < 0)
|
||||
back = size - 1;
|
||||
av_assert2(back != front);
|
||||
peak[back] = abs_sample;
|
||||
|
||||
/* The result is whatever now sits at the front index. */
r = peak[front];
|
||||
|
||||
/* Hand the updated indexes back to the caller. */
*ffront = front;
|
||||
*bback = back;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static ftype fn(compute_rms)(ftype *cache, ftype sample, ftype wsample,
|
||||
int window_size)
|
||||
int window_size, int *unused, int *unused2)
|
||||
{
|
||||
ftype r;
|
||||
|
||||
@ -143,6 +184,9 @@ static void fn(filter_start)(AVFilterContext *ctx,
|
||||
const int start_duration = s->start_duration;
|
||||
ftype *start_cache = (ftype *)s->start_cache;
|
||||
const int start_silence = s->start_silence;
|
||||
int window_size = start_window_nb_samples;
|
||||
int *front = s->start_front;
|
||||
int *back = s->start_back;
|
||||
|
||||
fn(queue_sample)(ctx, src, start,
|
||||
&s->start_queue_pos,
|
||||
@ -153,15 +197,20 @@ static void fn(filter_start)(AVFilterContext *ctx,
|
||||
start_nb_samples,
|
||||
start_window_nb_samples);
|
||||
|
||||
if (s->detection != D_PEAK)
|
||||
window_size = s->start_window_size;
|
||||
|
||||
for (int ch = 0; ch < nb_channels; ch++) {
|
||||
ftype start_sample = start[start_pos + ch];
|
||||
ftype start_ow = startw[start_wpos + ch];
|
||||
ftype tstart;
|
||||
|
||||
tstart = fn(s->compute)(start_cache + ch,
|
||||
tstart = fn(s->compute)(start_cache + ch * start_window_nb_samples,
|
||||
start_sample,
|
||||
start_ow,
|
||||
s->start_window_size);
|
||||
window_size,
|
||||
front + ch,
|
||||
back + ch);
|
||||
|
||||
startw[start_wpos + ch] = start_sample;
|
||||
|
||||
@ -226,6 +275,9 @@ static void fn(filter_stop)(AVFilterContext *ctx,
|
||||
ftype *stop_cache = (ftype *)s->stop_cache;
|
||||
const int stop_silence = s->stop_silence;
|
||||
const int restart = s->restart;
|
||||
int window_size = stop_window_nb_samples;
|
||||
int *front = s->stop_front;
|
||||
int *back = s->stop_back;
|
||||
|
||||
fn(queue_sample)(ctx, src, stop,
|
||||
&s->stop_queue_pos,
|
||||
@ -236,15 +288,20 @@ static void fn(filter_stop)(AVFilterContext *ctx,
|
||||
stop_nb_samples,
|
||||
stop_window_nb_samples);
|
||||
|
||||
if (s->detection != D_PEAK)
|
||||
window_size = s->stop_window_size;
|
||||
|
||||
for (int ch = 0; ch < nb_channels; ch++) {
|
||||
ftype stop_sample = stop[stop_pos + ch];
|
||||
ftype stop_ow = stopw[stop_wpos + ch];
|
||||
ftype tstop;
|
||||
|
||||
tstop = fn(s->compute)(stop_cache + ch,
|
||||
tstop = fn(s->compute)(stop_cache + ch * stop_window_nb_samples,
|
||||
stop_sample,
|
||||
stop_ow,
|
||||
s->stop_window_size);
|
||||
window_size,
|
||||
front + ch,
|
||||
back + ch);
|
||||
|
||||
stopw[stop_wpos + ch] = stop_sample;
|
||||
|
||||
|
@ -184,7 +184,7 @@ fate-filter-pan-downmix2: SRC = $(TARGET_PATH)/tests/data/asynth-44100-11.wav
|
||||
fate-filter-pan-downmix2: CMD = framecrc -ss 3.14 -i $(SRC) -frames:a 20 -filter:a "pan=5C|c0=0.7*c0+0.7*c10|c1=c9|c2=c8|c3=c7|c4=c6"
|
||||
|
||||
FATE_AFILTER-$(call ALLYES, LAVFI_INDEV, AEVALSRC_FILTER SILENCEREMOVE_FILTER) += fate-filter-silenceremove
|
||||
fate-filter-silenceremove: CMD = framecrc -auto_conversion_filters -f lavfi -i "aevalsrc=between(t\,1\,2)+between(t\,4\,5)+between(t\,7\,9):d=10:n=8192,silenceremove=start_periods=0:start_duration=0:start_threshold=0:stop_periods=-1:stop_duration=0:stop_threshold=-90dB:window=0:detection=peak"
|
||||
fate-filter-silenceremove: CMD = framecrc -auto_conversion_filters -f lavfi -i "aevalsrc=between(t\,1\,2)+between(t\,4\,5)+between(t\,7\,9):d=10:n=8192,silenceremove=start_periods=0:start_duration=0:start_threshold=0:stop_periods=-1:stop_duration=0:stop_threshold=-90dB:window=0:detection=avg"
|
||||
|
||||
FATE_AFILTER_SAMPLES-$(call FILTERDEMDECENCMUX, STEREOTOOLS, WAV, PCM_S16LE, PCM_S16LE, WAV) += fate-filter-stereotools
|
||||
fate-filter-stereotools: SRC = $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav
|
||||
|
Loading…
Reference in New Issue
Block a user