mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
lavfi: add volumedetect filter.
This commit is contained in:
parent
13b965ead4
commit
5980e57cf9
@ -50,6 +50,7 @@ version next:
|
||||
- edge detection filter
|
||||
- framestep filter
|
||||
- ffmpeg -shortest option is now per-output file
|
||||
- volume measurement filter
|
||||
|
||||
|
||||
version 0.11:
|
||||
|
@ -690,6 +690,46 @@ volume=-12dB
|
||||
@end example
|
||||
@end itemize
|
||||
|
||||
@section volumedetect
|
||||
|
||||
Detect the volume of the input audio.
|
||||
|
||||
The filter has no parameters. The input is not modified. Statistics about
|
||||
the volume will be printed in the log when the input stream end is reached.
|
||||
|
||||
In particular it will show the mean volume (root mean square), maximum
|
||||
volume (on a per-sample basis), and the beginning of a histogram of the
|
||||
registered volume values (from the maximum value to a cumulated 1/1000 of
|
||||
the samples).
|
||||
|
||||
All volumes are in decibels relative to the maximum PCM value.
|
||||
|
||||
Here is an excerpt of the output:
|
||||
@example
|
||||
[Parsed_volumedetect_0 @ 0xa23120] mean_volume: -27 dB
|
||||
[Parsed_volumedetect_0 @ 0xa23120] max_volume: -4 dB
|
||||
[Parsed_volumedetect_0 @ 0xa23120] histogram_4db: 6
|
||||
[Parsed_volumedetect_0 @ 0xa23120] histogram_5db: 62
|
||||
[Parsed_volumedetect_0 @ 0xa23120] histogram_6db: 286
|
||||
[Parsed_volumedetect_0 @ 0xa23120] histogram_7db: 1042
|
||||
[Parsed_volumedetect_0 @ 0xa23120] histogram_8db: 2551
|
||||
[Parsed_volumedetect_0 @ 0xa23120] histogram_9db: 4609
|
||||
[Parsed_volumedetect_0 @ 0xa23120] histogram_10db: 8409
|
||||
@end example
|
||||
|
||||
It means that:
|
||||
@itemize
|
||||
@item
|
||||
The mean square energy is approximately -27 dB, or 10^-2.7.
|
||||
@item
|
||||
The largest sample is at -4 dB, or more precisely between -4 dB and -5 dB.
|
||||
@item
|
||||
There are 6 samples at -4 dB, 62 at -5 dB, 286 at -6 dB, etc.
|
||||
@end itemize
|
||||
|
||||
In other words, raising the volume by +4 dB does not cause any clipping,
|
||||
raising it by +5 dB causes clipping for 6 samples, etc.
|
||||
|
||||
@section asyncts
|
||||
Synchronize audio data with timestamps by squeezing/stretching it and/or
|
||||
dropping samples/adding silence when needed.
|
||||
|
@ -67,6 +67,7 @@ OBJS-$(CONFIG_PAN_FILTER) += af_pan.o
|
||||
OBJS-$(CONFIG_RESAMPLE_FILTER) += af_resample.o
|
||||
OBJS-$(CONFIG_SILENCEDETECT_FILTER) += af_silencedetect.o
|
||||
OBJS-$(CONFIG_VOLUME_FILTER) += af_volume.o
|
||||
OBJS-$(CONFIG_VOLUMEDETECT_FILTER) += af_volumedetect.o
|
||||
|
||||
OBJS-$(CONFIG_AEVALSRC_FILTER) += asrc_aevalsrc.o
|
||||
OBJS-$(CONFIG_ANULLSRC_FILTER) += asrc_anullsrc.o
|
||||
|
159
libavfilter/af_volumedetect.c
Normal file
159
libavfilter/af_volumedetect.c
Normal file
@ -0,0 +1,159 @@
|
||||
/*
|
||||
* Copyright (c) 2012 Nicolas George
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public License
|
||||
* as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/audioconvert.h"
|
||||
#include "libavutil/avassert.h"
|
||||
#include "audio.h"
|
||||
#include "avfilter.h"
|
||||
#include "internal.h"
|
||||
|
||||
/**
 * State of the volumedetect filter (stored in the filter context's priv
 * area, sized through priv_size).
 */
typedef struct {
    /**
     * Sample counters indexed by PCM value: the slot at 0x8000 + i holds
     * how many samples with value i have been seen.
     * One slot more than the 0x10000 distinct 16-bit values is reserved so
     * that the table is symmetric around index 0x8000.
     */
    uint64_t histogram[0x10001];
} VolDetectContext;
|
||||
|
||||
/**
 * Declare the sample formats supported by the filter: AV_SAMPLE_FMT_S16 and
 * AV_SAMPLE_FMT_S16P.
 *
 * @param ctx filter context the format list is applied to
 * @return 0 on success, AVERROR(ENOMEM) if ff_make_format_list() returns NULL
 */
static int query_formats(AVFilterContext *ctx)
{
    /* List terminated by AV_SAMPLE_FMT_NONE. */
    enum AVSampleFormat supported[] = {
        AV_SAMPLE_FMT_S16,
        AV_SAMPLE_FMT_S16P,
        AV_SAMPLE_FMT_NONE
    };
    AVFilterFormats *list = ff_make_format_list(supported);

    if (!list)
        return AVERROR(ENOMEM);

    ff_set_common_formats(ctx, list);
    return 0;
}
|
||||
|
||||
/**
 * Account every sample of an incoming audio buffer in the histogram, then
 * pass the buffer on to the first output of the filter.
 *
 * Samples are read as int16_t. For a non-planar buffer all channels are read
 * from plane 0 (nb_samples * nb_channels values); for a planar buffer each of
 * the nb_channels planes contributes nb_samples values.
 *
 * @param inlink  input link the buffer arrived on
 * @param samples audio buffer to analyse
 * @return the return value of ff_filter_samples() on the output link
 */
static int filter_samples(AVFilterLink *inlink, AVFilterBufferRef *samples)
{
    AVFilterContext *ctx = inlink->dst;
    VolDetectContext *vd = ctx->priv;
    int64_t layout  = samples->audio->channel_layout;
    int nb_samples  = samples->audio->nb_samples;
    int nb_channels = av_get_channel_layout_nb_channels(layout);
    /* Planar default: one plane per channel. The previous self-initialisation
     * (nb_planes = nb_planes) left the value indeterminate for planar input. */
    int nb_planes   = nb_channels;
    int plane, i;
    int16_t *pcm;

    if (!av_sample_fmt_is_planar(samples->format)) {
        /* Interleaved data: a single plane holds every channel. */
        nb_samples *= nb_channels;
        nb_planes   = 1;
    }
    for (plane = 0; plane < nb_planes; plane++) {
        pcm = (int16_t *)samples->extended_data[plane];
        for (i = 0; i < nb_samples; i++)
            vd->histogram[pcm[i] + 0x8000]++; /* offset maps -0x8000..0x7fff to 0..0xffff */
    }

    return ff_filter_samples(inlink->dst->outputs[0], samples);
}
|
||||
|
||||
#define MAX_DB 91
|
||||
|
||||
static inline double logdb(uint64_t v)
|
||||
{
|
||||
double d = v / (double)(0x8000 * 0x8000);
|
||||
if (!v)
|
||||
return MAX_DB;
|
||||
return log(d) * -4.3429448190325182765112891891660508229; /* -10/log(10) */
|
||||
}
|
||||
|
||||
/**
 * Log the statistics gathered in the histogram at AV_LOG_INFO level:
 * the total sample count, the mean volume, the maximum volume and the
 * first buckets of a per-dB histogram.
 *
 * Nothing but the sample count is printed when no sample was registered.
 *
 * @param ctx filter context whose priv data holds the VolDetectContext
 */
static void print_stats(AVFilterContext *ctx)
{
    VolDetectContext *vd = ctx->priv;
    int i, max_volume, shift;
    uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0;
    uint64_t histdb[MAX_DB + 1] = { 0 };

    for (i = 0; i < 0x10000; i++)
        nb_samples += vd->histogram[i];
    /* The counters are uint64_t, hence the unsigned conversion specifier. */
    av_log(ctx, AV_LOG_INFO, "n_samples: %"PRIu64"\n", nb_samples);
    if (!nb_samples)
        return;

    /* If nb_samples > 1<<34, there is a risk of overflow in the
       multiplication or the sum: shift all histogram values to avoid that.
       The total number of samples must be recomputed to avoid rounding
       errors. */
    shift = av_log2(nb_samples >> 33);
    for (i = 0; i < 0x10000; i++) {
        nb_samples_shift += vd->histogram[i] >> shift;
        power += (i - 0x8000) * (i - 0x8000) * (vd->histogram[i] >> shift);
    }
    if (!nb_samples_shift)
        return;
    /* Mean of the squared sample values, rounded to nearest. */
    power = (power + nb_samples_shift / 2) / nb_samples_shift;
    av_assert0(power <= 0x8000 * 0x8000);
    av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(power));

    /* Largest absolute sample value with a non-zero count, searched downwards
       from 0x8000; index 0x8000 + 0x8000 is the extra histogram element. */
    max_volume = 0x8000;
    while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] &&
                             !vd->histogram[0x8000 - max_volume])
        max_volume--;
    av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(max_volume * max_volume));

    /* Regroup the counters by integer part of logdb() of the squared value. */
    for (i = 0; i < 0x10000; i++)
        histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] += vd->histogram[i];
    /* Skip the leading empty buckets. */
    for (i = 0; i <= MAX_DB && !histdb[i]; i++)
        ;
    /* Print buckets until the printed ones cumulate nb_samples / 1000. */
    for (; i <= MAX_DB && sum < nb_samples / 1000; i++) {
        av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRIu64"\n", i, histdb[i]);
        sum += histdb[i];
    }
}
|
||||
|
||||
/**
 * Forward a frame request to the first input of the filter and print the
 * statistics once that request reports AVERROR_EOF.
 *
 * @param outlink output link the request was made on
 * @return the value returned by ff_request_frame() on the input link
 */
static int request_frame(AVFilterLink *outlink)
{
    AVFilterContext *filter = outlink->src;
    int status = ff_request_frame(filter->inputs[0]);

    if (status != AVERROR_EOF)
        return status;

    print_stats(filter);
    return status;
}
|
||||
|
||||
AVFilter avfilter_af_volumedetect = {
|
||||
.name = "volumedetect",
|
||||
.description = NULL_IF_CONFIG_SMALL("Detect audio volume."),
|
||||
|
||||
.priv_size = sizeof(VolDetectContext),
|
||||
.query_formats = query_formats,
|
||||
|
||||
.inputs = (const AVFilterPad[]) {
|
||||
{ .name = "default",
|
||||
.type = AVMEDIA_TYPE_AUDIO,
|
||||
.get_audio_buffer = ff_null_get_audio_buffer,
|
||||
.filter_samples = filter_samples,
|
||||
.min_perms = AV_PERM_READ, },
|
||||
{ .name = NULL }
|
||||
},
|
||||
.outputs = (const AVFilterPad[]) {
|
||||
{ .name = "default",
|
||||
.type = AVMEDIA_TYPE_AUDIO,
|
||||
.request_frame = request_frame, },
|
||||
{ .name = NULL }
|
||||
},
|
||||
};
|
@ -57,6 +57,7 @@ void avfilter_register_all(void)
|
||||
REGISTER_FILTER (PAN, pan, af);
|
||||
REGISTER_FILTER (SILENCEDETECT, silencedetect, af);
|
||||
REGISTER_FILTER (VOLUME, volume, af);
|
||||
REGISTER_FILTER (VOLUMEDETECT,volumedetect,af);
|
||||
REGISTER_FILTER (RESAMPLE, resample, af);
|
||||
|
||||
REGISTER_FILTER (AEVALSRC, aevalsrc, asrc);
|
||||
|
Loading…
Reference in New Issue
Block a user