diff --git a/doc/filters.texi b/doc/filters.texi index bf48abfddf..7c6b945987 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -9322,6 +9322,27 @@ verbose logging level By default, the logging level is set to @var{info}. If the @option{video} or the @option{metadata} options are set, it switches to @var{verbose}. + +@item peak +Set peak mode(s). + +Available modes can be cumulated (the option is a @code{flag} type). Possible +values are: +@table @samp +@item none +Disable any peak mode (default). +@item sample +Enable sample-peak mode. + +Simple peak mode looking for the higher sample value. +@item true +Enable true-peak mode. + +If enabled, the peak lookup is done on an over-sampled version of the input +stream for better peak accuracy. This mode requires a build with +@code{libswresample}. +@end table + @end table @subsection Examples diff --git a/libavfilter/Makefile b/libavfilter/Makefile index e9bc01945e..05ec5f2cb9 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -22,6 +22,8 @@ FFLIBS-$(CONFIG_SCALE_FILTER) += swscale FFLIBS-$(CONFIG_SHOWSPECTRUM_FILTER) += avcodec FFLIBS-$(CONFIG_SMARTBLUR_FILTER) += swscale FFLIBS-$(CONFIG_SUBTITLES_FILTER) += avformat avcodec +EBUR128LIBS-$(CONFIG_SWRESAMPLE) = swresample +FFLIBS-$(CONFIG_EBUR128_FILTER) += $(EBUR128LIBS-yes) HEADERS = asrc_abuffer.h \ avcodec.h \ diff --git a/libavfilter/f_ebur128.c b/libavfilter/f_ebur128.c index a2c616098e..503d87ac32 100644 --- a/libavfilter/f_ebur128.c +++ b/libavfilter/f_ebur128.c @@ -37,6 +37,7 @@ #include "libavutil/xga_font_data.h" #include "libavutil/opt.h" #include "libavutil/timestamp.h" +#include "libswresample/swresample.h" #include "audio.h" #include "avfilter.h" #include "formats.h" @@ -92,6 +93,16 @@ struct rect { int x, y, w, h; }; typedef struct { const AVClass *class; ///< AVClass context for log and options purpose + /* peak metering */ + int peak_mode; ///< enabled peak modes + double *true_peaks; ///< true peaks per channel + double *sample_peaks; ///< sample peaks per channel +#if CONFIG_SWRESAMPLE + SwrContext *swr_ctx; ///< over-sampling context for true peak metering + double *swr_buf; ///< resampled audio data for true peak metering + int swr_linesize; +#endif + /* video */ int do_video; ///< 1 if video output enabled, 0 otherwise int w, h; ///< size of the video output @@ -130,6 +141,12 @@ typedef struct { int metadata; ///< whether or not to inject loudness results in frames } EBUR128Context; +enum { + PEAK_MODE_NONE = 0, + PEAK_MODE_SAMPLES_PEAKS = 1<<1, + PEAK_MODE_TRUE_PEAKS = 1<<2, +}; + #define OFFSET(x) offsetof(EBUR128Context, x) #define A AV_OPT_FLAG_AUDIO_PARAM #define V AV_OPT_FLAG_VIDEO_PARAM @@ -142,7 +159,11 @@ static const AVOption ebur128_options[] = { { "info", "information logging level", 0, AV_OPT_TYPE_CONST, {.i64 = AV_LOG_INFO}, INT_MIN, INT_MAX, A|V|F, "level" }, { "verbose", "verbose logging level", 0, AV_OPT_TYPE_CONST, {.i64 = AV_LOG_VERBOSE}, INT_MIN, INT_MAX, A|V|F, "level" }, { "metadata", "inject metadata in the filtergraph", OFFSET(metadata), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, A|V|F }, - { NULL } + { "peak", "set peak mode", OFFSET(peak_mode), AV_OPT_TYPE_FLAGS, {.i64 = PEAK_MODE_NONE}, 0, INT_MAX, A|F, "mode" }, + { "none", "disable any peak mode", 0, AV_OPT_TYPE_CONST, {.i64 = PEAK_MODE_NONE}, INT_MIN, INT_MAX, A|F, "mode" }, + { "sample", "enable peak-sample mode", 0, AV_OPT_TYPE_CONST, {.i64 = PEAK_MODE_SAMPLES_PEAKS}, INT_MIN, INT_MAX, A|F, "mode" }, + { "true", "enable true-peak mode", 0, AV_OPT_TYPE_CONST, {.i64 = PEAK_MODE_TRUE_PEAKS}, INT_MIN, INT_MAX, A|F, "mode" }, + { NULL }, }; AVFILTER_DEFINE_CLASS(ebur128); @@ -326,9 +347,13 @@ static int config_audio_input(AVFilterLink *inlink) AVFilterContext *ctx = inlink->dst; EBUR128Context *ebur128 = ctx->priv; - /* force 100ms framing in case of metadata injection: the frames must have - * a granularity of the window overlap to be accurately exploited */ - if (ebur128->metadata) + /* Force 100ms framing in case of metadata injection: the frames must have + * a granularity of the window overlap to be accurately exploited. + * As for the true peaks mode, it just simplifies the resampling buffer + * allocation and the lookup in it (since sample buffers differ in size, it + * can be more complex to integrate in the one-sample loop of + * filter_frame()). */ + if (ebur128->metadata || (ebur128->peak_mode & PEAK_MODE_TRUE_PEAKS)) inlink->min_samples = inlink->max_samples = inlink->partial_buf_size = inlink->sample_rate / 10; @@ -383,6 +408,36 @@ static int config_audio_output(AVFilterLink *outlink) outlink->flags |= FF_LINK_FLAG_REQUEST_LOOP; +#if CONFIG_SWRESAMPLE + if (ebur128->peak_mode & PEAK_MODE_TRUE_PEAKS) { + int ret; + + ebur128->swr_buf = av_malloc(19200 * nb_channels * sizeof(double)); + ebur128->true_peaks = av_calloc(nb_channels, sizeof(*ebur128->true_peaks)); + ebur128->swr_ctx = swr_alloc(); + if (!ebur128->swr_buf || !ebur128->true_peaks || !ebur128->swr_ctx) + return AVERROR(ENOMEM); + + av_opt_set_int(ebur128->swr_ctx, "in_channel_layout", outlink->channel_layout, 0); + av_opt_set_int(ebur128->swr_ctx, "in_sample_rate", outlink->sample_rate, 0); + av_opt_set_sample_fmt(ebur128->swr_ctx, "in_sample_fmt", outlink->format, 0); + + av_opt_set_int(ebur128->swr_ctx, "out_channel_layout", outlink->channel_layout, 0); + av_opt_set_int(ebur128->swr_ctx, "out_sample_rate", 192000, 0); + av_opt_set_sample_fmt(ebur128->swr_ctx, "out_sample_fmt", outlink->format, 0); + + ret = swr_init(ebur128->swr_ctx); + if (ret < 0) + return ret; + } +#endif + + if (ebur128->peak_mode & PEAK_MODE_SAMPLES_PEAKS) { + ebur128->sample_peaks = av_calloc(nb_channels, sizeof(*ebur128->sample_peaks)); + if (!ebur128->sample_peaks) + return AVERROR(ENOMEM); + } + return 0; } @@ -416,6 +471,12 @@ static av_cold int init(AVFilterContext *ctx) ebur128->loglevel = AV_LOG_INFO; } + if (!CONFIG_SWRESAMPLE && (ebur128->peak_mode & PEAK_MODE_TRUE_PEAKS)) { + av_log(ctx, AV_LOG_ERROR, + "True-peak mode requires libswresample to be performed\n"); + return AVERROR(EINVAL); + } + // if meter is +9 scale, scale range is from -18 LU to +9 LU (or 3*9) // if meter is +18 scale, scale range is from -36 LU to +18 LU (or 3*18) ebur128->scale_range = 3 * ebur128->meter; @@ -491,6 +552,22 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples) const double *samples = (double *)insamples->data[0]; AVFrame *pic = ebur128->outpicref; +#if CONFIG_SWRESAMPLE + if (ebur128->peak_mode & PEAK_MODE_TRUE_PEAKS) { + const double *swr_samples = ebur128->swr_buf; + int ret = swr_convert(ebur128->swr_ctx, (uint8_t**)&ebur128->swr_buf, 19200, + (const uint8_t **)insamples->data, nb_samples); + if (ret < 0) + return ret; + for (idx_insample = 0; idx_insample < ret; idx_insample++) { + for (ch = 0; ch < nb_channels; ch++) { + ebur128->true_peaks[ch] = FFMAX(ebur128->true_peaks[ch], FFABS(*swr_samples)); + swr_samples++; + } + } + } +#endif + for (idx_insample = 0; idx_insample < nb_samples; idx_insample++) { const int bin_id_400 = ebur128->i400.cache_pos; const int bin_id_3000 = ebur128->i3000.cache_pos; @@ -509,6 +586,9 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples) for (ch = 0; ch < nb_channels; ch++) { double bin; + if (ebur128->peak_mode & PEAK_MODE_SAMPLES_PEAKS) + ebur128->sample_peaks[ch] = FFMAX(ebur128->sample_peaks[ch], FFABS(*samples)); + ebur128->x[ch * 3] = *samples++; // set X[i] if (!ebur128->ch_weighting[ch]) @@ -677,22 +757,52 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples) if (ebur128->metadata) { /* happens only once per filter_frame call */ char metabuf[128]; +#define META_PREFIX "lavfi.r128." + #define SET_META(name, var) do { \ snprintf(metabuf, sizeof(metabuf), "%.3f", var); \ - av_dict_set(&insamples->metadata, "lavfi.r128." name, metabuf, 0); \ + av_dict_set(&insamples->metadata, name, metabuf, 0); \ } while (0) - SET_META("M", loudness_400); - SET_META("S", loudness_3000); - SET_META("I", ebur128->integrated_loudness); - SET_META("LRA", ebur128->loudness_range); - SET_META("LRA.low", ebur128->lra_low); - SET_META("LRA.high", ebur128->lra_high); + +#define SET_META_PEAK(name, ptype) do { \ + if (ebur128->peak_mode & PEAK_MODE_ ## ptype ## _PEAKS) { \ + char key[64]; \ + for (ch = 0; ch < nb_channels; ch++) { \ + snprintf(key, sizeof(key), \ + META_PREFIX AV_STRINGIFY(name) "_peaks_ch%d", ch); \ + SET_META(key, ebur128->name##_peaks[ch]); \ + } \ + } \ +} while (0) + + SET_META(META_PREFIX "M", loudness_400); + SET_META(META_PREFIX "S", loudness_3000); + SET_META(META_PREFIX "I", ebur128->integrated_loudness); + SET_META(META_PREFIX "LRA", ebur128->loudness_range); + SET_META(META_PREFIX "LRA.low", ebur128->lra_low); + SET_META(META_PREFIX "LRA.high", ebur128->lra_high); + + SET_META_PEAK(sample, SAMPLES); + SET_META_PEAK(true, TRUE); } - av_log(ctx, ebur128->loglevel, "t: %-10s " LOG_FMT "\n", + av_log(ctx, ebur128->loglevel, "t: %-10s " LOG_FMT, av_ts2timestr(pts, &outlink->time_base), loudness_400, loudness_3000, ebur128->integrated_loudness, ebur128->loudness_range); + +#define PRINT_PEAKS(str, sp, ptype) do { \ + if (ebur128->peak_mode & PEAK_MODE_ ## ptype ## _PEAKS) { \ + av_log(ctx, ebur128->loglevel, " [" str ":"); \ + for (ch = 0; ch < nb_channels; ch++) \ + av_log(ctx, ebur128->loglevel, " %.5f", sp[ch]); \ + av_log(ctx, ebur128->loglevel, "]"); \ + } \ +} while (0) + + PRINT_PEAKS("SPK", ebur128->sample_peaks, SAMPLES); + PRINT_PEAKS("TPK", ebur128->true_peaks, TRUE); + av_log(ctx, ebur128->loglevel, "\n"); } } @@ -764,6 +874,8 @@ static av_cold void uninit(AVFilterContext *ctx) av_freep(&ebur128->y_line_ref); av_freep(&ebur128->ch_weighting); + av_freep(&ebur128->true_peaks); + av_freep(&ebur128->sample_peaks); av_freep(&ebur128->i400.histogram); av_freep(&ebur128->i3000.histogram); for (i = 0; i < ebur128->nb_channels; i++) { @@ -773,6 +885,10 @@ static av_cold void uninit(AVFilterContext *ctx) for (i = 0; i < ctx->nb_outputs; i++) av_freep(&ctx->output_pads[i].name); av_frame_free(&ebur128->outpicref); +#if CONFIG_SWRESAMPLE + av_freep(&ebur128->swr_buf); + swr_free(&ebur128->swr_ctx); +#endif } static const AVFilterPad ebur128_inputs[] = {