avfilter: add normalize filter

2025-08-10 06:10:52 +02:00 · 2017-11-21 21:32:06 +11:00
parent 279d2599dd
commit 7d4fe0c5cb
6 changed files with 470 additions and 1 deletions
--- a/1
+++ b/1
@@ -19,6 +19,7 @@ version <next>:
 - acontrast audio filter
 - OpenCL overlay filter
 - video mix filter
+- video normalize filter


 version 3.4:
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -10867,6 +10867,86 @@ Add temporal and uniform noise to input video:
 noise=alls=20:allf=t+u
@end example

+@section normalize
+
+Normalize RGB video (aka histogram stretching, contrast stretching).
+See: https://en.wikipedia.org/wiki/Normalization_(image_processing)
+
+For each channel of each frame, the filter computes the input range and maps
+it linearly to the user-specified output range. The output range defaults
+to the full dynamic range from pure black to pure white.
+
+Temporal smoothing can be used on the input range to reduce flickering (rapid
+changes in brightness) caused when small dark or bright objects enter or leave
+the scene. This is similar to the auto-exposure (automatic gain control) on a
+video camera, and, like a video camera, it may cause a period of over- or
+under-exposure of the video.
+
+The R,G,B channels can be normalized independently, which may cause some
+color shifting, or linked together as a single channel, which prevents
+color shifting. Linked normalization preserves hue. Independent normalization
+does not, so it can be used to remove some color casts. Independent and linked
+normalization can be combined in any ratio.
+
+The normalize filter accepts the following options:
+
+@table @option
+@item blackpt
+@item whitept
+Colors which define the output range. The minimum input value is mapped to
+the @var{blackpt}. The maximum input value is mapped to the @var{whitept}.
+The defaults are black and white respectively. Specifying white for
+@var{blackpt} and black for @var{whitept} will give color-inverted,
+normalized video. Shades of grey can be used to reduce the dynamic range
+(contrast). Specifying saturated colors here can create some interesting
+effects.
+
+@item smoothing
+The number of previous frames to use for temporal smoothing. The input range
+of each channel is smoothed using a rolling average over the current frame
+and the @var{smoothing} previous frames. The default is 0 (no temporal
+smoothing).
+
+@item independence
+Controls the ratio of independent (color shifting) channel normalization to
+linked (color preserving) normalization. 0.0 is fully linked, 1.0 is fully
+independent. Defaults to 1.0 (fully independent).
+
+@item strength
+Overall strength of the filter. 1.0 is full strength. 0.0 is a rather
+expensive no-op. Defaults to 1.0 (full strength).
+
+@end table
+
+@subsection Examples
+
+Stretch video contrast to use the full dynamic range, with no temporal
+smoothing; may flicker depending on the source content:
+@example
+normalize=blackpt=black:whitept=white:smoothing=0
+@end example
+
+As above, but with 50 frames of temporal smoothing; flicker should be
+reduced, depending on the source content:
+@example
+normalize=blackpt=black:whitept=white:smoothing=50
+@end example
+
+As above, but with hue-preserving linked channel normalization:
+@example
+normalize=blackpt=black:whitept=white:smoothing=50:independence=0
+@end example
+
+As above, but with half strength:
+@example
+normalize=blackpt=black:whitept=white:smoothing=50:independence=0:strength=0.5
+@end example
+
+Map the darkest input color to red, the brightest input color to cyan:
+@example
+normalize=blackpt=red:whitept=cyan
+@end example
+
@section null

 Pass the video source unchanged to the output.
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -247,6 +247,7 @@ OBJS-$(CONFIG_NLMEANS_FILTER)                += vf_nlmeans.o
 OBJS-$(CONFIG_NNEDI_FILTER)                  += vf_nnedi.o
 OBJS-$(CONFIG_NOFORMAT_FILTER)               += vf_format.o
 OBJS-$(CONFIG_NOISE_FILTER)                  += vf_noise.o
+OBJS-$(CONFIG_NORMALIZE_FILTER)              += vf_normalize.o
 OBJS-$(CONFIG_NULL_FILTER)                   += vf_null.o
 OBJS-$(CONFIG_OCR_FILTER)                    += vf_ocr.o
 OBJS-$(CONFIG_OCV_FILTER)                    += vf_libopencv.o
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -257,6 +257,7 @@ static void register_all(void)
    REGISTER_FILTER(NNEDI,          nnedi,          vf);
    REGISTER_FILTER(NOFORMAT,       noformat,       vf);
    REGISTER_FILTER(NOISE,          noise,          vf);
+    REGISTER_FILTER(NORMALIZE,      normalize,      vf);
    REGISTER_FILTER(NULL,           null,           vf);
    REGISTER_FILTER(OCR,            ocr,            vf);
    REGISTER_FILTER(OCV,            ocv,            vf);
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -30,7 +30,7 @@
 #include "libavutil/version.h"

 #define LIBAVFILTER_VERSION_MAJOR   7
-#define LIBAVFILTER_VERSION_MINOR   3
+#define LIBAVFILTER_VERSION_MINOR   4
 #define LIBAVFILTER_VERSION_MICRO 100

 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
--- a/libavfilter/vf_normalize.c
+++ b/libavfilter/vf_normalize.c
@@ -0,0 +1,386 @@
+/*
+ * Copyright (c) 2017 Richard Ling
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * Normalize RGB video (aka histogram stretching, contrast stretching).
+ * See: https://en.wikipedia.org/wiki/Normalization_(image_processing)
+ *
+ * For each channel of each frame, the filter computes the input range and maps
+ * it linearly to the user-specified output range. The output range defaults
+ * to the full dynamic range from pure black to pure white.
+ *
+ * Naively maximising the dynamic range of each frame of video in isolation
+ * may cause flickering (rapid changes in brightness of static objects in the
+ * scene) when small dark or bright objects enter or leave the scene. This
+ * filter can apply temporal smoothing to the input range to reduce flickering.
+ * Temporal smoothing is similar to the auto-exposure (automatic gain control)
+ * on a video camera, which performs the same function; and, like a video
+ * camera, it may cause a period of over- or under-exposure of the video.
+ *
+ * The filter can normalize the R,G,B channels independently, which may cause
+ * color shifting, or link them together as a single channel, which prevents
+ * color shifting. More precisely, linked normalization preserves hue (as it's
+ * defined in HSV/HSL color spaces) while independent normalization does not.
+ * Independent normalization can be used to remove color casts, such as the
+ * blue cast from underwater video, restoring more natural colors. The filter
+ * can also combine independent and linked normalization in any ratio.
+ *
+ * Finally the overall strength of the filter can be adjusted, from no effect
+ * to full normalization.
+ *
+ * The 5 AVOptions are:
+ *   blackpt,   Colors which define the output range. The minimum input value
+ *   whitept    is mapped to the blackpt. The maximum input value is mapped to
+ *              the whitept. The defaults are black and white respectively.
+ *              Specifying white for blackpt and black for whitept will give
+ *              color-inverted, normalized video. Shades of grey can be used
+ *              to reduce the dynamic range (contrast). Specifying saturated
+ *              colors here can create some interesting effects.
+ *
+ *   smoothing  The amount of temporal smoothing, expressed in frames (>=0).
+ *              the minimum and maximum input values of each channel are
+ *              smoothed using a rolling average over the current frame and
+ *              that many previous frames of video.  Defaults to 0 (no temporal
+ *              smoothing).
+ *
+ *   independence
+ *              Controls the ratio of independent (color shifting) channel
+ *              normalization to linked (color preserving) normalization. 0.0
+ *              is fully linked, 1.0 is fully independent. Defaults to fully
+ *              independent.
+ *
+ *   strength   Overall strength of the filter. 1.0 is full strength. 0.0 is
+ *              a rather expensive no-op. Values in between can give a gentle
+ *              boost to low-contrast video without creating an artificial
+ *              over-processed look. The default is full strength.
+ */
+
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+typedef struct NormalizeContext {
+    const AVClass *class;
+
+    // Storage for the corresponding AVOptions
+    uint8_t blackpt[4];
+    uint8_t whitept[4];
+    int smoothing;
+    float independence;
+    float strength;
+
+    int co[4];          // Offsets to R,G,B,A bytes respectively in each pixel
+    int num_components; // Number of components in the pixel format
+    int history_len;    // Number of frames to average; based on smoothing factor
+    int frame_num;      // Increments on each frame, starting from 0.
+
+    // Per-extremum, per-channel history, for temporal smoothing.
+    struct {
+        uint8_t *history;       // History entries.
+        uint32_t history_sum;   // Sum of history entries.
+    } min[3], max[3];           // Min and max for each channel in {R,G,B}.
+    uint8_t *history_mem;       // Single allocation for above history entries
+
+} NormalizeContext;
+
+#define OFFSET(x) offsetof(NormalizeContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption normalize_options[] = {
+    { "blackpt",  "output color to which darkest input color is mapped",  OFFSET(blackpt), AV_OPT_TYPE_COLOR, { .str = "black" }, CHAR_MIN, CHAR_MAX, FLAGS },
+    { "whitept",  "output color to which brightest input color is mapped",  OFFSET(whitept), AV_OPT_TYPE_COLOR, { .str = "white" }, CHAR_MIN, CHAR_MAX, FLAGS },
+    { "smoothing",  "amount of temporal smoothing of the input range, to reduce flicker", OFFSET(smoothing), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX/8, FLAGS },
+    { "independence", "proportion of independent to linked channel normalization", OFFSET(independence), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, 1.0, FLAGS },
+    { "strength", "strength of filter, from no effect to full normalization", OFFSET(strength), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, 1.0, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(normalize);
+
+// This function is the main guts of the filter. Normalizes the input frame
+// into the output frame. The frames are known to have the same dimensions
+// and pixel format.
+static void normalize(NormalizeContext *s, AVFrame *in, AVFrame *out)
+{
+    // Per-extremum, per-channel local variables.
+    struct {
+        uint8_t in;     // Original input byte value for this frame.
+        float smoothed; // Smoothed input value [0,255].
+        float out;      // Output value [0,255].
+    } min[3], max[3];   // Min and max for each channel in {R,G,B}.
+
+    float rgb_min_smoothed; // Min input range for linked normalization
+    float rgb_max_smoothed; // Max input range for linked normalization
+    uint8_t lut[3][256];    // Lookup table
+    int x, y, c;
+
+    // First, scan the input frame to find, for each channel, the minimum
+    // (min.in) and maximum (max.in) values present in the channel.
+    for (c = 0; c < 3; c++)
+        min[c].in = max[c].in = in->data[0][s->co[c]];
+    for (y = 0; y < in->height; y++) {
+        uint8_t *inp = in->data[0] + y * in->linesize[0];
+        uint8_t *outp = out->data[0] + y * out->linesize[0];
+        for (x = 0; x < in->width; x++) {
+            for (c = 0; c < 3; c++) {
+                min[c].in = FFMIN(min[c].in, inp[s->co[c]]);
+                max[c].in = FFMAX(max[c].in, inp[s->co[c]]);
+            }
+            inp += s->num_components;
+            outp += s->num_components;
+        }
+    }
+
+    // Next, for each channel, push min.in and max.in into their respective
+    // histories, to determine the min.smoothed and max.smoothed for this frame.
+    {
+        int history_idx = s->frame_num % s->history_len;
+        // Assume the history is not yet full; num_history_vals is the number
+        // of frames received so far including the current frame.
+        int num_history_vals = s->frame_num + 1;
+        if (s->frame_num >= s->history_len) {
+            //The history is full; drop oldest value and cap num_history_vals.
+            for (c = 0; c < 3; c++) {
+                s->min[c].history_sum -= s->min[c].history[history_idx];
+                s->max[c].history_sum -= s->max[c].history[history_idx];
+            }
+            num_history_vals = s->history_len;
+        }
+        // For each extremum, update history_sum and calculate smoothed value
+        // as the rolling average of the history entries.
+        for (c = 0; c < 3; c++) {
+            s->min[c].history_sum += (s->min[c].history[history_idx] = min[c].in);
+            min[c].smoothed = s->min[c].history_sum / (float)num_history_vals;
+            s->max[c].history_sum += (s->max[c].history[history_idx] = max[c].in);
+            max[c].smoothed = s->max[c].history_sum / (float)num_history_vals;
+        }
+    }
+
+    // Determine the input range for linked normalization. This is simply the
+    // minimum of the per-channel minimums, and the maximum of the per-channel
+    // maximums.
+    rgb_min_smoothed = FFMIN3(min[0].smoothed, min[1].smoothed, min[2].smoothed);
+    rgb_max_smoothed = FFMAX3(max[0].smoothed, max[1].smoothed, max[2].smoothed);
+
+    // Now, process each channel to determine the input and output range and
+    // build the lookup tables.
+    for (c = 0; c < 3; c++) {
+        int in_val;
+        // Adjust the input range for this channel [min.smoothed,max.smoothed]
+        // by mixing in the correct proportion of the linked normalization
+        // input range [rgb_min_smoothed,rgb_max_smoothed].
+        min[c].smoothed = (min[c].smoothed  *         s->independence)
+                        + (rgb_min_smoothed * (1.0f - s->independence));
+        max[c].smoothed = (max[c].smoothed  *         s->independence)
+                        + (rgb_max_smoothed * (1.0f - s->independence));
+
+        // Calculate the output range [min.out,max.out] as a ratio of the full-
+        // strength output range [blackpt,whitept] and the original input range
+        // [min.in,max.in], based on the user-specified filter strength.
+        min[c].out = (s->blackpt[c] *         s->strength)
+                   + (min[c].in     * (1.0f - s->strength));
+        max[c].out = (s->whitept[c] *         s->strength)
+                   + (max[c].in     * (1.0f - s->strength));
+
+        // Now, build a lookup table which linearly maps the adjusted input range
+        // [min.smoothed,max.smoothed] to the output range [min.out,max.out].
+        // Perform the linear interpolation for each x:
+        //     lut[x] = (int)(float(x - min.smoothed) * scale + max.out + 0.5)
+        // where scale = (max.out - min.out) / (max.smoothed - min.smoothed)
+        if (min[c].smoothed == max[c].smoothed) {
+            // There is no dynamic range to expand. No mapping for this channel.
+            for (in_val = min[c].in; in_val <= max[c].in; in_val++)
+                lut[c][in_val] = min[c].out;
+        } else {
+            // We must set lookup values for all values in the original input
+            // range [min.in,max.in]. Since the original input range may be
+            // larger than [min.smoothed,max.smoothed], some output values may
+            // fall outside the [0,255] dynamic range. We need to clamp them.
+            float scale = (max[c].out - min[c].out) / (max[c].smoothed - min[c].smoothed);
+            for (in_val = min[c].in; in_val <= max[c].in; in_val++) {
+                int out_val = (in_val - min[c].smoothed) * scale + min[c].out + 0.5f;
+                out_val = FFMAX(out_val, 0);
+                out_val = FFMIN(out_val, 255);
+                lut[c][in_val] = out_val;
+            }
+        }
+    }
+
+    // Finally, process the pixels of the input frame using the lookup tables.
+    for (y = 0; y < in->height; y++) {
+        uint8_t *inp = in->data[0] + y * in->linesize[0];
+        uint8_t *outp = out->data[0] + y * out->linesize[0];
+        for (x = 0; x < in->width; x++) {
+            for (c = 0; c < 3; c++)
+                outp[s->co[c]] = lut[c][inp[s->co[c]]];
+            if (s->num_components == 4)
+                // Copy alpha as-is.
+                outp[s->co[3]] = inp[s->co[3]];
+            inp += s->num_components;
+            outp += s->num_components;
+        }
+    }
+
+    s->frame_num++;
+}
+
+// Now we define all the functions accessible from the ff_vf_normalize class,
+// which is ffmpeg's interface to our filter.  See doc/filter_design.txt and
+// doc/writing_filters.txt for descriptions of what these interface functions
+// are expected to do.
+
+// Set the pixel formats that our filter supports. We should be able to process
+// any 8-bit RGB formats. 16-bit support might be useful one day.
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pixel_fmts[] = {
+        AV_PIX_FMT_RGB24,
+        AV_PIX_FMT_BGR24,
+        AV_PIX_FMT_ARGB,
+        AV_PIX_FMT_RGBA,
+        AV_PIX_FMT_ABGR,
+        AV_PIX_FMT_BGRA,
+        AV_PIX_FMT_0RGB,
+        AV_PIX_FMT_RGB0,
+        AV_PIX_FMT_0BGR,
+        AV_PIX_FMT_BGR0,
+        AV_PIX_FMT_NONE
+    };
+    // According to filter_design.txt, using ff_set_common_formats() this way
+    // ensures the pixel formats of the input and output will be the same. That
+    // saves a bit of effort possibly needing to handle format conversions.
+    AVFilterFormats *formats = ff_make_format_list(pixel_fmts);
+    if (!formats)
+        return AVERROR(ENOMEM);
+    return ff_set_common_formats(ctx, formats);
+}
+
+// At this point we know the pixel format used for both input and output.  We
+// can also access the frame rate of the input video and allocate some memory
+// appropriately
+static int config_input(AVFilterLink *inlink)
+{
+    NormalizeContext *s = inlink->dst->priv;
+    // Store offsets to R,G,B,A bytes respectively in each pixel
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    int c;
+
+    for (c = 0; c < 4; ++c)
+        s->co[c] = desc->comp[c].offset;
+    s->num_components = desc->nb_components;
+    // Convert smoothing value to history_len (a count of frames to average,
+    // must be at least 1).  Currently this is a direct assignment, but the
+    // smoothing value was originally envisaged as a number of seconds.  In
+    // future it would be nice to set history_len using a number of seconds,
+    // but VFR video is currently an obstacle to doing so.
+    s->history_len = s->smoothing + 1;
+    // Allocate the history buffers -- there are 6 -- one for each extrema.
+    // s->smoothing is limited to INT_MAX/8, so that (s->history_len * 6)
+    // can't overflow on 32bit causing a too-small allocation.
+    s->history_mem = av_malloc(s->history_len * 6);
+    if (s->history_mem == NULL)
+        return AVERROR(ENOMEM);
+
+    for (c = 0; c < 3; c++) {
+        s->min[c].history = s->history_mem + (c*2)   * s->history_len;
+        s->max[c].history = s->history_mem + (c*2+1) * s->history_len;
+    }
+    return 0;
+}
+
+// Free any memory allocations here
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    NormalizeContext *s = ctx->priv;
+
+    av_freep(&s->history_mem);
+}
+
+// This function is pretty much standard from doc/writing_filters.txt.  It
+// tries to do in-place filtering where possible, only allocating a new output
+// frame when absolutely necessary.
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    NormalizeContext *s = ctx->priv;
+    AVFrame *out;
+    // Set 'direct' if we can modify the input frame in-place.  Otherwise we
+    // need to retrieve a new frame from the output link.
+    int direct = av_frame_is_writable(in) && !ctx->is_disabled;
+
+    if (direct) {
+        out = in;
+    } else {
+        out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+        if (!out) {
+            av_frame_free(&in);
+            return AVERROR(ENOMEM);
+        }
+        av_frame_copy_props(out, in);
+    }
+
+    // Now we've got the input and output frames (which may be the same frame)
+    // perform the filtering with our custom function.
+    normalize(s, in, out);
+
+    if (ctx->is_disabled) {
+        av_frame_free(&out);
+        return ff_filter_frame(outlink, in);
+    }
+
+    if (!direct)
+        av_frame_free(&in);
+
+    return ff_filter_frame(outlink, out);
+}
+
+static const AVFilterPad inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+        .config_props = config_input,
+    },
+    { NULL }
+};
+
+static const AVFilterPad outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_normalize = {
+    .name          = "normalize",
+    .description   = NULL_IF_CONFIG_SMALL("Normalize RGB video."),
+    .priv_size     = sizeof(NormalizeContext),
+    .priv_class    = &normalize_class,
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .inputs        = inputs,
+    .outputs       = outputs,
+};