1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2024-12-23 12:43:46 +02:00

avfilter: add LIBVMAF filter

This changes the previous vmaf patch to use libvmaf, keeping it separate from a
possible future native implementation of VMAF inside FFmpeg.

Signed-off-by: Ashish Singh <ashk43712@gmail.com>
Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
This commit is contained in:
Ashish Singh 2017-07-15 22:12:33 +05:30 committed by Ronald S. Bultje
parent 4de4308d2a
commit 615479d51c
6 changed files with 411 additions and 0 deletions

View File

@ -27,6 +27,7 @@ version <next>:
- additional frame format support for Interplay MVE movies - additional frame format support for Interplay MVE movies
- support for decoding through D3D11VA in ffmpeg - support for decoding through D3D11VA in ffmpeg
- limiter video filter - limiter video filter
- libvmaf video filter
version 3.3: version 3.3:
- CrystalHD decoder moved to new decode API - CrystalHD decoder moved to new decode API

5
configure vendored
View File

@ -256,6 +256,7 @@ External library support:
--enable-libtwolame enable MP2 encoding via libtwolame [no] --enable-libtwolame enable MP2 encoding via libtwolame [no]
--enable-libv4l2 enable libv4l2/v4l-utils [no] --enable-libv4l2 enable libv4l2/v4l-utils [no]
--enable-libvidstab enable video stabilization using vid.stab [no] --enable-libvidstab enable video stabilization using vid.stab [no]
--enable-libvmaf enable vmaf filter via libvmaf [no]
--enable-libvo-amrwbenc enable AMR-WB encoding via libvo-amrwbenc [no] --enable-libvo-amrwbenc enable AMR-WB encoding via libvo-amrwbenc [no]
--enable-libvorbis enable Vorbis en/decoding via libvorbis, --enable-libvorbis enable Vorbis en/decoding via libvorbis,
native implementation exists [no] native implementation exists [no]
@ -1569,6 +1570,7 @@ EXTERNAL_LIBRARY_LIST="
libtheora libtheora
libtwolame libtwolame
libv4l2 libv4l2
libvmaf
libvorbis libvorbis
libvpx libvpx
libwavpack libwavpack
@ -3186,6 +3188,7 @@ uspp_filter_deps="gpl avcodec"
vaguedenoiser_filter_deps="gpl" vaguedenoiser_filter_deps="gpl"
vidstabdetect_filter_deps="libvidstab" vidstabdetect_filter_deps="libvidstab"
vidstabtransform_filter_deps="libvidstab" vidstabtransform_filter_deps="libvidstab"
vmaf_filter_deps="libvmaf"
zmq_filter_deps="libzmq" zmq_filter_deps="libzmq"
zoompan_filter_deps="swscale" zoompan_filter_deps="swscale"
zscale_filter_deps="libzimg" zscale_filter_deps="libzimg"
@ -5902,6 +5905,8 @@ enabled libtwolame && require libtwolame twolame.h twolame_init -ltwolame
die "ERROR: libtwolame must be installed and version must be >= 0.3.10"; } die "ERROR: libtwolame must be installed and version must be >= 0.3.10"; }
enabled libv4l2 && require_pkg_config libv4l2 libv4l2.h v4l2_ioctl enabled libv4l2 && require_pkg_config libv4l2 libv4l2.h v4l2_ioctl
enabled libvidstab && require_pkg_config "vidstab >= 0.98" vid.stab/libvidstab.h vsMotionDetectInit enabled libvidstab && require_pkg_config "vidstab >= 0.98" vid.stab/libvidstab.h vsMotionDetectInit
enabled libvmaf && { check_lib libvmaf "libvmaf.h" "compute_vmaf" -lvmaf -lstdc++ -lpthread -lm ||
die "ERROR: libvmaf must be installed"; }
enabled libvo_amrwbenc && require libvo_amrwbenc vo-amrwbenc/enc_if.h E_IF_init -lvo-amrwbenc enabled libvo_amrwbenc && require libvo_amrwbenc vo-amrwbenc/enc_if.h E_IF_init -lvo-amrwbenc
enabled libvorbis && require_pkg_config vorbis vorbis/codec.h vorbis_info_init && enabled libvorbis && require_pkg_config vorbis vorbis/codec.h vorbis_info_init &&
require_pkg_config vorbisenc vorbis/vorbisenc.h vorbis_encode_init require_pkg_config vorbisenc vorbis/vorbisenc.h vorbis_encode_init

View File

@ -9639,6 +9639,70 @@ The formula that generates the correction is:
where @var{r_0} is halve of the image diagonal and @var{r_src} and @var{r_tgt} are the where @var{r_0} is halve of the image diagonal and @var{r_src} and @var{r_tgt} are the
distances from the focal point in the source and target images, respectively. distances from the focal point in the source and target images, respectively.
@section libvmaf
Obtain the average VMAF (Video Multi-Method Assessment Fusion)
score between two input videos.
This filter takes two input videos.
Both video inputs must have the same resolution and pixel format for
this filter to work correctly. Also it assumes that both inputs
have the same number of frames, which are compared one by one.
The obtained average VMAF score is printed through the logging system.
It requires Netflix's vmaf library (libvmaf) as a prerequisite.
After installing the library it can be enabled using:
@code{./configure --enable-libvmaf}.
If no model path is specified it uses the default model: @code{/usr/local/share/model/vmaf_v0.6.1.pkl}.
On the below examples the input file @file{main.mpg} being processed is
compared with the reference file @file{ref.mpg}.
The filter has following options:
@table @option
@item model_path
Set the model path which is to be used for SVM.
Default value: @code{"/usr/local/share/model/vmaf_v0.6.1.pkl"}
@item log_path
Set the file path to be used to store logs.
@item log_fmt
Set the format of the log file (xml or json).
@item enable_transform
Enables transform for computing vmaf.
@item phone_model
Invokes the phone model which will generate VMAF scores higher than in the
regular model, which is more suitable for laptop, TV, etc. viewing conditions.
@item psnr
Enables computing psnr along with vmaf.
@item ssim
Enables computing ssim along with vmaf.
@item ms_ssim
Enables computing ms_ssim along with vmaf.
@item pool
Set the pool method to be used for computing vmaf.
@end table
For example:
@example
ffmpeg -i main.mpg -i ref.mpg -lavfi libvmaf -f null -
@end example
Example with options:
@example
ffmpeg -i main.mpg -i ref.mpg -lavfi libvmaf="psnr=1:enable_transform=1" -f null -
@end example
@section limiter @section limiter
Limits the pixel components values to the specified range [min, max]. Limits the pixel components values to the specified range [min, max].

View File

@ -216,6 +216,7 @@ OBJS-$(CONFIG_INTERLACE_FILTER) += vf_interlace.o
OBJS-$(CONFIG_INTERLEAVE_FILTER) += f_interleave.o OBJS-$(CONFIG_INTERLEAVE_FILTER) += f_interleave.o
OBJS-$(CONFIG_KERNDEINT_FILTER) += vf_kerndeint.o OBJS-$(CONFIG_KERNDEINT_FILTER) += vf_kerndeint.o
OBJS-$(CONFIG_LENSCORRECTION_FILTER) += vf_lenscorrection.o OBJS-$(CONFIG_LENSCORRECTION_FILTER) += vf_lenscorrection.o
OBJS-$(CONFIG_LIBVMAF_FILTER) += vf_libvmaf.o dualinput.o framesync.o
OBJS-$(CONFIG_LIMITER_FILTER) += vf_limiter.o OBJS-$(CONFIG_LIMITER_FILTER) += vf_limiter.o
OBJS-$(CONFIG_LOOP_FILTER) += f_loop.o OBJS-$(CONFIG_LOOP_FILTER) += f_loop.o
OBJS-$(CONFIG_LUMAKEY_FILTER) += vf_lumakey.o OBJS-$(CONFIG_LUMAKEY_FILTER) += vf_lumakey.o

View File

@ -228,6 +228,7 @@ static void register_all(void)
REGISTER_FILTER(INTERLEAVE, interleave, vf); REGISTER_FILTER(INTERLEAVE, interleave, vf);
REGISTER_FILTER(KERNDEINT, kerndeint, vf); REGISTER_FILTER(KERNDEINT, kerndeint, vf);
REGISTER_FILTER(LENSCORRECTION, lenscorrection, vf); REGISTER_FILTER(LENSCORRECTION, lenscorrection, vf);
REGISTER_FILTER(LIBVMAF, libvmaf, vf);
REGISTER_FILTER(LIMITER, limiter, vf); REGISTER_FILTER(LIMITER, limiter, vf);
REGISTER_FILTER(LOOP, loop, vf); REGISTER_FILTER(LOOP, loop, vf);
REGISTER_FILTER(LUMAKEY, lumakey, vf); REGISTER_FILTER(LUMAKEY, lumakey, vf);

339
libavfilter/vf_libvmaf.c Normal file
View File

@ -0,0 +1,339 @@
/*
* Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
* Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* Calculate the VMAF between two input videos.
*/
#include <inttypes.h>
#include <pthread.h>
#include <libvmaf.h>
#include "libavutil/avstring.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "avfilter.h"
#include "dualinput.h"
#include "drawutils.h"
#include "formats.h"
#include "internal.h"
#include "video.h"
/**
 * Private context of the libvmaf filter.
 *
 * The filter thread and the libvmaf worker thread exchange one frame
 * pair at a time through gmain/gref, guarded by lock/cond.
 */
typedef struct LIBVMAFContext {
    const AVClass *class;           /* must stay first: passed to av_log() */
    FFDualInputContext dinput;      /* pairs frames from the two inputs */
    const AVPixFmtDescriptor *desc; /* pixel format descriptor of the inputs */
    char *format;                   /* pixel format name handed to compute_vmaf() */
    int width;
    int height;
    double vmaf_score;              /* pooled score, written by the worker thread */
    pthread_t vmaf_thread;          /* worker running compute_vmaf() */
    pthread_mutex_t lock;           /* protects gmain/gref/frame_set/eof */
    pthread_cond_t cond;            /* producer/consumer hand-off */
    int eof;                        /* set in uninit() to release the worker */
    AVFrame *gmain;                 /* queued distorted ("main") frame */
    AVFrame *gref;                  /* queued reference frame */
    int frame_set;                  /* nonzero while a frame pair is queued */
    char *model_path;               /* path to the SVM model file */
    char *log_path;                 /* optional log file path */
    char *log_fmt;                  /* "xml" or "json" */
    int disable_clip;               /* NOTE(review): no AVOption sets this — always 0? verify */
    int disable_avx;                /* NOTE(review): no AVOption sets this — always 0? verify */
    int enable_transform;
    int phone_model;
    int psnr;                       /* also compute PSNR */
    int ssim;                       /* also compute SSIM */
    int ms_ssim;                    /* also compute MS-SSIM */
    char *pool;                     /* pooling method name */
} LIBVMAFContext;
#define OFFSET(x) offsetof(LIBVMAFContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM

/* User-visible options; see doc/filters.texi for descriptions.
 * NOTE(review): min/max of 0/1 are meaningless for string options —
 * harmless, but 0 would be conventional. */
static const AVOption libvmaf_options[] = {
    {"model_path", "Set the model to be used for computing vmaf.", OFFSET(model_path), AV_OPT_TYPE_STRING, {.str="/usr/local/share/model/vmaf_v0.6.1.pkl"}, 0, 1, FLAGS},
    {"log_path", "Set the file path to be used to store logs.", OFFSET(log_path), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
    {"log_fmt", "Set the format of the log (xml or json).", OFFSET(log_fmt), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
    {"enable_transform", "Enables transform for computing vmaf.", OFFSET(enable_transform), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
    {"phone_model", "Invokes the phone model that will generate higher VMAF scores.", OFFSET(phone_model), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
    {"psnr", "Enables computing psnr along with vmaf.", OFFSET(psnr), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
    {"ssim", "Enables computing ssim along with vmaf.", OFFSET(ssim), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
    {"ms_ssim", "Enables computing ms-ssim along with vmaf.", OFFSET(ms_ssim), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
    {"pool", "Set the pool method to be used for computing vmaf.", OFFSET(pool), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
    { NULL }
};

AVFILTER_DEFINE_CLASS(libvmaf);
/**
 * Define a libvmaf read_frame() callback for a given sample type.
 *
 * libvmaf calls this repeatedly from the worker thread to pull the next
 * frame pair.  The callback blocks on cond until the filter thread has
 * queued a pair (frame_set) or input ended (eof); it then converts both
 * planes' luma samples to float into the caller-provided buffers.
 * Returning 2 signals "no more frames" to libvmaf; 0 means a pair was
 * delivered.  No comments inside the macro body: // would eat the
 * backslash continuations.
 */
#define read_frame_fn(type, bits) \
static int read_frame_##bits##bit(float *ref_data, float *main_data, \
                                  float *temp_data, int stride, \
                                  double *score, void *ctx) \
{ \
    LIBVMAFContext *s = (LIBVMAFContext *) ctx; \
    int ret; \
    \
    pthread_mutex_lock(&s->lock); \
    \
    while (!s->frame_set && !s->eof) { \
        pthread_cond_wait(&s->cond, &s->lock); \
    } \
    \
    if (s->frame_set) { \
        int ref_stride = s->gref->linesize[0]; \
        int main_stride = s->gmain->linesize[0]; \
        \
        const type *ref_ptr = (const type *) s->gref->data[0]; \
        const type *main_ptr = (const type *) s->gmain->data[0]; \
        \
        float *ptr = ref_data; \
        \
        int h = s->height; \
        int w = s->width; \
        \
        int i,j; \
        \
        for (i = 0; i < h; i++) { \
            for ( j = 0; j < w; j++) { \
                ptr[j] = (float)ref_ptr[j]; \
            } \
            ref_ptr += ref_stride / sizeof(*ref_ptr); \
            ptr += stride / sizeof(*ptr); \
        } \
        \
        ptr = main_data; \
        \
        for (i = 0; i < h; i++) { \
            for (j = 0; j < w; j++) { \
                ptr[j] = (float)main_ptr[j]; \
            } \
            main_ptr += main_stride / sizeof(*main_ptr); \
            ptr += stride / sizeof(*ptr); \
        } \
    } \
    \
    ret = !s->frame_set; \
    \
    s->frame_set = 0; \
    \
    pthread_cond_signal(&s->cond); \
    pthread_mutex_unlock(&s->lock); \
    \
    if (ret) { \
        return 2; \
    } \
    \
    return 0; \
}

/* Instantiate the callback for 8-bit and 10-bit (uint16_t) sources. */
read_frame_fn(uint8_t, 8);
read_frame_fn(uint16_t, 10);
/**
 * Run libvmaf over the whole stream (executed on the worker thread).
 *
 * Selects the sample-reading callback by input bit depth, then calls
 * compute_vmaf(), which pulls frames through that callback until EOF
 * and stores the pooled score in s->vmaf_score.
 */
static void compute_vmaf_score(LIBVMAFContext *s)
{
    int (*read_frame)(float *ref_data, float *main_data, float *temp_data,
                      int stride, double *score, void *ctx);

    if (s->desc->comp[0].depth <= 8) {
        read_frame = read_frame_8bit;
    } else {
        read_frame = read_frame_10bit;
    }

    /* Fix: pass the context's disable_clip/disable_avx fields instead of
     * hard-coded zeroes, so the struct fields are not silently dead.
     * Filter priv data is zero-initialized, so behavior is unchanged. */
    s->vmaf_score = compute_vmaf(s->format, s->width, s->height, read_frame, s,
                                 s->model_path, s->log_path, s->log_fmt,
                                 s->disable_clip, s->disable_avx,
                                 s->enable_transform, s->phone_model, s->psnr,
                                 s->ssim, s->ms_ssim, s->pool);
}
/**
 * Worker thread entry point: computes the score, then logs it.
 *
 * ctx is really a LIBVMAFContext*; av_log() accepts it because the
 * context's first member is the AVClass pointer.
 */
static void *call_vmaf(void *ctx)
{
    LIBVMAFContext *s = ctx;

    /* Blocks until all frames have been consumed and pooled. */
    compute_vmaf_score(s);
    av_log(ctx, AV_LOG_INFO, "VMAF score: %f\n", s->vmaf_score);
    return NULL;
}
/**
 * Dual-input process callback: queues a main/reference frame pair for
 * the libvmaf worker thread and passes the main frame through unchanged.
 *
 * Blocks until the worker has consumed the previously queued pair.
 */
static AVFrame *do_vmaf(AVFilterContext *ctx, AVFrame *main, const AVFrame *ref)
{
    LIBVMAFContext *s = ctx->priv;

    pthread_mutex_lock(&s->lock);

    /* Wait for the worker to release the previous frame pair. */
    while (s->frame_set != 0) {
        pthread_cond_wait(&s->cond, &s->lock);
    }

    /* NOTE(review): av_frame_ref() return values are ignored, and gref/
     * gmain are re-referenced without av_frame_unref() first — this looks
     * like it leaks the previous references; verify against av_frame_ref
     * semantics. */
    av_frame_ref(s->gref, ref);
    av_frame_ref(s->gmain, main);

    s->frame_set = 1;

    pthread_cond_signal(&s->cond);
    pthread_mutex_unlock(&s->lock);

    return main;
}
/**
 * Filter init: allocate the hand-off frames and the synchronization
 * primitives, and hook the dual-input process callback.
 *
 * @return 0 on success, AVERROR(ENOMEM) if frame allocation fails.
 */
static av_cold int init(AVFilterContext *ctx)
{
    LIBVMAFContext *s = ctx->priv;

    /* Init the lock/cond first so uninit() can always operate on them,
     * even when we bail out below. */
    pthread_mutex_init(&s->lock, NULL);
    pthread_cond_init (&s->cond, NULL);

    /* Fix: av_frame_alloc() can fail; the original left the NULLs to be
     * dereferenced later in do_vmaf(). */
    s->gref = av_frame_alloc();
    s->gmain = av_frame_alloc();
    if (!s->gref || !s->gmain)
        return AVERROR(ENOMEM);

    s->dinput.process = do_vmaf;

    return 0;
}
/**
 * Advertise the pixel formats both inputs and the output may use:
 * 8- and 10-bit planar YUV, the formats libvmaf can consume.
 */
static int query_formats(AVFilterContext *ctx)
{
    static const enum AVPixelFormat pix_fmts[] = {
        AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P,
        AV_PIX_FMT_YUV444P10LE, AV_PIX_FMT_YUV422P10LE, AV_PIX_FMT_YUV420P10LE,
        AV_PIX_FMT_NONE
    };

    AVFilterFormats *list = ff_make_format_list(pix_fmts);
    if (!list)
        return AVERROR(ENOMEM);

    return ff_set_common_formats(ctx, list);
}
/**
 * Configure the reference input link: validate that both inputs match in
 * size and pixel format, capture the stream geometry, and launch the
 * libvmaf worker thread.
 *
 * @return 0 on success, AVERROR(EINVAL) on mismatched inputs or thread
 *         creation failure.
 */
static int config_input_ref(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    LIBVMAFContext *s = ctx->priv;
    int th;

    if (ctx->inputs[0]->w != ctx->inputs[1]->w ||
        ctx->inputs[0]->h != ctx->inputs[1]->h) {
        av_log(ctx, AV_LOG_ERROR, "Width and height of input videos must be same.\n");
        return AVERROR(EINVAL);
    }
    if (ctx->inputs[0]->format != ctx->inputs[1]->format) {
        av_log(ctx, AV_LOG_ERROR, "Inputs must be of same pixel format.\n");
        return AVERROR(EINVAL);
    }

    s->desc = av_pix_fmt_desc_get(inlink->format);
    s->width = ctx->inputs[0]->w;
    s->height = ctx->inputs[0]->h;
    /* Fix: s->format was never assigned anywhere in this file, so
     * compute_vmaf() received a NULL format string.  The descriptor name
     * (e.g. "yuv420p", "yuv420p10le") is the form libvmaf expects. */
    s->format = (char *) s->desc->name;

    th = pthread_create(&s->vmaf_thread, NULL, call_vmaf, (void *) s);
    if (th) {
        av_log(ctx, AV_LOG_ERROR, "Thread creation failed.\n");
        return AVERROR(EINVAL);
    }

    return 0;
}
/**
 * Configure the output link: mirror the main input's properties onto the
 * output (the filter passes main frames through) and initialize the
 * dual-input machinery.
 */
static int config_output(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    LIBVMAFContext *s = ctx->priv;
    AVFilterLink *in0 = ctx->inputs[0];
    int ret;

    outlink->w                   = in0->w;
    outlink->h                   = in0->h;
    outlink->time_base           = in0->time_base;
    outlink->sample_aspect_ratio = in0->sample_aspect_ratio;
    outlink->frame_rate          = in0->frame_rate;

    ret = ff_dualinput_init(ctx, &s->dinput);
    if (ret < 0)
        return ret;

    return 0;
}
/**
 * Per-input frame callback: forward the frame to the dual-input layer,
 * which pairs frames from both inputs and invokes do_vmaf().
 */
static int filter_frame(AVFilterLink *inlink, AVFrame *inpicref)
{
    LIBVMAFContext *vmaf = inlink->dst->priv;
    return ff_dualinput_filter_frame(&vmaf->dinput, inlink, inpicref);
}
/**
 * Output request callback: delegate to the dual-input layer, which pulls
 * from whichever input is lagging.
 */
static int request_frame(AVFilterLink *outlink)
{
    LIBVMAFContext *vmaf = outlink->src->priv;
    return ff_dualinput_request_frame(&vmaf->dinput, outlink);
}
/**
 * Release filter state.
 *
 * Ordering matters: eof must be raised and signalled under the lock so
 * the worker's read_frame callback wakes up, reports end-of-stream to
 * libvmaf, and lets pthread_join() complete.
 */
static av_cold void uninit(AVFilterContext *ctx)
{
    LIBVMAFContext *s = ctx->priv;

    ff_dualinput_uninit(&s->dinput);

    /* Wake the worker so it observes EOF instead of waiting forever. */
    pthread_mutex_lock(&s->lock);
    s->eof = 1;
    pthread_cond_signal(&s->cond);
    pthread_mutex_unlock(&s->lock);

    /* NOTE(review): if config_input_ref() never ran (e.g. configuration
     * failed earlier), vmaf_thread was never created and joining it is
     * undefined — verify against the framework's uninit guarantees. */
    pthread_join(s->vmaf_thread, NULL);

    av_frame_free(&s->gref);
    av_frame_free(&s->gmain);

    pthread_mutex_destroy(&s->lock);
    pthread_cond_destroy(&s->cond);
}
/* Two inputs: the distorted "main" stream (passed through) and the
 * pristine "reference".  config_input_ref runs on the second pad, after
 * both links are configured, so it can cross-check their properties. */
static const AVFilterPad libvmaf_inputs[] = {
    {
        .name         = "main",
        .type         = AVMEDIA_TYPE_VIDEO,
        .filter_frame = filter_frame,
    },{
        .name         = "reference",
        .type         = AVMEDIA_TYPE_VIDEO,
        .filter_frame = filter_frame,
        .config_props = config_input_ref,
    },
    { NULL }
};
/* Single output carrying the unmodified main input frames. */
static const AVFilterPad libvmaf_outputs[] = {
    {
        .name          = "default",
        .type          = AVMEDIA_TYPE_VIDEO,
        .config_props  = config_output,
        .request_frame = request_frame,
    },
    { NULL }
};
/* Filter registration entry; the score is reported via av_log() rather
 * than through frame metadata. */
AVFilter ff_vf_libvmaf = {
    .name          = "libvmaf",
    .description   = NULL_IF_CONFIG_SMALL("Calculate the VMAF between two video streams."),
    .init          = init,
    .uninit        = uninit,
    .query_formats = query_formats,
    .priv_size     = sizeof(LIBVMAFContext),
    .priv_class    = &libvmaf_class,
    .inputs        = libvmaf_inputs,
    .outputs       = libvmaf_outputs,
};