1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2024-12-23 12:43:46 +02:00

avfilter: add LIBVMAF filter

This changes the previous vmaf patch to use libvmaf, keeping it separate from a
possible future native implementation of VMAF inside FFmpeg.

Signed-off-by: Ashish Singh <ashk43712@gmail.com>
Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
This commit is contained in:
Ashish Singh 2017-07-15 22:12:33 +05:30 committed by Ronald S. Bultje
parent 4de4308d2a
commit 615479d51c
6 changed files with 411 additions and 0 deletions

View File

@ -27,6 +27,7 @@ version <next>:
- additional frame format support for Interplay MVE movies - additional frame format support for Interplay MVE movies
- support for decoding through D3D11VA in ffmpeg - support for decoding through D3D11VA in ffmpeg
- limiter video filter - limiter video filter
- libvmaf video filter
version 3.3: version 3.3:
- CrystalHD decoder moved to new decode API - CrystalHD decoder moved to new decode API

5
configure vendored
View File

@ -256,6 +256,7 @@ External library support:
--enable-libtwolame enable MP2 encoding via libtwolame [no] --enable-libtwolame enable MP2 encoding via libtwolame [no]
--enable-libv4l2 enable libv4l2/v4l-utils [no] --enable-libv4l2 enable libv4l2/v4l-utils [no]
--enable-libvidstab enable video stabilization using vid.stab [no] --enable-libvidstab enable video stabilization using vid.stab [no]
--enable-libvmaf enable vmaf filter via libvmaf [no]
--enable-libvo-amrwbenc enable AMR-WB encoding via libvo-amrwbenc [no] --enable-libvo-amrwbenc enable AMR-WB encoding via libvo-amrwbenc [no]
--enable-libvorbis enable Vorbis en/decoding via libvorbis, --enable-libvorbis enable Vorbis en/decoding via libvorbis,
native implementation exists [no] native implementation exists [no]
@ -1569,6 +1570,7 @@ EXTERNAL_LIBRARY_LIST="
libtheora libtheora
libtwolame libtwolame
libv4l2 libv4l2
libvmaf
libvorbis libvorbis
libvpx libvpx
libwavpack libwavpack
@ -3186,6 +3188,7 @@ uspp_filter_deps="gpl avcodec"
vaguedenoiser_filter_deps="gpl" vaguedenoiser_filter_deps="gpl"
vidstabdetect_filter_deps="libvidstab" vidstabdetect_filter_deps="libvidstab"
vidstabtransform_filter_deps="libvidstab" vidstabtransform_filter_deps="libvidstab"
vmaf_filter_deps="libvmaf"
zmq_filter_deps="libzmq" zmq_filter_deps="libzmq"
zoompan_filter_deps="swscale" zoompan_filter_deps="swscale"
zscale_filter_deps="libzimg" zscale_filter_deps="libzimg"
@ -5902,6 +5905,8 @@ enabled libtwolame && require libtwolame twolame.h twolame_init -ltwolame
die "ERROR: libtwolame must be installed and version must be >= 0.3.10"; } die "ERROR: libtwolame must be installed and version must be >= 0.3.10"; }
enabled libv4l2 && require_pkg_config libv4l2 libv4l2.h v4l2_ioctl enabled libv4l2 && require_pkg_config libv4l2 libv4l2.h v4l2_ioctl
enabled libvidstab && require_pkg_config "vidstab >= 0.98" vid.stab/libvidstab.h vsMotionDetectInit enabled libvidstab && require_pkg_config "vidstab >= 0.98" vid.stab/libvidstab.h vsMotionDetectInit
enabled libvmaf && { check_lib libvmaf "libvmaf.h" "compute_vmaf" -lvmaf -lstdc++ -lpthread -lm ||
die "ERROR: libvmaf must be installed"; }
enabled libvo_amrwbenc && require libvo_amrwbenc vo-amrwbenc/enc_if.h E_IF_init -lvo-amrwbenc enabled libvo_amrwbenc && require libvo_amrwbenc vo-amrwbenc/enc_if.h E_IF_init -lvo-amrwbenc
enabled libvorbis && require_pkg_config vorbis vorbis/codec.h vorbis_info_init && enabled libvorbis && require_pkg_config vorbis vorbis/codec.h vorbis_info_init &&
require_pkg_config vorbisenc vorbis/vorbisenc.h vorbis_encode_init require_pkg_config vorbisenc vorbis/vorbisenc.h vorbis_encode_init

View File

@ -9639,6 +9639,70 @@ The formula that generates the correction is:
where @var{r_0} is halve of the image diagonal and @var{r_src} and @var{r_tgt} are the where @var{r_0} is halve of the image diagonal and @var{r_src} and @var{r_tgt} are the
distances from the focal point in the source and target images, respectively. distances from the focal point in the source and target images, respectively.
@section libvmaf
Obtain the average VMAF (Video Multi-Method Assessment Fusion)
score between two input videos.
This filter takes two input videos.
Both video inputs must have the same resolution and pixel format for
this filter to work correctly. Also it assumes that both inputs
have the same number of frames, which are compared one by one.
The obtained average VMAF score is printed through the logging system.
It requires Netflix's vmaf library (libvmaf) as a prerequisite.
After installing the library it can be enabled using:
@code{./configure --enable-libvmaf}.
If no model path is specified it uses the default model: @code{/usr/local/share/model/vmaf_v0.6.1.pkl}.
On the below examples the input file @file{main.mpg} being processed is
compared with the reference file @file{ref.mpg}.
The filter has following options:
@table @option
@item model_path
Set the model path which is to be used for SVM.
Default value: @code{"/usr/local/share/model/vmaf_v0.6.1.pkl"}
@item log_path
Set the file path to be used to store logs.
@item log_fmt
Set the format of the log file (xml or json).
@item enable_transform
Enables transform for computing vmaf.
@item phone_model
Invokes the phone model which will generate VMAF scores higher than in the
regular model, which is more suitable for laptop, TV, etc. viewing conditions.
@item psnr
Enables computing psnr along with vmaf.
@item ssim
Enables computing ssim along with vmaf.
@item ms_ssim
Enables computing ms_ssim along with vmaf.
@item pool
Set the pool method to be used for computing vmaf.
@end table
For example:
@example
ffmpeg -i main.mpg -i ref.mpg -lavfi libvmaf -f null -
@end example
Example with options:
@example
ffmpeg -i main.mpg -i ref.mpg -lavfi libvmaf="psnr=1:enable_transform=1" -f null -
@end example
@section limiter @section limiter
Limits the pixel components values to the specified range [min, max]. Limits the pixel components values to the specified range [min, max].

View File

@ -216,6 +216,7 @@ OBJS-$(CONFIG_INTERLACE_FILTER) += vf_interlace.o
OBJS-$(CONFIG_INTERLEAVE_FILTER) += f_interleave.o OBJS-$(CONFIG_INTERLEAVE_FILTER) += f_interleave.o
OBJS-$(CONFIG_KERNDEINT_FILTER) += vf_kerndeint.o OBJS-$(CONFIG_KERNDEINT_FILTER) += vf_kerndeint.o
OBJS-$(CONFIG_LENSCORRECTION_FILTER) += vf_lenscorrection.o OBJS-$(CONFIG_LENSCORRECTION_FILTER) += vf_lenscorrection.o
OBJS-$(CONFIG_LIBVMAF_FILTER) += vf_libvmaf.o dualinput.o framesync.o
OBJS-$(CONFIG_LIMITER_FILTER) += vf_limiter.o OBJS-$(CONFIG_LIMITER_FILTER) += vf_limiter.o
OBJS-$(CONFIG_LOOP_FILTER) += f_loop.o OBJS-$(CONFIG_LOOP_FILTER) += f_loop.o
OBJS-$(CONFIG_LUMAKEY_FILTER) += vf_lumakey.o OBJS-$(CONFIG_LUMAKEY_FILTER) += vf_lumakey.o

View File

@ -228,6 +228,7 @@ static void register_all(void)
REGISTER_FILTER(INTERLEAVE, interleave, vf); REGISTER_FILTER(INTERLEAVE, interleave, vf);
REGISTER_FILTER(KERNDEINT, kerndeint, vf); REGISTER_FILTER(KERNDEINT, kerndeint, vf);
REGISTER_FILTER(LENSCORRECTION, lenscorrection, vf); REGISTER_FILTER(LENSCORRECTION, lenscorrection, vf);
REGISTER_FILTER(LIBVMAF, libvmaf, vf);
REGISTER_FILTER(LIMITER, limiter, vf); REGISTER_FILTER(LIMITER, limiter, vf);
REGISTER_FILTER(LOOP, loop, vf); REGISTER_FILTER(LOOP, loop, vf);
REGISTER_FILTER(LUMAKEY, lumakey, vf); REGISTER_FILTER(LUMAKEY, lumakey, vf);

339
libavfilter/vf_libvmaf.c Normal file
View File

@ -0,0 +1,339 @@
/*
* Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
* Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* Calculate the VMAF between two input videos.
*/
#include <inttypes.h>
#include <pthread.h>
#include <libvmaf.h>
#include "libavutil/avstring.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "avfilter.h"
#include "dualinput.h"
#include "drawutils.h"
#include "formats.h"
#include "internal.h"
#include "video.h"
/**
 * Private context of the libvmaf filter.
 *
 * The filter thread and the libvmaf worker thread exchange one frame
 * pair at a time through gmain/gref, guarded by lock/cond.
 */
typedef struct LIBVMAFContext {
    const AVClass *class;           /* must stay first: passed to av_log() */
    FFDualInputContext dinput;      /* pairs frames from the two inputs */
    const AVPixFmtDescriptor *desc; /* pixel format descriptor of the inputs */
    char *format;                   /* pixel format name handed to compute_vmaf() */
    int width;
    int height;
    double vmaf_score;              /* pooled score, written by the worker thread */
    pthread_t vmaf_thread;          /* worker running compute_vmaf() */
    pthread_mutex_t lock;           /* protects gmain/gref/frame_set/eof */
    pthread_cond_t cond;            /* producer/consumer hand-off */
    int eof;                        /* set in uninit() to release the worker */
    AVFrame *gmain;                 /* queued distorted ("main") frame */
    AVFrame *gref;                  /* queued reference frame */
    int frame_set;                  /* nonzero while a frame pair is queued */
    char *model_path;               /* path to the SVM model file */
    char *log_path;                 /* optional log file path */
    char *log_fmt;                  /* "xml" or "json" */
    int disable_clip;               /* NOTE(review): no AVOption sets this — always 0? verify */
    int disable_avx;                /* NOTE(review): no AVOption sets this — always 0? verify */
    int enable_transform;
    int phone_model;
    int psnr;                       /* also compute PSNR */
    int ssim;                       /* also compute SSIM */
    int ms_ssim;                    /* also compute MS-SSIM */
    char *pool;                     /* pooling method name */
} LIBVMAFContext;
#define OFFSET(x) offsetof(LIBVMAFContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM

/* User-visible options; see doc/filters.texi for descriptions.
 * NOTE(review): min/max of 0/1 are meaningless for string options —
 * harmless, but 0 would be conventional. */
static const AVOption libvmaf_options[] = {
    {"model_path", "Set the model to be used for computing vmaf.", OFFSET(model_path), AV_OPT_TYPE_STRING, {.str="/usr/local/share/model/vmaf_v0.6.1.pkl"}, 0, 1, FLAGS},
    {"log_path", "Set the file path to be used to store logs.", OFFSET(log_path), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
    {"log_fmt", "Set the format of the log (xml or json).", OFFSET(log_fmt), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
    {"enable_transform", "Enables transform for computing vmaf.", OFFSET(enable_transform), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
    {"phone_model", "Invokes the phone model that will generate higher VMAF scores.", OFFSET(phone_model), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
    {"psnr", "Enables computing psnr along with vmaf.", OFFSET(psnr), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
    {"ssim", "Enables computing ssim along with vmaf.", OFFSET(ssim), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
    {"ms_ssim", "Enables computing ms-ssim along with vmaf.", OFFSET(ms_ssim), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
    {"pool", "Set the pool method to be used for computing vmaf.", OFFSET(pool), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
    { NULL }
};

AVFILTER_DEFINE_CLASS(libvmaf);
/**
 * Define a libvmaf read_frame() callback for a given sample type.
 *
 * libvmaf calls this repeatedly from the worker thread to pull the next
 * frame pair.  The callback blocks on cond until the filter thread has
 * queued a pair (frame_set) or input ended (eof); it then converts both
 * planes' luma samples to float into the caller-provided buffers.
 * Returning 2 signals "no more frames" to libvmaf; 0 means a pair was
 * delivered.  No comments inside the macro body: // would eat the
 * backslash continuations.
 */
#define read_frame_fn(type, bits) \
static int read_frame_##bits##bit(float *ref_data, float *main_data, \
                                  float *temp_data, int stride, \
                                  double *score, void *ctx) \
{ \
    LIBVMAFContext *s = (LIBVMAFContext *) ctx; \
    int ret; \
    \
    pthread_mutex_lock(&s->lock); \
    \
    while (!s->frame_set && !s->eof) { \
        pthread_cond_wait(&s->cond, &s->lock); \
    } \
    \
    if (s->frame_set) { \
        int ref_stride = s->gref->linesize[0]; \
        int main_stride = s->gmain->linesize[0]; \
        \
        const type *ref_ptr = (const type *) s->gref->data[0]; \
        const type *main_ptr = (const type *) s->gmain->data[0]; \
        \
        float *ptr = ref_data; \
        \
        int h = s->height; \
        int w = s->width; \
        \
        int i,j; \
        \
        for (i = 0; i < h; i++) { \
            for ( j = 0; j < w; j++) { \
                ptr[j] = (float)ref_ptr[j]; \
            } \
            ref_ptr += ref_stride / sizeof(*ref_ptr); \
            ptr += stride / sizeof(*ptr); \
        } \
        \
        ptr = main_data; \
        \
        for (i = 0; i < h; i++) { \
            for (j = 0; j < w; j++) { \
                ptr[j] = (float)main_ptr[j]; \
            } \
            main_ptr += main_stride / sizeof(*main_ptr); \
            ptr += stride / sizeof(*ptr); \
        } \
    } \
    \
    ret = !s->frame_set; \
    \
    s->frame_set = 0; \
    \
    pthread_cond_signal(&s->cond); \
    pthread_mutex_unlock(&s->lock); \
    \
    if (ret) { \
        return 2; \
    } \
    \
    return 0; \
}

/* Instantiate the callback for 8-bit and 10-bit (uint16_t) sources. */
read_frame_fn(uint8_t, 8);
read_frame_fn(uint16_t, 10);
/**
 * Run libvmaf over the whole stream (executed on the worker thread).
 *
 * Selects the sample-reading callback by input bit depth, then calls
 * compute_vmaf(), which pulls frames through that callback until EOF
 * and stores the pooled score in s->vmaf_score.
 */
static void compute_vmaf_score(LIBVMAFContext *s)
{
    int (*read_frame)(float *ref_data, float *main_data, float *temp_data,
                      int stride, double *score, void *ctx);

    if (s->desc->comp[0].depth <= 8) {
        read_frame = read_frame_8bit;
    } else {
        read_frame = read_frame_10bit;
    }

    /* Fix: pass the context's disable_clip/disable_avx fields instead of
     * hard-coded zeroes, so the struct fields are not silently dead.
     * Filter priv data is zero-initialized, so behavior is unchanged. */
    s->vmaf_score = compute_vmaf(s->format, s->width, s->height, read_frame, s,
                                 s->model_path, s->log_path, s->log_fmt,
                                 s->disable_clip, s->disable_avx,
                                 s->enable_transform, s->phone_model, s->psnr,
                                 s->ssim, s->ms_ssim, s->pool);
}
/**
 * Worker thread entry point: computes the score, then logs it.
 *
 * ctx is really a LIBVMAFContext*; av_log() accepts it because the
 * context's first member is the AVClass pointer.
 */
static void *call_vmaf(void *ctx)
{
    LIBVMAFContext *s = ctx;

    /* Blocks until all frames have been consumed and pooled. */
    compute_vmaf_score(s);
    av_log(ctx, AV_LOG_INFO, "VMAF score: %f\n", s->vmaf_score);
    return NULL;
}
/**
 * Dual-input process callback: queues a main/reference frame pair for
 * the libvmaf worker thread and passes the main frame through unchanged.
 *
 * Blocks until the worker has consumed the previously queued pair.
 */
static AVFrame *do_vmaf(AVFilterContext *ctx, AVFrame *main, const AVFrame *ref)
{
    LIBVMAFContext *s = ctx->priv;

    pthread_mutex_lock(&s->lock);

    /* Wait for the worker to release the previous frame pair. */
    while (s->frame_set != 0) {
        pthread_cond_wait(&s->cond, &s->lock);
    }

    /* NOTE(review): av_frame_ref() return values are ignored, and gref/
     * gmain are re-referenced without av_frame_unref() first — this looks
     * like it leaks the previous references; verify against av_frame_ref
     * semantics. */
    av_frame_ref(s->gref, ref);
    av_frame_ref(s->gmain, main);

    s->frame_set = 1;

    pthread_cond_signal(&s->cond);
    pthread_mutex_unlock(&s->lock);

    return main;
}
/**
 * Filter init: allocate the hand-off frames and the synchronization
 * primitives, and hook the dual-input process callback.
 *
 * @return 0 on success, AVERROR(ENOMEM) if frame allocation fails.
 */
static av_cold int init(AVFilterContext *ctx)
{
    LIBVMAFContext *s = ctx->priv;

    /* Init the lock/cond first so uninit() can always operate on them,
     * even when we bail out below. */
    pthread_mutex_init(&s->lock, NULL);
    pthread_cond_init (&s->cond, NULL);

    /* Fix: av_frame_alloc() can fail; the original left the NULLs to be
     * dereferenced later in do_vmaf(). */
    s->gref = av_frame_alloc();
    s->gmain = av_frame_alloc();
    if (!s->gref || !s->gmain)
        return AVERROR(ENOMEM);

    s->dinput.process = do_vmaf;

    return 0;
}
/**
 * Advertise the pixel formats both inputs and the output may use:
 * 8- and 10-bit planar YUV, the formats libvmaf can consume.
 */
static int query_formats(AVFilterContext *ctx)
{
    static const enum AVPixelFormat pix_fmts[] = {
        AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P,
        AV_PIX_FMT_YUV444P10LE, AV_PIX_FMT_YUV422P10LE, AV_PIX_FMT_YUV420P10LE,
        AV_PIX_FMT_NONE
    };

    AVFilterFormats *list = ff_make_format_list(pix_fmts);
    if (!list)
        return AVERROR(ENOMEM);

    return ff_set_common_formats(ctx, list);
}
/**
 * Configure the reference input link: validate that both inputs match in
 * size and pixel format, capture the stream geometry, and launch the
 * libvmaf worker thread.
 *
 * @return 0 on success, AVERROR(EINVAL) on mismatched inputs or thread
 *         creation failure.
 */
static int config_input_ref(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    LIBVMAFContext *s = ctx->priv;
    int th;

    if (ctx->inputs[0]->w != ctx->inputs[1]->w ||
        ctx->inputs[0]->h != ctx->inputs[1]->h) {
        av_log(ctx, AV_LOG_ERROR, "Width and height of input videos must be same.\n");
        return AVERROR(EINVAL);
    }
    if (ctx->inputs[0]->format != ctx->inputs[1]->format) {
        av_log(ctx, AV_LOG_ERROR, "Inputs must be of same pixel format.\n");
        return AVERROR(EINVAL);
    }

    s->desc = av_pix_fmt_desc_get(inlink->format);
    s->width = ctx->inputs[0]->w;
    s->height = ctx->inputs[0]->h;
    /* Fix: s->format was never assigned anywhere in this file, so
     * compute_vmaf() received a NULL format string.  The descriptor name
     * (e.g. "yuv420p", "yuv420p10le") is the form libvmaf expects. */
    s->format = (char *) s->desc->name;

    th = pthread_create(&s->vmaf_thread, NULL, call_vmaf, (void *) s);
    if (th) {
        av_log(ctx, AV_LOG_ERROR, "Thread creation failed.\n");
        return AVERROR(EINVAL);
    }

    return 0;
}
/**
 * Configure the output link: mirror the main input's properties onto the
 * output (the filter passes main frames through) and initialize the
 * dual-input machinery.
 */
static int config_output(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    LIBVMAFContext *s = ctx->priv;
    AVFilterLink *in0 = ctx->inputs[0];
    int ret;

    outlink->w                   = in0->w;
    outlink->h                   = in0->h;
    outlink->time_base           = in0->time_base;
    outlink->sample_aspect_ratio = in0->sample_aspect_ratio;
    outlink->frame_rate          = in0->frame_rate;

    ret = ff_dualinput_init(ctx, &s->dinput);
    if (ret < 0)
        return ret;

    return 0;
}
/**
 * Per-input frame callback: forward the frame to the dual-input layer,
 * which pairs frames from both inputs and invokes do_vmaf().
 */
static int filter_frame(AVFilterLink *inlink, AVFrame *inpicref)
{
    LIBVMAFContext *vmaf = inlink->dst->priv;
    return ff_dualinput_filter_frame(&vmaf->dinput, inlink, inpicref);
}
/**
 * Output request callback: delegate to the dual-input layer, which pulls
 * from whichever input is lagging.
 */
static int request_frame(AVFilterLink *outlink)
{
    LIBVMAFContext *vmaf = outlink->src->priv;
    return ff_dualinput_request_frame(&vmaf->dinput, outlink);
}
/**
 * Release filter state.
 *
 * Ordering matters: eof must be raised and signalled under the lock so
 * the worker's read_frame callback wakes up, reports end-of-stream to
 * libvmaf, and lets pthread_join() complete.
 */
static av_cold void uninit(AVFilterContext *ctx)
{
    LIBVMAFContext *s = ctx->priv;

    ff_dualinput_uninit(&s->dinput);

    /* Wake the worker so it observes EOF instead of waiting forever. */
    pthread_mutex_lock(&s->lock);
    s->eof = 1;
    pthread_cond_signal(&s->cond);
    pthread_mutex_unlock(&s->lock);

    /* NOTE(review): if config_input_ref() never ran (e.g. configuration
     * failed earlier), vmaf_thread was never created and joining it is
     * undefined — verify against the framework's uninit guarantees. */
    pthread_join(s->vmaf_thread, NULL);

    av_frame_free(&s->gref);
    av_frame_free(&s->gmain);

    pthread_mutex_destroy(&s->lock);
    pthread_cond_destroy(&s->cond);
}
/* Two inputs: the distorted "main" stream (passed through) and the
 * pristine "reference".  config_input_ref runs on the second pad, after
 * both links are configured, so it can cross-check their properties. */
static const AVFilterPad libvmaf_inputs[] = {
    {
        .name         = "main",
        .type         = AVMEDIA_TYPE_VIDEO,
        .filter_frame = filter_frame,
    },{
        .name         = "reference",
        .type         = AVMEDIA_TYPE_VIDEO,
        .filter_frame = filter_frame,
        .config_props = config_input_ref,
    },
    { NULL }
};
/* Single output carrying the unmodified main input frames. */
static const AVFilterPad libvmaf_outputs[] = {
    {
        .name          = "default",
        .type          = AVMEDIA_TYPE_VIDEO,
        .config_props  = config_output,
        .request_frame = request_frame,
    },
    { NULL }
};
/* Filter registration entry; the score is reported via av_log() rather
 * than through frame metadata. */
AVFilter ff_vf_libvmaf = {
    .name          = "libvmaf",
    .description   = NULL_IF_CONFIG_SMALL("Calculate the VMAF between two video streams."),
    .init          = init,
    .uninit        = uninit,
    .query_formats = query_formats,
    .priv_size     = sizeof(LIBVMAFContext),
    .priv_class    = &libvmaf_class,
    .inputs        = libvmaf_inputs,
    .outputs       = libvmaf_outputs,
};