2022-03-22 10:34:02 +02:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2021 Boris Baracaldo
|
|
|
|
* Copyright (c) 2022 Thilo Borgmann
|
|
|
|
*
|
|
|
|
* This file is part of FFmpeg.
|
|
|
|
*
|
2022-04-02 13:14:00 +02:00
|
|
|
* FFmpeg is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License as published by the Free Software Foundation; either
|
|
|
|
* version 2.1 of the License, or (at your option) any later version.
|
2022-03-22 10:34:02 +02:00
|
|
|
*
|
|
|
|
* FFmpeg is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
2022-04-02 13:14:00 +02:00
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* Lesser General Public License for more details.
|
2022-03-22 10:34:02 +02:00
|
|
|
*
|
2022-04-02 13:14:00 +02:00
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
|
|
* License along with FFmpeg; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
2022-03-22 10:34:02 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @file
|
|
|
|
* Calculate Spatial Info (SI) and Temporal Info (TI) scores
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <math.h>
|
|
|
|
|
|
|
|
#include "libavutil/imgutils.h"
|
|
|
|
#include "libavutil/internal.h"
|
|
|
|
#include "libavutil/opt.h"
|
|
|
|
|
|
|
|
#include "avfilter.h"
|
|
|
|
#include "formats.h"
|
|
|
|
#include "internal.h"
|
|
|
|
#include "video.h"
|
|
|
|
|
|
|
|
/* 3x3 kernel used by convolve_sobel() for the x direction, stored row by row. */
static const int X_FILTER[9] = {
    1, 0, -1,   /* top row    */
    2, 0, -2,   /* middle row */
    1, 0, -1,   /* bottom row */
};
|
|
|
|
|
|
|
|
/* 3x3 kernel used by convolve_sobel() for the y direction, stored row by row. */
static const int Y_FILTER[9] = {
     1,  2,  1,   /* top row    */
     0,  0,  0,   /* middle row */
    -1, -2, -1,   /* bottom row */
};
|
|
|
|
|
|
|
|
typedef struct SiTiContext {
|
|
|
|
const AVClass *class;
|
|
|
|
int pixel_depth;
|
|
|
|
int width, height;
|
|
|
|
uint64_t nb_frames;
|
|
|
|
uint8_t *prev_frame;
|
|
|
|
float max_si;
|
|
|
|
float max_ti;
|
|
|
|
float min_si;
|
|
|
|
float min_ti;
|
|
|
|
float sum_si;
|
|
|
|
float sum_ti;
|
|
|
|
float *gradient_matrix;
|
|
|
|
float *motion_matrix;
|
|
|
|
int full_range;
|
|
|
|
int print_summary;
|
|
|
|
} SiTiContext;
|
|
|
|
|
|
|
|
static const enum AVPixelFormat pix_fmts[] = {
|
|
|
|
AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
|
|
|
|
AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
|
|
|
|
AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10,
|
|
|
|
AV_PIX_FMT_NONE
|
|
|
|
};
|
|
|
|
|
|
|
|
/**
 * Filter init callback. Only user options are known at this point (no input
 * data yet); the running SI/TI maxima are reset to zero.
 *
 * @return always 0
 */
static av_cold int init(AVFilterContext *ctx)
{
    SiTiContext *siti = ctx->priv;

    siti->max_si = siti->max_ti = 0;

    return 0;
}
|
|
|
|
|
|
|
|
/**
 * Filter uninit callback.
 *
 * When the print_summary option is set, logs the frame count together with
 * the average, maximum and minimum SI and TI scores accumulated by
 * filter_frame(). Always releases the working buffers.
 */
static av_cold void uninit(AVFilterContext *ctx)
{
    SiTiContext *s = ctx->priv;

    if (s->print_summary) {
        // With no frames processed the sums are 0 and 0/0 would print NaN;
        // report 0 instead.
        float avg_si = s->nb_frames ? s->sum_si / s->nb_frames : 0.0f;
        float avg_ti = s->nb_frames ? s->sum_ti / s->nb_frames : 0.0f;

        // nb_frames is uint64_t, so it must be printed with PRIu64.
        av_log(ctx, AV_LOG_INFO,
               "SITI Summary:\nTotal frames: %"PRIu64"\n\n"
               "Spatial Information:\nAverage: %f\nMax: %f\nMin: %f\n\n"
               "Temporal Information:\nAverage: %f\nMax: %f\nMin: %f\n",
               s->nb_frames, avg_si, s->max_si, s->min_si, avg_ti, s->max_ti, s->min_ti
        );
    }

    av_freep(&s->prev_frame);
    av_freep(&s->gradient_matrix);
    av_freep(&s->motion_matrix);
}
|
|
|
|
|
|
|
|
/**
 * Input link configuration callback: video parameters are available here.
 *
 * Records the sample size and frame dimensions and (re)allocates the three
 * working buffers: the previous-frame copy, the Sobel gradient matrix
 * (interior pixels only) and the motion matrix (all pixels).
 *
 * @return 0 on success, AVERROR(ENOMEM) if any allocation fails
 */
static int config_input(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    SiTiContext *s = ctx->priv;
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
    int max_pixsteps[4];
    size_t pixel_sz;
    size_t data_sz;
    size_t gradient_w;
    size_t gradient_h;
    size_t gradient_sz;
    size_t motion_sz;

    av_image_fill_max_pixsteps(max_pixsteps, NULL, desc);

    // free previous buffers in case they are allocated already
    // NOTE(review): if this runs again after frames were processed, nb_frames
    // stays > 1 while the new prev_frame is uninitialized, so the next
    // calculate_motion() would diff against garbage - confirm whether
    // reconfiguration can actually happen for this filter.
    av_freep(&s->prev_frame);
    av_freep(&s->gradient_matrix);
    av_freep(&s->motion_matrix);

    s->pixel_depth = max_pixsteps[0];
    s->width       = inlink->w;
    s->height      = inlink->h;
    pixel_sz       = s->pixel_depth == 1 ? sizeof(uint8_t) : sizeof(uint16_t);

    data_sz   = (size_t)s->width * pixel_sz * s->height;
    motion_sz = (size_t)s->width * sizeof(float) * s->height;

    // The gradient matrix only covers pixels with a full 3x3 neighbourhood.
    // Clamp the interior size at 0 so that frames narrower or shorter than
    // 3 pixels cannot turn (width - 2) or (height - 2) into a huge size_t.
    gradient_w  = s->width  > 2 ? (size_t)s->width  - 2 : 0;
    gradient_h  = s->height > 2 ? (size_t)s->height - 2 : 0;
    gradient_sz = gradient_w * gradient_h * sizeof(float);
    if (!gradient_sz)
        gradient_sz = sizeof(float); // never request a zero-sized buffer

    s->prev_frame      = av_malloc(data_sz);
    s->gradient_matrix = av_malloc(gradient_sz);
    s->motion_matrix   = av_malloc(motion_sz);

    // Partially allocated buffers are released later by uninit().
    if (!s->prev_frame || !s->gradient_matrix || !s->motion_matrix) {
        return AVERROR(ENOMEM);
    }

    return 0;
}
|
|
|
|
|
|
|
|
// Determine whether the video is in full or limited range. If not defined, assume limited.
|
|
|
|
static int is_full_range(AVFrame* frame)
|
|
|
|
{
|
|
|
|
// If color range not specified, fallback to pixel format
|
|
|
|
if (frame->color_range == AVCOL_RANGE_UNSPECIFIED || frame->color_range == AVCOL_RANGE_NB)
|
|
|
|
return frame->format == AV_PIX_FMT_YUVJ420P || frame->format == AV_PIX_FMT_YUVJ422P;
|
|
|
|
return frame->color_range == AVCOL_RANGE_JPEG;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Map one limited-range luma sample to full range.
// For 8 bits (factor 1) limited range goes from 16 to 235; for 10 bits
// (factor 4) those bounds are multiplied by 4. Samples outside the limited
// range are clamped before scaling; the scaling uses truncating integer division.
static uint16_t convert_full_range(int factor, uint16_t y)
{
    const int black_level = 16 * factor;
    const int limited_max = 235 * factor - black_level;
    const int full_max    = 256 * factor - 1;
    int clamped = y - black_level;

    if (clamped < 0)
        clamped = 0;
    if (clamped > limited_max)
        clamped = limited_max;

    return full_max * clamped / limited_max;
}
|
|
|
|
|
|
|
|
// Applies sobel convolution
|
|
|
|
static void convolve_sobel(SiTiContext *s, const uint8_t *src, float *dst, int linesize)
|
|
|
|
{
|
|
|
|
double x_conv_sum;
|
|
|
|
double y_conv_sum;
|
|
|
|
float gradient;
|
|
|
|
int ki;
|
|
|
|
int kj;
|
|
|
|
int index;
|
|
|
|
uint16_t data;
|
|
|
|
int filter_width = 3;
|
|
|
|
int filter_size = filter_width * filter_width;
|
|
|
|
int stride = linesize / s->pixel_depth;
|
|
|
|
// For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
|
|
|
|
int factor = s->pixel_depth == 1 ? 1 : 4;
|
|
|
|
|
|
|
|
// Dst matrix is smaller than src since we ignore edges that can't be convolved
|
|
|
|
#define CONVOLVE(bps) \
|
|
|
|
{ \
|
|
|
|
uint##bps##_t *vsrc = (uint##bps##_t*)src; \
|
|
|
|
for (int j = 1; j < s->height - 1; j++) { \
|
|
|
|
for (int i = 1; i < s->width - 1; i++) { \
|
|
|
|
x_conv_sum = 0.0; \
|
|
|
|
y_conv_sum = 0.0; \
|
|
|
|
for (int k = 0; k < filter_size; k++) { \
|
|
|
|
ki = k % filter_width - 1; \
|
|
|
|
kj = floor(k / filter_width) - 1; \
|
|
|
|
index = (j + kj) * stride + (i + ki); \
|
|
|
|
data = s->full_range ? vsrc[index] : convert_full_range(factor, vsrc[index]); \
|
|
|
|
x_conv_sum += data * X_FILTER[k]; \
|
|
|
|
y_conv_sum += data * Y_FILTER[k]; \
|
|
|
|
} \
|
|
|
|
gradient = sqrt(x_conv_sum * x_conv_sum + y_conv_sum * y_conv_sum); \
|
|
|
|
dst[(j - 1) * (s->width - 2) + (i - 1)] = gradient; \
|
|
|
|
} \
|
|
|
|
} \
|
|
|
|
}
|
|
|
|
|
|
|
|
if (s->pixel_depth == 2) {
|
|
|
|
CONVOLVE(16);
|
|
|
|
} else {
|
|
|
|
CONVOLVE(8);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Calculate pixel difference between current and previous frame, and update previous
|
|
|
|
static void calculate_motion(SiTiContext *s, const uint8_t *curr,
|
|
|
|
float *motion_matrix, int linesize)
|
|
|
|
{
|
|
|
|
int stride = linesize / s->pixel_depth;
|
|
|
|
float motion;
|
|
|
|
int curr_index;
|
|
|
|
int prev_index;
|
|
|
|
uint16_t curr_data;
|
|
|
|
// For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
|
|
|
|
int factor = s->pixel_depth == 1 ? 1 : 4;
|
|
|
|
|
|
|
|
// Previous frame is already converted to full range
|
|
|
|
#define CALCULATE(bps) \
|
|
|
|
{ \
|
|
|
|
uint##bps##_t *vsrc = (uint##bps##_t*)curr; \
|
|
|
|
uint##bps##_t *vdst = (uint##bps##_t*)s->prev_frame; \
|
|
|
|
for (int j = 0; j < s->height; j++) { \
|
|
|
|
for (int i = 0; i < s->width; i++) { \
|
|
|
|
motion = 0; \
|
|
|
|
curr_index = j * stride + i; \
|
|
|
|
prev_index = j * s->width + i; \
|
|
|
|
curr_data = s->full_range ? vsrc[curr_index] : convert_full_range(factor, vsrc[curr_index]); \
|
|
|
|
if (s->nb_frames > 1) \
|
|
|
|
motion = curr_data - vdst[prev_index]; \
|
|
|
|
vdst[prev_index] = curr_data; \
|
|
|
|
motion_matrix[j * s->width + i] = motion; \
|
|
|
|
} \
|
|
|
|
} \
|
|
|
|
}
|
|
|
|
|
|
|
|
if (s->pixel_depth == 2) {
|
|
|
|
CALCULATE(16);
|
|
|
|
} else {
|
|
|
|
CALCULATE(8);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static float std_deviation(float *img_metrics, int width, int height)
|
|
|
|
{
|
|
|
|
int size = height * width;
|
|
|
|
double mean = 0.0;
|
|
|
|
double sqr_diff = 0;
|
|
|
|
|
|
|
|
for (int j = 0; j < height; j++)
|
|
|
|
for (int i = 0; i < width; i++)
|
|
|
|
mean += img_metrics[j * width + i];
|
|
|
|
|
|
|
|
mean /= size;
|
|
|
|
|
|
|
|
for (int j = 0; j < height; j++) {
|
|
|
|
for (int i = 0; i < width; i++) {
|
|
|
|
float mean_diff = img_metrics[j * width + i] - mean;
|
|
|
|
sqr_diff += (mean_diff * mean_diff);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
sqr_diff = sqr_diff / size;
|
|
|
|
return sqrt(sqr_diff);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Store d under key in the metadata dictionary, formatted with two decimals.
static void set_meta(AVDictionary **metadata, const char *key, float d)
{
    char text[128];

    snprintf(text, sizeof(text), "%0.2f", d);
    av_dict_set(metadata, key, text, 0);
}
|
|
|
|
|
|
|
|
/**
 * Per-frame callback: compute the SI and TI scores of the first plane,
 * update the running min/max/sum statistics, attach both scores to the
 * frame metadata ("lavfi.siti.si" / "lavfi.siti.ti") and pass the frame on.
 *
 * @return the result of ff_filter_frame() on the first output
 */
static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
{
    AVFilterContext *ctx = inlink->dst;
    SiTiContext *s = ctx->priv;
    const uint8_t *plane = frame->data[0];
    const int plane_linesize = frame->linesize[0];
    float si, ti;

    s->full_range = is_full_range(frame);
    s->nb_frames++;

    // SI: standard deviation of the Sobel gradient over the interior pixels
    convolve_sobel(s, plane, s->gradient_matrix, plane_linesize);
    si = std_deviation(s->gradient_matrix, s->width - 2, s->height - 2);

    // TI: standard deviation of the difference against the previous frame
    calculate_motion(s, plane, s->motion_matrix, plane_linesize);
    ti = std_deviation(s->motion_matrix, s->width, s->height);

    // Running statistics; the first frame seeds the minima
    s->sum_si += si;
    s->sum_ti += ti;
    s->max_si = fmaxf(si, s->max_si);
    s->max_ti = fmaxf(ti, s->max_ti);
    if (s->nb_frames == 1) {
        s->min_si = si;
        s->min_ti = ti;
    } else {
        s->min_si = fminf(si, s->min_si);
        s->min_ti = fminf(ti, s->min_ti);
    }

    // Export both scores as frame metadata
    set_meta(&frame->metadata, "lavfi.siti.si", si);
    set_meta(&frame->metadata, "lavfi.siti.ti", ti);

    return ff_filter_frame(ctx->outputs[0], frame);
}
|
|
|
|
|
|
|
|
#define OFFSET(x) offsetof(SiTiContext, x)
|
|
|
|
#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
|
|
|
|
|
|
|
|
static const AVOption siti_options[] = {
|
|
|
|
{ "print_summary", "Print summary showing average values", OFFSET(print_summary), AV_OPT_TYPE_BOOL, { .i64=0 }, 0, 1, FLAGS },
|
|
|
|
{ NULL }
|
|
|
|
};
|
|
|
|
|
|
|
|
AVFILTER_DEFINE_CLASS(siti);
|
|
|
|
|
|
|
|
static const AVFilterPad avfilter_vf_siti_inputs[] = {
|
|
|
|
{
|
|
|
|
.name = "default",
|
|
|
|
.type = AVMEDIA_TYPE_VIDEO,
|
|
|
|
.config_props = config_input,
|
|
|
|
.filter_frame = filter_frame,
|
|
|
|
},
|
|
|
|
};
|
|
|
|
|
|
|
|
static const AVFilterPad avfilter_vf_siti_outputs[] = {
|
|
|
|
{
|
|
|
|
.name = "default",
|
|
|
|
.type = AVMEDIA_TYPE_VIDEO
|
|
|
|
},
|
|
|
|
};
|
|
|
|
|
2022-04-01 20:54:54 +02:00
|
|
|
const AVFilter ff_vf_siti = {
|
2022-03-22 10:34:02 +02:00
|
|
|
.name = "siti",
|
|
|
|
.description = NULL_IF_CONFIG_SMALL("Calculate spatial information (SI) and temporal information (TI)."),
|
|
|
|
.priv_size = sizeof(SiTiContext),
|
|
|
|
.priv_class = &siti_class,
|
|
|
|
.init = init,
|
|
|
|
.uninit = uninit,
|
|
|
|
.flags = AVFILTER_FLAG_METADATA_ONLY,
|
|
|
|
FILTER_PIXFMTS_ARRAY(pix_fmts),
|
|
|
|
FILTER_INPUTS(avfilter_vf_siti_inputs),
|
|
|
|
FILTER_OUTPUTS(avfilter_vf_siti_outputs),
|
|
|
|
};
|