You've already forked FFmpeg
							
							
				mirror of
				https://github.com/FFmpeg/FFmpeg.git
				synced 2025-10-30 23:18:11 +02:00 
			
		
		
		
	lavfilter: Add SITI filter
Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined in ITU-T P.910: Subjective video quality assessment methods for multimedia applications.
This commit is contained in:
		
				
					committed by
					
						 Thilo Borgmann
						Thilo Borgmann
					
				
			
			
				
	
			
			
			
						parent
						
							451300d0e8
						
					
				
				
					commit
					80831e742b
				
			| @@ -6,6 +6,7 @@ version 5.1: | ||||
| - dropped obsolete XvMC hwaccel | ||||
| - pcm-bluray encoder | ||||
| - DFPWM audio encoder/decoder and raw muxer/demuxer | ||||
| - SITI filter | ||||
|  | ||||
|  | ||||
| version 5.0: | ||||
|   | ||||
| @@ -20512,6 +20512,29 @@ ffmpeg -i input1.mkv -i input2.mkv -filter_complex "[0:v][1:v] signature=nb_inpu | ||||
|  | ||||
| @end itemize | ||||
|  | ||||
| @anchor{siti} | ||||
| @section siti | ||||
|  | ||||
| Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined | ||||
| in ITU-T P.910: Subjective video quality assessment methods for multimedia | ||||
| applications. Available PDF at @url{https://www.itu.int/rec/T-REC-P.910-199909-S/en }. | ||||
|  | ||||
| It accepts the following option: | ||||
|  | ||||
| @table @option | ||||
| @item print_summary | ||||
| If set to 1, Summary statistics will be printed to the console. Default 0. | ||||
| @end table | ||||
|  | ||||
| @subsection Examples | ||||
| @itemize | ||||
| @item | ||||
| To calculate SI/TI metrics and print summary: | ||||
| @example | ||||
| ffmpeg -i input.mp4 -vf siti=print_summary=1 -f null - | ||||
| @end example | ||||
| @end itemize | ||||
|  | ||||
| @anchor{smartblur} | ||||
| @section smartblur | ||||
|  | ||||
|   | ||||
| @@ -457,6 +457,7 @@ OBJS-$(CONFIG_SMARTBLUR_FILTER)              += vf_smartblur.o | ||||
| OBJS-$(CONFIG_SOBEL_FILTER)                  += vf_convolution.o | ||||
| OBJS-$(CONFIG_SOBEL_OPENCL_FILTER)           += vf_convolution_opencl.o opencl.o \ | ||||
|                                                 opencl/convolution.o | ||||
| OBJS-$(CONFIG_SITI_FILTER)                   += vf_siti.o | ||||
| OBJS-$(CONFIG_SPLIT_FILTER)                  += split.o | ||||
| OBJS-$(CONFIG_SPP_FILTER)                    += vf_spp.o qp_table.o | ||||
| OBJS-$(CONFIG_SR_FILTER)                     += vf_sr.o | ||||
|   | ||||
| @@ -432,6 +432,7 @@ extern const AVFilter ff_vf_shuffleplanes; | ||||
| extern const AVFilter ff_vf_sidedata; | ||||
| extern const AVFilter ff_vf_signalstats; | ||||
| extern const AVFilter ff_vf_signature; | ||||
| extern const AVFilter ff_vf_siti; | ||||
| extern const AVFilter ff_vf_smartblur; | ||||
| extern const AVFilter ff_vf_sobel; | ||||
| extern const AVFilter ff_vf_sobel_opencl; | ||||
|   | ||||
| @@ -31,7 +31,7 @@ | ||||
|  | ||||
| #include "version_major.h" | ||||
|  | ||||
| #define LIBAVFILTER_VERSION_MINOR  29 | ||||
| #define LIBAVFILTER_VERSION_MINOR  30 | ||||
| #define LIBAVFILTER_VERSION_MICRO 100 | ||||
|  | ||||
|  | ||||
|   | ||||
							
								
								
									
										349
									
								
								libavfilter/vf_siti.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										349
									
								
								libavfilter/vf_siti.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,349 @@ | ||||
| /* | ||||
|  * Copyright (c) 2021 Boris Baracaldo | ||||
|  * Copyright (c) 2022 Thilo Borgmann | ||||
|  * | ||||
|  * This file is part of FFmpeg. | ||||
|  * | ||||
|  * FFmpeg is free software; you can redistribute it and/or modify | ||||
|  * it under the terms of the GNU General Public License as published by | ||||
|  * the Free Software Foundation; either version 2 of the License, or | ||||
|  * (at your option) any later version. | ||||
|  * | ||||
|  * FFmpeg is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|  * GNU General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU General Public License along | ||||
|  * with FFmpeg; if not, write to the Free Software Foundation, Inc., | ||||
|  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  */ | ||||
|  | ||||
| /** | ||||
|  * @file | ||||
|  * Calculate Spatial Info (SI) and Temporal Info (TI) scores | ||||
|  */ | ||||
|  | ||||
| #include <math.h> | ||||
|  | ||||
| #include "libavutil/imgutils.h" | ||||
| #include "libavutil/internal.h" | ||||
| #include "libavutil/opt.h" | ||||
|  | ||||
| #include "avfilter.h" | ||||
| #include "formats.h" | ||||
| #include "internal.h" | ||||
| #include "video.h" | ||||
|  | ||||
| static const int X_FILTER[9] = { | ||||
|     1, 0, -1, | ||||
|     2, 0, -2, | ||||
|     1, 0, -1 | ||||
| }; | ||||
|  | ||||
| static const int Y_FILTER[9] = { | ||||
|     1, 2, 1, | ||||
|     0, 0, 0, | ||||
|     -1, -2, -1 | ||||
| }; | ||||
|  | ||||
| typedef struct SiTiContext { | ||||
|     const AVClass *class; | ||||
|     int pixel_depth; | ||||
|     int width, height; | ||||
|     uint64_t nb_frames; | ||||
|     uint8_t *prev_frame; | ||||
|     float max_si; | ||||
|     float max_ti; | ||||
|     float min_si; | ||||
|     float min_ti; | ||||
|     float sum_si; | ||||
|     float sum_ti; | ||||
|     float *gradient_matrix; | ||||
|     float *motion_matrix; | ||||
|     int full_range; | ||||
|     int print_summary; | ||||
| } SiTiContext; | ||||
|  | ||||
| static const enum AVPixelFormat pix_fmts[] = { | ||||
|     AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, | ||||
|     AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P, | ||||
|     AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, | ||||
|     AV_PIX_FMT_NONE | ||||
| }; | ||||
|  | ||||
| static av_cold int init(AVFilterContext *ctx) | ||||
| { | ||||
|     // User options but no input data | ||||
|     SiTiContext *s = ctx->priv; | ||||
|     s->max_si = 0; | ||||
|     s->max_ti = 0; | ||||
|     return 0; | ||||
| } | ||||
|  | ||||
| static av_cold void uninit(AVFilterContext *ctx) | ||||
| { | ||||
|     SiTiContext *s = ctx->priv; | ||||
|  | ||||
|     if (s->print_summary) { | ||||
|         float avg_si = s->sum_si / s->nb_frames; | ||||
|         float avg_ti = s->sum_ti / s->nb_frames; | ||||
|         av_log(ctx, AV_LOG_INFO, | ||||
|                "SITI Summary:\nTotal frames: %"PRId64"\n\n" | ||||
|                "Spatial Information:\nAverage: %f\nMax: %f\nMin: %f\n\n" | ||||
|                "Temporal Information:\nAverage: %f\nMax: %f\nMin: %f\n", | ||||
|                s->nb_frames, avg_si, s->max_si, s->min_si, avg_ti, s->max_ti, s->min_ti | ||||
|         ); | ||||
|     } | ||||
|  | ||||
|     av_freep(&s->prev_frame); | ||||
|     av_freep(&s->gradient_matrix); | ||||
|     av_freep(&s->motion_matrix); | ||||
| } | ||||
|  | ||||
| static int config_input(AVFilterLink *inlink) | ||||
| { | ||||
|     // Video input data avilable | ||||
|     AVFilterContext *ctx = inlink->dst; | ||||
|     SiTiContext *s = ctx->priv; | ||||
|     int max_pixsteps[4]; | ||||
|     size_t pixel_sz; | ||||
|     size_t data_sz; | ||||
|     size_t gradient_sz; | ||||
|     size_t motion_sz; | ||||
|  | ||||
|     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format); | ||||
|     av_image_fill_max_pixsteps(max_pixsteps, NULL, desc); | ||||
|  | ||||
|     // free previous buffers in case they are allocated already | ||||
|     av_freep(&s->prev_frame); | ||||
|     av_freep(&s->gradient_matrix); | ||||
|     av_freep(&s->motion_matrix); | ||||
|  | ||||
|     s->pixel_depth = max_pixsteps[0]; | ||||
|     s->width = inlink->w; | ||||
|     s->height = inlink->h; | ||||
|     pixel_sz = s->pixel_depth == 1 ? sizeof(uint8_t) : sizeof(uint16_t); | ||||
|     data_sz = s->width * pixel_sz * s->height; | ||||
|  | ||||
|     s->prev_frame = av_malloc(data_sz); | ||||
|  | ||||
|     gradient_sz = (s->width - 2) * sizeof(float) * (s->height - 2); | ||||
|     s->gradient_matrix = av_malloc(gradient_sz); | ||||
|  | ||||
|     motion_sz = s->width * sizeof(float) * s->height; | ||||
|     s->motion_matrix = av_malloc(motion_sz); | ||||
|  | ||||
|     if (!s->prev_frame || ! s->gradient_matrix || !s->motion_matrix) { | ||||
|         return AVERROR(ENOMEM); | ||||
|     } | ||||
|  | ||||
|     return 0; | ||||
| } | ||||
|  | ||||
| // Determine whether the video is in full or limited range. If not defined, assume limited. | ||||
| static int is_full_range(AVFrame* frame) | ||||
| { | ||||
|     // If color range not specified, fallback to pixel format | ||||
|     if (frame->color_range == AVCOL_RANGE_UNSPECIFIED || frame->color_range == AVCOL_RANGE_NB) | ||||
|         return frame->format == AV_PIX_FMT_YUVJ420P || frame->format == AV_PIX_FMT_YUVJ422P; | ||||
|     return frame->color_range == AVCOL_RANGE_JPEG; | ||||
| } | ||||
|  | ||||
| // Check frame's color range and convert to full range if needed | ||||
| static uint16_t convert_full_range(int factor, uint16_t y) | ||||
| { | ||||
|     int shift; | ||||
|     int limit_upper; | ||||
|     int full_upper; | ||||
|     int limit_y; | ||||
|  | ||||
|     // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4 | ||||
|     shift = 16 * factor; | ||||
|     limit_upper = 235 * factor - shift; | ||||
|     full_upper = 256 * factor - 1; | ||||
|     limit_y = fminf(fmaxf(y - shift, 0), limit_upper); | ||||
|     return (full_upper * limit_y / limit_upper); | ||||
| } | ||||
|  | ||||
| // Applies sobel convolution | ||||
| static void convolve_sobel(SiTiContext *s, const uint8_t *src, float *dst, int linesize) | ||||
| { | ||||
|     double x_conv_sum; | ||||
|     double y_conv_sum; | ||||
|     float gradient; | ||||
|     int ki; | ||||
|     int kj; | ||||
|     int index; | ||||
|     uint16_t data; | ||||
|     int filter_width = 3; | ||||
|     int filter_size = filter_width * filter_width; | ||||
|     int stride = linesize / s->pixel_depth; | ||||
|     // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4 | ||||
|     int factor = s->pixel_depth == 1 ? 1 : 4; | ||||
|  | ||||
|     // Dst matrix is smaller than src since we ignore edges that can't be convolved | ||||
|     #define CONVOLVE(bps)                                           \ | ||||
|     {                                                               \ | ||||
|         uint##bps##_t *vsrc = (uint##bps##_t*)src;                  \ | ||||
|         for (int j = 1; j < s->height - 1; j++) {                   \ | ||||
|             for (int i = 1; i < s->width - 1; i++) {                \ | ||||
|                 x_conv_sum = 0.0;                                   \ | ||||
|                 y_conv_sum = 0.0;                                   \ | ||||
|                 for (int k = 0; k < filter_size; k++) {             \ | ||||
|                     ki = k % filter_width - 1;                      \ | ||||
|                     kj = floor(k / filter_width) - 1;               \ | ||||
|                     index = (j + kj) * stride + (i + ki);           \ | ||||
|                     data = s->full_range ? vsrc[index] : convert_full_range(factor, vsrc[index]); \ | ||||
|                     x_conv_sum += data * X_FILTER[k];               \ | ||||
|                     y_conv_sum += data * Y_FILTER[k];               \ | ||||
|                 }                                                   \ | ||||
|                 gradient = sqrt(x_conv_sum * x_conv_sum + y_conv_sum * y_conv_sum); \ | ||||
|                 dst[(j - 1) * (s->width - 2) + (i - 1)] = gradient; \ | ||||
|             }                                                       \ | ||||
|         }                                                           \ | ||||
|     } | ||||
|  | ||||
|     if (s->pixel_depth == 2) { | ||||
|         CONVOLVE(16); | ||||
|     } else { | ||||
|         CONVOLVE(8); | ||||
|     } | ||||
| } | ||||
|  | ||||
| // Calculate pixel difference between current and previous frame, and update previous | ||||
| static void calculate_motion(SiTiContext *s, const uint8_t *curr, | ||||
|                              float *motion_matrix, int linesize) | ||||
| { | ||||
|     int stride = linesize / s->pixel_depth; | ||||
|     float motion; | ||||
|     int curr_index; | ||||
|     int prev_index; | ||||
|     uint16_t curr_data; | ||||
|     // For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4 | ||||
|     int factor = s->pixel_depth == 1 ? 1 : 4; | ||||
|  | ||||
|     // Previous frame is already converted to full range | ||||
|     #define CALCULATE(bps)                                           \ | ||||
|     {                                                                \ | ||||
|         uint##bps##_t *vsrc = (uint##bps##_t*)curr;                  \ | ||||
|         uint##bps##_t *vdst = (uint##bps##_t*)s->prev_frame;         \ | ||||
|         for (int j = 0; j < s->height; j++) {                        \ | ||||
|             for (int i = 0; i < s->width; i++) {                     \ | ||||
|                 motion = 0;                                          \ | ||||
|                 curr_index = j * stride + i;                         \ | ||||
|                 prev_index = j * s->width + i;                       \ | ||||
|                 curr_data = s->full_range ? vsrc[curr_index] : convert_full_range(factor, vsrc[curr_index]); \ | ||||
|                 if (s->nb_frames > 1)                                \ | ||||
|                     motion = curr_data - vdst[prev_index];           \ | ||||
|                 vdst[prev_index] = curr_data;                        \ | ||||
|                 motion_matrix[j * s->width + i] = motion;            \ | ||||
|             }                                                        \ | ||||
|         }                                                            \ | ||||
|     } | ||||
|  | ||||
|     if (s->pixel_depth == 2) { | ||||
|         CALCULATE(16); | ||||
|     } else { | ||||
|         CALCULATE(8); | ||||
|     } | ||||
| } | ||||
|  | ||||
| static float std_deviation(float *img_metrics, int width, int height) | ||||
| { | ||||
|     int size = height * width; | ||||
|     double mean = 0.0; | ||||
|     double sqr_diff = 0; | ||||
|  | ||||
|     for (int j = 0; j < height; j++) | ||||
|         for (int i = 0; i < width; i++) | ||||
|             mean += img_metrics[j * width + i]; | ||||
|  | ||||
|     mean /= size; | ||||
|  | ||||
|     for (int j = 0; j < height; j++) { | ||||
|         for (int i = 0; i < width; i++) { | ||||
|             float mean_diff = img_metrics[j * width + i] - mean; | ||||
|             sqr_diff += (mean_diff * mean_diff); | ||||
|         } | ||||
|     } | ||||
|     sqr_diff = sqr_diff / size; | ||||
|     return sqrt(sqr_diff); | ||||
| } | ||||
|  | ||||
| static void set_meta(AVDictionary **metadata, const char *key, float d) | ||||
| { | ||||
|     char value[128]; | ||||
|     snprintf(value, sizeof(value), "%0.2f", d); | ||||
|     av_dict_set(metadata, key, value, 0); | ||||
| } | ||||
|  | ||||
| static int filter_frame(AVFilterLink *inlink, AVFrame *frame) | ||||
| { | ||||
|     AVFilterContext *ctx = inlink->dst; | ||||
|     SiTiContext *s = ctx->priv; | ||||
|     float si; | ||||
|     float ti; | ||||
|  | ||||
|     s->full_range = is_full_range(frame); | ||||
|     s->nb_frames++; | ||||
|  | ||||
|     // Calculate si and ti | ||||
|     convolve_sobel(s, frame->data[0], s->gradient_matrix, frame->linesize[0]); | ||||
|     calculate_motion(s, frame->data[0], s->motion_matrix, frame->linesize[0]); | ||||
|     si = std_deviation(s->gradient_matrix, s->width - 2, s->height - 2); | ||||
|     ti = std_deviation(s->motion_matrix, s->width, s->height); | ||||
|  | ||||
|     // Calculate statistics | ||||
|     s->max_si  = fmaxf(si, s->max_si); | ||||
|     s->max_ti  = fmaxf(ti, s->max_ti); | ||||
|     s->sum_si += si; | ||||
|     s->sum_ti += ti; | ||||
|     s->min_si  = s->nb_frames == 1 ? si : fminf(si, s->min_si); | ||||
|     s->min_ti  = s->nb_frames == 1 ? ti : fminf(ti, s->min_ti); | ||||
|  | ||||
|     // Set si ti information in frame metadata | ||||
|     set_meta(&frame->metadata, "lavfi.siti.si", si); | ||||
|     set_meta(&frame->metadata, "lavfi.siti.ti", ti); | ||||
|  | ||||
|     return ff_filter_frame(inlink->dst->outputs[0], frame); | ||||
| } | ||||
|  | ||||
| #define OFFSET(x) offsetof(SiTiContext, x) | ||||
| #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM | ||||
|  | ||||
| static const AVOption siti_options[] = { | ||||
|     { "print_summary", "Print summary showing average values", OFFSET(print_summary), AV_OPT_TYPE_BOOL, { .i64=0 }, 0, 1, FLAGS }, | ||||
|     { NULL } | ||||
| }; | ||||
|  | ||||
| AVFILTER_DEFINE_CLASS(siti); | ||||
|  | ||||
| static const AVFilterPad avfilter_vf_siti_inputs[] = { | ||||
|     { | ||||
|         .name         = "default", | ||||
|         .type         = AVMEDIA_TYPE_VIDEO, | ||||
|         .config_props = config_input, | ||||
|         .filter_frame = filter_frame, | ||||
|     }, | ||||
| }; | ||||
|  | ||||
| static const AVFilterPad avfilter_vf_siti_outputs[] = { | ||||
|     { | ||||
|         .name = "default", | ||||
|         .type = AVMEDIA_TYPE_VIDEO | ||||
|     }, | ||||
| }; | ||||
|  | ||||
| AVFilter ff_vf_siti = { | ||||
|     .name          = "siti", | ||||
|     .description   = NULL_IF_CONFIG_SMALL("Calculate spatial information (SI) and temporal information (TI)."), | ||||
|     .priv_size     = sizeof(SiTiContext), | ||||
|     .priv_class    = &siti_class, | ||||
|     .init          = init, | ||||
|     .uninit        = uninit, | ||||
|     .flags         = AVFILTER_FLAG_METADATA_ONLY, | ||||
|     FILTER_PIXFMTS_ARRAY(pix_fmts), | ||||
|     FILTER_INPUTS(avfilter_vf_siti_inputs), | ||||
|     FILTER_OUTPUTS(avfilter_vf_siti_outputs), | ||||
| }; | ||||
| @@ -382,6 +382,15 @@ refcmp_metadata(){ | ||||
|         -f null /dev/null | awk -v ref=${ref} -v fuzz=${fuzz} -f ${base}/refcmp-metadata.awk - | ||||
| } | ||||
|  | ||||
| cmp_metadata(){ | ||||
|     refcmp=$1 | ||||
|     pixfmt=$2 | ||||
|     fuzz=${3:-0.001} | ||||
|     ffmpeg $FLAGS $ENC_OPTS \ | ||||
|         -lavfi "testsrc2=size=300x200:rate=1:duration=5,format=${pixfmt},${refcmp},metadata=print:file=-" \ | ||||
|         -f null /dev/null | awk -v ref=${ref} -v fuzz=${fuzz} -f ${base}/refcmp-metadata.awk - | ||||
| } | ||||
|  | ||||
| pixfmt_conversion(){ | ||||
|     conversion="${test#pixfmt-}" | ||||
|     outdir="tests/data/pixfmt" | ||||
|   | ||||
| @@ -879,6 +879,9 @@ fate-filter-refcmp-ssim-rgb: CMD = refcmp_metadata ssim rgb24 0.015 | ||||
| FATE_FILTER-$(call ALLYES, $(REFCMP_DEPS) SSIM_FILTER) += fate-filter-refcmp-ssim-yuv | ||||
| fate-filter-refcmp-ssim-yuv: CMD = refcmp_metadata ssim yuv422p 0.015 | ||||
|  | ||||
| FATE_FILTER-$(call ALLYES, $(REFCMP_DEPS) SITI_FILTER) += fate-filter-refcmp-siti-yuv | ||||
| fate-filter-refcmp-siti-yuv: CMD = cmp_metadata siti yuv420p 0.015 | ||||
|  | ||||
| FATE_SAMPLES_FFPROBE += $(FATE_METADATA_FILTER-yes) | ||||
| FATE_SAMPLES_FFMPEG += $(FATE_FILTER_SAMPLES-yes) | ||||
| FATE_FFMPEG += $(FATE_FILTER-yes) | ||||
|   | ||||
							
								
								
									
										15
									
								
								tests/ref/fate/filter-refcmp-siti-yuv
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										15
									
								
								tests/ref/fate/filter-refcmp-siti-yuv
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,15 @@ | ||||
| frame:0    pts:0       pts_time:0 | ||||
| lavfi.siti.si=106.72 | ||||
| lavfi.siti.ti=0.00 | ||||
| frame:1    pts:1       pts_time:1 | ||||
| lavfi.siti.si=109.40 | ||||
| lavfi.siti.ti=28.00 | ||||
| frame:2    pts:2       pts_time:2 | ||||
| lavfi.siti.si=109.29 | ||||
| lavfi.siti.ti=28.38 | ||||
| frame:3    pts:3       pts_time:3 | ||||
| lavfi.siti.si=113.27 | ||||
| lavfi.siti.ti=33.42 | ||||
| frame:4    pts:4       pts_time:4 | ||||
| lavfi.siti.si=110.87 | ||||
| lavfi.siti.ti=30.53 | ||||
		Reference in New Issue
	
	Block a user