avfilter/vf_idet: add a repeated field detection

This can be useful for determining telecine.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
2025-11-06 08:29:25 +02:00 · 2014-11-02 04:49:34 -08:00
parent 07c3a4f693
commit fdf22f973d
4 changed files with 67 additions and 5 deletions
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -5574,8 +5574,9 @@ value.

 Detect video interlacing type.

-This filter tries to detect if the input is interlaced or progressive,
-top or bottom field first.
+This filter tries to detect if the input frames are interlaced, progressive,
+top or bottom field first. It will also try to detect fields that are
+repeated between adjacent frames (a sign of telecine).

 Single frame detection considers only immediately adjacent frames when classifying each frame.
 Multiple frame detection incorporates the classification history of previous frames.
@@ -5616,6 +5617,18 @@ Cumulative number of frames that could not be classified using single-frame dete

@item multiple.undetermined
 Cumulative number of frames that could not be classified using multiple-frame detection.
+
+@item repeated.current_frame
+Which field in the current frame is repeated from the last. One of ``neither'', ``top'', or ``bottom''.
+
+@item repeated.neither
+Cumulative number of frames with no repeated field.
+
+@item repeated.top
+Cumulative number of frames with the top field repeated from the previous frame's top field.
+
+@item repeated.bottom
+Cumulative number of frames with the bottom field repeated from the previous frame's bottom field.
@end table

 The filter accepts the following options:
@@ -5625,6 +5638,8 @@ The filter accepts the following options:
 Set interlacing threshold.
@item prog_thres
 Set progressive threshold.
+@item rep_thres
+Threshold for repeated field detection.
@item half_life
 Number of frames after which a given frame's contribution to the
 statistics is halved (i.e., it contributes only 0.5 to it's
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -31,7 +31,7 @@

 #define LIBAVFILTER_VERSION_MAJOR  5
 #define LIBAVFILTER_VERSION_MINOR  2
-#define LIBAVFILTER_VERSION_MICRO 102
+#define LIBAVFILTER_VERSION_MICRO 103

 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
                                               LIBAVFILTER_VERSION_MINOR, \
--- a/libavfilter/vf_idet.c
+++ b/libavfilter/vf_idet.c
@@ -32,6 +32,7 @@
 static const AVOption idet_options[] = {
    { "intl_thres", "set interlacing threshold", OFFSET(interlace_threshold),   AV_OPT_TYPE_FLOAT, {.dbl = 1.04}, -1, FLT_MAX, FLAGS },
    { "prog_thres", "set progressive threshold", OFFSET(progressive_threshold), AV_OPT_TYPE_FLOAT, {.dbl = 1.5},  -1, FLT_MAX, FLAGS },
+    { "rep_thres",  "set repeat threshold",      OFFSET(repeat_threshold),      AV_OPT_TYPE_FLOAT, {.dbl = 3.0},  -1, FLT_MAX, FLAGS },
    { "half_life", "half life of cumulative statistics", OFFSET(half_life),     AV_OPT_TYPE_FLOAT, {.dbl = 0.0},  -1, INT_MAX, FLAGS },
    { NULL }
 };
@@ -72,6 +73,16 @@ static int av_dict_set_fxp(AVDictionary **pm, const char *key, uint64_t value, u
    return av_dict_set(pm, key, valuestr, flags);
 }

+static const char *rep2str(RepeatedField repeated_field)
+{
+    switch(repeated_field) {
+        case REPEAT_NONE    : return "neither";
+        case REPEAT_TOP     : return "top";
+        case REPEAT_BOTTOM  : return "bottom";
+    }
+    return NULL;
+}
+
 int ff_idet_filter_line_c(const uint8_t *a, const uint8_t *b, const uint8_t *c, int w)
 {
    int x;
@@ -104,7 +115,9 @@ static void filter(AVFilterContext *ctx)
    int y, i;
    int64_t alpha[2]={0};
    int64_t delta=0;
+    int64_t gamma[2]={0};
    Type type, best_type;
+    RepeatedField repeat;
    int match = 0;
    AVDictionary **metadata = avpriv_frame_get_metadatap(idet->cur);

@@ -125,6 +138,7 @@ static void filter(AVFilterContext *ctx)
            alpha[ y   &1] += idet->filter_line(cur-refs, prev, cur+refs, w);
            alpha[(y^1)&1] += idet->filter_line(cur-refs, next, cur+refs, w);
            delta          += idet->filter_line(cur-refs,  cur, cur+refs, w);
+            gamma[(y^1)&1] += idet->filter_line(cur     , prev, cur     , w);
        }
    }

@@ -138,6 +152,14 @@ static void filter(AVFilterContext *ctx)
        type = UNDETERMINED;
    }

+    if ( gamma[0] > idet->repeat_threshold * gamma[1] ){
+        repeat = REPEAT_TOP;
+    } else if ( gamma[1] > idet->repeat_threshold * gamma[0] ){
+        repeat = REPEAT_BOTTOM;
+    } else {
+        repeat = REPEAT_NONE;
+    }
+
    memmove(idet->history+1, idet->history, HIST_SIZE-1);
    idet->history[0] = type;
    best_type = UNDETERMINED;
@@ -170,20 +192,30 @@ static void filter(AVFilterContext *ctx)
        idet->cur->interlaced_frame = 0;
    }

-
+    for(i=0; i<3; i++)
+        idet->repeats[i]  = av_rescale(idet->repeats [i], idet->decay_coefficient, PRECISION);

    for(i=0; i<4; i++){
        idet->prestat [i] = av_rescale(idet->prestat [i], idet->decay_coefficient, PRECISION);
        idet->poststat[i] = av_rescale(idet->poststat[i], idet->decay_coefficient, PRECISION);
    }

+    idet->total_repeats [         repeat] ++;
+    idet->repeats       [         repeat] += PRECISION;
+
    idet->total_prestat [           type] ++;
    idet->prestat       [           type] += PRECISION;

    idet->total_poststat[idet->last_type] ++;
    idet->poststat      [idet->last_type] += PRECISION;

-    av_log(ctx, AV_LOG_DEBUG, "Single frame:%12s, Multi frame:%12s\n", type2str(type), type2str(idet->last_type));
+    av_log(ctx, AV_LOG_DEBUG, "Repeated Field:%12s, Single frame:%12s, Multi frame:%12s\n",
+           rep2str(repeat), type2str(type), type2str(idet->last_type));
+
+    av_dict_set    (metadata, "lavfi.idet.repeated.current_frame", rep2str(repeat), 0);
+    av_dict_set_fxp(metadata, "lavfi.idet.repeated.neither",       idet->repeats[REPEAT_NONE], 2, 0);
+    av_dict_set_fxp(metadata, "lavfi.idet.repeated.top",           idet->repeats[REPEAT_TOP], 2, 0);
+    av_dict_set_fxp(metadata, "lavfi.idet.repeated.bottom",        idet->repeats[REPEAT_BOTTOM], 2, 0);

    av_dict_set    (metadata, "lavfi.idet.single.current_frame",   type2str(type), 0);
    av_dict_set_fxp(metadata, "lavfi.idet.single.tff",             idet->prestat[TFF], 2 , 0);
@@ -261,6 +293,11 @@ static av_cold void uninit(AVFilterContext *ctx)
 {
    IDETContext *idet = ctx->priv;

+    av_log(ctx, AV_LOG_INFO, "Repeated Fields: Neither:%6"PRId64" Top:%6"PRId64" Bottom:%6"PRId64"\n",
+           idet->total_repeats[REPEAT_NONE],
+           idet->total_repeats[REPEAT_TOP],
+           idet->total_repeats[REPEAT_BOTTOM]
+        );
    av_log(ctx, AV_LOG_INFO, "Single frame detection: TFF:%6"PRId64" BFF:%6"PRId64" Progressive:%6"PRId64" Undetermined:%6"PRId64"\n",
           idet->total_prestat[TFF],
           idet->total_prestat[BFF],
--- a/libavfilter/vf_idet.h
+++ b/libavfilter/vf_idet.h
@@ -33,16 +33,26 @@ typedef enum {
    UNDETERMINED,
 } Type;

+typedef enum {
+    REPEAT_NONE,
+    REPEAT_TOP,
+    REPEAT_BOTTOM,
+} RepeatedField;
+
 typedef struct {
    const AVClass *class;
    float interlace_threshold;
    float progressive_threshold;
+    float repeat_threshold;
    float half_life;
    uint64_t decay_coefficient;

    Type last_type;
+
+    uint64_t repeats[3];
    uint64_t prestat[4];
    uint64_t poststat[4];
+    uint64_t total_repeats[3];
    uint64_t total_prestat[4];
    uint64_t total_poststat[4];