From 140a0485d3775330c42f431e4ff49737d747799c Mon Sep 17 00:00:00 2001
From: Paul B Mahol <onemda@gmail.com>
Date: Sat, 10 Sep 2016 12:17:08 +0200
Subject: [PATCH] avfilter/vf_overlay: split blend_image into functions for
 each overlay format

Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
 libavfilter/vf_overlay.c | 370 +++++++++++++++++++++------------------
 1 file changed, 195 insertions(+), 175 deletions(-)

diff --git a/libavfilter/vf_overlay.c b/libavfilter/vf_overlay.c
index c33b35d769..177544e8f0 100644
--- a/libavfilter/vf_overlay.c
+++ b/libavfilter/vf_overlay.c
@@ -132,6 +132,8 @@ typedef struct OverlayContext {
     int eof_action;             ///< action to take on EOF from source
 
     AVExpr *x_pexpr, *y_pexpr;
+
+    void (*blend_image)(AVFilterContext *ctx, AVFrame *dst, const AVFrame *src, int x, int y);
 } OverlayContext;
 
 static av_cold void uninit(AVFilterContext *ctx)
@@ -304,22 +306,6 @@ static const enum AVPixelFormat alpha_pix_fmts[] = {
     AV_PIX_FMT_BGRA, AV_PIX_FMT_NONE
 };
 
-static int config_input_main(AVFilterLink *inlink)
-{
-    OverlayContext *s = inlink->dst->priv;
-    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(inlink->format);
-
-    av_image_fill_max_pixsteps(s->main_pix_step,    NULL, pix_desc);
-
-    s->hsub = pix_desc->log2_chroma_w;
-    s->vsub = pix_desc->log2_chroma_h;
-
-    s->main_is_packed_rgb =
-        ff_fill_rgba_map(s->main_rgba_map, inlink->format) >= 0;
-    s->main_has_alpha = ff_fmt_is_in(inlink->format, alpha_pix_fmts);
-    return 0;
-}
-
 static int config_input_overlay(AVFilterLink *inlink)
 {
     AVFilterContext *ctx  = inlink->dst;
@@ -397,9 +383,88 @@ static int config_output(AVFilterLink *outlink)
 /**
  * Blend image in src to destination buffer dst at position (x, y).
  */
-static void blend_image(AVFilterContext *ctx,
-                        AVFrame *dst, const AVFrame *src,
-                        int x, int y)
+/* Packed-RGB blend: composite src onto dst in place, clipped to dst, with src's top-left at (x, y). */
+static void blend_image_packed_rgb(AVFilterContext *ctx,
+                                   AVFrame *dst, const AVFrame *src,
+                                   int x, int y)
+{
+    OverlayContext *s = ctx->priv;
+    int i, imax, j, jmax;   // i/j: overlay row/column; imax/jmax: exclusive end after clipping to dst
+    const int src_w = src->width;
+    const int src_h = src->height;
+    const int dst_w = dst->width;
+    const int dst_h = dst->height;
+    uint8_t alpha;          ///< the amount of overlay to blend on to main
+    const int dr = s->main_rgba_map[R];     // byte offsets of R, G, B, A inside one main (dst) pixel
+    const int dg = s->main_rgba_map[G];
+    const int db = s->main_rgba_map[B];
+    const int da = s->main_rgba_map[A];
+    const int dstep = s->main_pix_step[0];  // bytes from one main pixel to the next
+    const int sr = s->overlay_rgba_map[R];  // byte offsets of R, G, B, A inside one overlay (src) pixel
+    const int sg = s->overlay_rgba_map[G];
+    const int sb = s->overlay_rgba_map[B];
+    const int sa = s->overlay_rgba_map[A];
+    const int sstep = s->overlay_pix_step[0]; // bytes from one overlay pixel to the next
+    const int main_has_alpha = s->main_has_alpha;
+    uint8_t *S, *sp, *d, *dp;   // sp/dp: start of the current src/dst row; S/d: current src/dst pixel
+
+    i = FFMAX(-y, 0);           // for y < 0, skip the overlay rows that lie above dst
+    sp = src->data[0] + i     * src->linesize[0];
+    dp = dst->data[0] + (y+i) * dst->linesize[0];
+
+    for (imax = FFMIN(-y + dst_h, src_h); i < imax; i++) { // stop at the bottom of dst or of the overlay
+        j = FFMAX(-x, 0);       // for x < 0, skip the overlay columns that lie left of dst
+        S = sp + j     * sstep;
+        d = dp + (x+j) * dstep;
+
+        for (jmax = FFMIN(-x + dst_w, src_w); j < jmax; j++) { // stop at the right edge of dst or of the overlay
+            alpha = S[sa];
+
+            // if the main channel has an alpha channel, alpha has to be calculated
+            // to create an un-premultiplied (straight) alpha value
+            if (main_has_alpha && alpha != 0 && alpha != 255) {
+                uint8_t alpha_d = d[da];
+                alpha = UNPREMULTIPLY_ALPHA(alpha, alpha_d);
+            }
+
+            switch (alpha) {
+            case 0:             // nothing to blend: the main colour is left as it is
+                break;
+            case 255:           // overlay colour replaces the main colour
+                d[dr] = S[sr];
+                d[dg] = S[sg];
+                d[db] = S[sb];
+                break;
+            default:
+                // main_value = main_value * (1 - alpha) + overlay_value * alpha
+                // since alpha is in the range 0-255, the result must be divided by 255
+                d[dr] = FAST_DIV255(d[dr] * (255 - alpha) + S[sr] * alpha);
+                d[dg] = FAST_DIV255(d[dg] * (255 - alpha) + S[sg] * alpha);
+                d[db] = FAST_DIV255(d[db] * (255 - alpha) + S[sb] * alpha);
+            }
+            if (main_has_alpha) {   // also update the main alpha byte, selected by the same alpha value
+                switch (alpha) {
+                case 0:
+                    break;
+                case 255:
+                    d[da] = S[sa];
+                    break;
+                default:
+                    // apply alpha compositing: main_alpha += (1-main_alpha) * overlay_alpha
+                    d[da] += FAST_DIV255((255 - d[da]) * S[sa]);
+                }
+            }
+            d += dstep;         // next pixel in this row
+            S += sstep;
+        }
+        dp += dst->linesize[0]; // next row
+        sp += src->linesize[0];
+    }
+}
+
+static void blend_image_yuv(AVFilterContext *ctx,
+                            AVFrame *dst, const AVFrame *src,
+                            int x, int y)
 {
     OverlayContext *s = ctx->priv;
     int i, imax, j, jmax, k, kmax;
@@ -407,182 +472,135 @@ static void blend_image(AVFilterContext *ctx,
     const int src_h = src->height;
     const int dst_w = dst->width;
     const int dst_h = dst->height;
+    const int main_has_alpha = s->main_has_alpha;
 
-    if (x >= dst_w || x+src_w < 0 ||
-        y >= dst_h || y+src_h < 0)
-        return; /* no intersection */
-
-    if (s->main_is_packed_rgb) {
+    if (main_has_alpha) {
         uint8_t alpha;          ///< the amount of overlay to blend on to main
-        const int dr = s->main_rgba_map[R];
-        const int dg = s->main_rgba_map[G];
-        const int db = s->main_rgba_map[B];
-        const int da = s->main_rgba_map[A];
-        const int dstep = s->main_pix_step[0];
-        const int sr = s->overlay_rgba_map[R];
-        const int sg = s->overlay_rgba_map[G];
-        const int sb = s->overlay_rgba_map[B];
-        const int sa = s->overlay_rgba_map[A];
-        const int sstep = s->overlay_pix_step[0];
-        const int main_has_alpha = s->main_has_alpha;
-        uint8_t *s, *sp, *d, *dp;
+        uint8_t *s, *sa, *d, *da;
 
         i = FFMAX(-y, 0);
-        sp = src->data[0] + i     * src->linesize[0];
-        dp = dst->data[0] + (y+i) * dst->linesize[0];
+        sa = src->data[3] + i     * src->linesize[3];
+        da = dst->data[3] + (y+i) * dst->linesize[3];
 
         for (imax = FFMIN(-y + dst_h, src_h); i < imax; i++) {
             j = FFMAX(-x, 0);
-            s = sp + j     * sstep;
-            d = dp + (x+j) * dstep;
+            s = sa + j;
+            d = da + x+j;
 
             for (jmax = FFMIN(-x + dst_w, src_w); j < jmax; j++) {
-                alpha = s[sa];
-
-                // if the main channel has an alpha channel, alpha has to be calculated
-                // to create an un-premultiplied (straight) alpha value
-                if (main_has_alpha && alpha != 0 && alpha != 255) {
-                    uint8_t alpha_d = d[da];
+                alpha = *s;
+                if (alpha != 0 && alpha != 255) {
+                    uint8_t alpha_d = *d;
                     alpha = UNPREMULTIPLY_ALPHA(alpha, alpha_d);
                 }
-
                 switch (alpha) {
                 case 0:
                     break;
                 case 255:
-                    d[dr] = s[sr];
-                    d[dg] = s[sg];
-                    d[db] = s[sb];
+                    *d = *s;
                     break;
                 default:
-                    // main_value = main_value * (1 - alpha) + overlay_value * alpha
-                    // since alpha is in the range 0-255, the result must divided by 255
-                    d[dr] = FAST_DIV255(d[dr] * (255 - alpha) + s[sr] * alpha);
-                    d[dg] = FAST_DIV255(d[dg] * (255 - alpha) + s[sg] * alpha);
-                    d[db] = FAST_DIV255(d[db] * (255 - alpha) + s[sb] * alpha);
+                    // apply alpha compositing: main_alpha += (1-main_alpha) * overlay_alpha
+                    *d += FAST_DIV255((255 - *d) * *s);
                 }
-                if (main_has_alpha) {
-                    switch (alpha) {
-                    case 0:
-                        break;
-                    case 255:
-                        d[da] = s[sa];
-                        break;
-                    default:
-                        // apply alpha compositing: main_alpha += (1-main_alpha) * overlay_alpha
-                        d[da] += FAST_DIV255((255 - d[da]) * s[sa]);
-                    }
-                }
-                d += dstep;
-                s += sstep;
-            }
-            dp += dst->linesize[0];
-            sp += src->linesize[0];
-        }
-    } else {
-        const int main_has_alpha = s->main_has_alpha;
-        if (main_has_alpha) {
-            uint8_t alpha;          ///< the amount of overlay to blend on to main
-            uint8_t *s, *sa, *d, *da;
-
-            i = FFMAX(-y, 0);
-            sa = src->data[3] + i     * src->linesize[3];
-            da = dst->data[3] + (y+i) * dst->linesize[3];
-
-            for (imax = FFMIN(-y + dst_h, src_h); i < imax; i++) {
-                j = FFMAX(-x, 0);
-                s = sa + j;
-                d = da + x+j;
-
-                for (jmax = FFMIN(-x + dst_w, src_w); j < jmax; j++) {
-                    alpha = *s;
-                    if (alpha != 0 && alpha != 255) {
-                        uint8_t alpha_d = *d;
-                        alpha = UNPREMULTIPLY_ALPHA(alpha, alpha_d);
-                    }
-                    switch (alpha) {
-                    case 0:
-                        break;
-                    case 255:
-                        *d = *s;
-                        break;
-                    default:
-                        // apply alpha compositing: main_alpha += (1-main_alpha) * overlay_alpha
-                        *d += FAST_DIV255((255 - *d) * *s);
-                    }
-                    d += 1;
-                    s += 1;
-                }
-                da += dst->linesize[3];
-                sa += src->linesize[3];
-            }
-        }
-        for (i = 0; i < 3; i++) {
-            int hsub = i ? s->hsub : 0;
-            int vsub = i ? s->vsub : 0;
-            int src_wp = AV_CEIL_RSHIFT(src_w, hsub);
-            int src_hp = AV_CEIL_RSHIFT(src_h, vsub);
-            int dst_wp = AV_CEIL_RSHIFT(dst_w, hsub);
-            int dst_hp = AV_CEIL_RSHIFT(dst_h, vsub);
-            int yp = y>>vsub;
-            int xp = x>>hsub;
-            uint8_t *s, *sp, *d, *dp, *a, *ap;
-
-            j = FFMAX(-yp, 0);
-            sp = src->data[i] + j         * src->linesize[i];
-            dp = dst->data[i] + (yp+j)    * dst->linesize[i];
-            ap = src->data[3] + (j<<vsub) * src->linesize[3];
-
-            for (jmax = FFMIN(-yp + dst_hp, src_hp); j < jmax; j++) {
-                k = FFMAX(-xp, 0);
-                d = dp + xp+k;
-                s = sp + k;
-                a = ap + (k<<hsub);
-
-                for (kmax = FFMIN(-xp + dst_wp, src_wp); k < kmax; k++) {
-                    int alpha_v, alpha_h, alpha;
-
-                    // average alpha for color components, improve quality
-                    if (hsub && vsub && j+1 < src_hp && k+1 < src_wp) {
-                        alpha = (a[0] + a[src->linesize[3]] +
-                                 a[1] + a[src->linesize[3]+1]) >> 2;
-                    } else if (hsub || vsub) {
-                        alpha_h = hsub && k+1 < src_wp ?
-                            (a[0] + a[1]) >> 1 : a[0];
-                        alpha_v = vsub && j+1 < src_hp ?
-                            (a[0] + a[src->linesize[3]]) >> 1 : a[0];
-                        alpha = (alpha_v + alpha_h) >> 1;
-                    } else
-                        alpha = a[0];
-                    // if the main channel has an alpha channel, alpha has to be calculated
-                    // to create an un-premultiplied (straight) alpha value
-                    if (main_has_alpha && alpha != 0 && alpha != 255) {
-                        // average alpha for color components, improve quality
-                        uint8_t alpha_d;
-                        if (hsub && vsub && j+1 < src_hp && k+1 < src_wp) {
-                            alpha_d = (d[0] + d[src->linesize[3]] +
-                                       d[1] + d[src->linesize[3]+1]) >> 2;
-                        } else if (hsub || vsub) {
-                            alpha_h = hsub && k+1 < src_wp ?
-                                (d[0] + d[1]) >> 1 : d[0];
-                            alpha_v = vsub && j+1 < src_hp ?
-                                (d[0] + d[src->linesize[3]]) >> 1 : d[0];
-                            alpha_d = (alpha_v + alpha_h) >> 1;
-                        } else
-                            alpha_d = d[0];
-                        alpha = UNPREMULTIPLY_ALPHA(alpha, alpha_d);
-                    }
-                    *d = FAST_DIV255(*d * (255 - alpha) + *s * alpha);
-                    s++;
-                    d++;
-                    a += 1 << hsub;
-                }
-                dp += dst->linesize[i];
-                sp += src->linesize[i];
-                ap += (1 << vsub) * src->linesize[3];
+                d += 1;
+                s += 1;
             }
+            da += dst->linesize[3];
+            sa += src->linesize[3];
         }
     }
+    for (i = 0; i < 3; i++) {
+        int hsub = i ? s->hsub : 0;
+        int vsub = i ? s->vsub : 0;
+        int src_wp = AV_CEIL_RSHIFT(src_w, hsub);
+        int src_hp = AV_CEIL_RSHIFT(src_h, vsub);
+        int dst_wp = AV_CEIL_RSHIFT(dst_w, hsub);
+        int dst_hp = AV_CEIL_RSHIFT(dst_h, vsub);
+        int yp = y>>vsub;
+        int xp = x>>hsub;
+        uint8_t *s, *sp, *d, *dp, *a, *ap;
+
+        j = FFMAX(-yp, 0);
+        sp = src->data[i] + j         * src->linesize[i];
+        dp = dst->data[i] + (yp+j)    * dst->linesize[i];
+        ap = src->data[3] + (j<<vsub) * src->linesize[3];
+
+        for (jmax = FFMIN(-yp + dst_hp, src_hp); j < jmax; j++) {
+            k = FFMAX(-xp, 0);
+            d = dp + xp+k;
+            s = sp + k;
+            a = ap + (k<<hsub);
+
+            for (kmax = FFMIN(-xp + dst_wp, src_wp); k < kmax; k++) {
+                int alpha_v, alpha_h, alpha;
+
+                // average alpha for color components, improve quality
+                if (hsub && vsub && j+1 < src_hp && k+1 < src_wp) {
+                    alpha = (a[0] + a[src->linesize[3]] +
+                             a[1] + a[src->linesize[3]+1]) >> 2;
+                } else if (hsub || vsub) {
+                    alpha_h = hsub && k+1 < src_wp ?
+                        (a[0] + a[1]) >> 1 : a[0];
+                    alpha_v = vsub && j+1 < src_hp ?
+                        (a[0] + a[src->linesize[3]]) >> 1 : a[0];
+                    alpha = (alpha_v + alpha_h) >> 1;
+                } else
+                    alpha = a[0];
+                // if the main channel has an alpha channel, alpha has to be calculated
+                // to create an un-premultiplied (straight) alpha value
+                if (main_has_alpha && alpha != 0 && alpha != 255) {
+                    // average alpha for color components, improve quality
+                    uint8_t alpha_d;
+                    if (hsub && vsub && j+1 < src_hp && k+1 < src_wp) {
+                        alpha_d = (d[0] + d[src->linesize[3]] +
+                                   d[1] + d[src->linesize[3]+1]) >> 2;
+                    } else if (hsub || vsub) {
+                        alpha_h = hsub && k+1 < src_wp ?
+                            (d[0] + d[1]) >> 1 : d[0];
+                        alpha_v = vsub && j+1 < src_hp ?
+                            (d[0] + d[src->linesize[3]]) >> 1 : d[0];
+                        alpha_d = (alpha_v + alpha_h) >> 1;
+                    } else
+                        alpha_d = d[0];
+                    alpha = UNPREMULTIPLY_ALPHA(alpha, alpha_d);
+                }
+                *d = FAST_DIV255(*d * (255 - alpha) + *s * alpha);
+                s++;
+                d++;
+                a += 1 << hsub;
+            }
+            dp += dst->linesize[i];
+            sp += src->linesize[i];
+            ap += (1 << vsub) * src->linesize[3];
+        }
+    }
+}
+
+static int config_input_main(AVFilterLink *inlink)
+{
+    OverlayContext *s = inlink->dst->priv;
+    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(inlink->format);
+
+    av_image_fill_max_pixsteps(s->main_pix_step,    NULL, pix_desc);
+    /* chroma subsampling shifts; blend_image_yuv applies them to planes 1 and 2 */
+    s->hsub = pix_desc->log2_chroma_w;
+    s->vsub = pix_desc->log2_chroma_h;
+
+    s->main_is_packed_rgb =
+        ff_fill_rgba_map(s->main_rgba_map, inlink->format) >= 0;
+    s->main_has_alpha = ff_fmt_is_in(inlink->format, alpha_pix_fmts);
+    switch (s->format) {    /* choose the blend routine here, from the configured overlay format */
+    case OVERLAY_FORMAT_YUV420:
+    case OVERLAY_FORMAT_YUV422:
+    case OVERLAY_FORMAT_YUV444:
+        s->blend_image = blend_image_yuv;
+        break;
+    case OVERLAY_FORMAT_RGB:
+        s->blend_image = blend_image_packed_rgb;
+        break;
+    }   /* NOTE(review): no default case, any other s->format leaves blend_image unchanged - confirm none exist */
+    return 0;
 }
 
 static AVFrame *do_blend(AVFilterContext *ctx, AVFrame *mainpic,
@@ -611,7 +629,9 @@ static AVFrame *do_blend(AVFilterContext *ctx, AVFrame *mainpic,
                s->var_values[VAR_Y], s->y);
     }
 
-    blend_image(ctx, mainpic, second, s->x, s->y);
+    if (s->x < mainpic->width  && s->x + second->width  >= 0 && /* blend only if the frames overlap in x ... */
+        s->y < mainpic->height && s->y + second->height >= 0)   /* ... and in y */
+        s->blend_image(ctx, mainpic, second, s->x, s->y);
     return mainpic;
 }