avfilter/vf_maskedmerge: fix rounding when masking

2025-08-10 06:10:52 +02:00 · 2022-03-02 22:30:40 +01:00
parent 59520f068d
commit dae95b3ffd
3 changed files with 23 additions and 16 deletions
--- a/libavfilter/maskedmerge.h
+++ b/libavfilter/maskedmerge.h
@@ -30,7 +30,7 @@ typedef struct MaskedMergeContext {
    int linesize[4];
    int nb_planes;
    int planes;
-    int half, depth;
+    int half, depth, max;
    FFFrameSync fs;

    void (*maskedmerge)(const uint8_t *bsrc, const uint8_t *osrc,
--- a/libavfilter/vf_maskedmerge.c
+++ b/libavfilter/vf_maskedmerge.c
@@ -96,7 +96,7 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
                       base->linesize[p], overlay->linesize[p],
                       mask->linesize[p], out->linesize[p],
                       s->width[p], slice_end - slice_start,
-                       s->half, s->depth);
+                       s->half, s->max);
    }

    return 0;
@@ -138,13 +138,13 @@ static int process_frame(FFFrameSync *fs)
    return ff_filter_frame(outlink, out);
 }

-#define MASKEDMERGE(n, type, half, shift)                              \
+#define MASKEDMERGE(n, type, ctype, half, max, div)                    \
 static void maskedmerge##n(const uint8_t *bbsrc, const uint8_t *oosrc, \
                           const uint8_t *mmsrc, uint8_t *ddst,        \
                           ptrdiff_t blinesize, ptrdiff_t olinesize,   \
                           ptrdiff_t mlinesize, ptrdiff_t dlinesize,   \
                           int w, int h,                               \
-                           int hhalf, int sshift)                      \
+                           int hhalf, int mmax)                        \
 {                                                                      \
    const type *bsrc = (const type *)bbsrc;                            \
    const type *osrc = (const type *)oosrc;                            \
@@ -158,7 +158,10 @@ static void maskedmerge##n(const uint8_t *bbsrc, const uint8_t *oosrc, \
                                                                       \
    for (int y = 0; y < h; y++) {                                      \
        for (int x = 0; x < w; x++) {                                  \
-            dst[x] = bsrc[x] + ((msrc[x] * (osrc[x] - bsrc[x]) + half) shift); \
+            const type invm = max - msrc[x];                           \
+            const ctype r = ((ctype)bsrc[x] * invm +                   \
+                             ((ctype)msrc[x] * osrc[x] + half))  div;  \
+            dst[x] = r;                                                \
        }                                                              \
                                                                       \
        dst  += dlinesize;                                             \
@@ -168,9 +171,9 @@ static void maskedmerge##n(const uint8_t *bbsrc, const uint8_t *oosrc, \
    }                                                                  \
 }

-MASKEDMERGE(8,  uint8_t, 128, >> 8)
-MASKEDMERGE(16, uint16_t, hhalf, >> sshift)
-MASKEDMERGE(32, float, 0.f, + 0.f)
+MASKEDMERGE(8,  uint8_t,  uint16_t,   127, 255,  / 255)
+MASKEDMERGE(16, uint16_t, uint32_t, hhalf, mmax, / mmax)
+MASKEDMERGE(32, float,    float,      0.f, 1.f,  + 0.f)

 static int config_input(AVFilterLink *inlink)
 {
@@ -189,7 +192,8 @@ static int config_input(AVFilterLink *inlink)
    s->width[0]  = s->width[3]  = inlink->w;

    s->depth = desc->comp[0].depth;
-    s->half = (1 << s->depth) / 2;
+    s->max  = s->depth < 32 ? (1 << s->depth) - 1 : 0; /* 1 << 32 would be UB (float depth) */
+    s->half = s->max / 2;

    if (s->depth == 8)
        s->maskedmerge = maskedmerge8;
--- a/libavfilter/x86/vf_maskedmerge.asm
+++ b/libavfilter/x86/vf_maskedmerge.asm
@@ -24,26 +24,28 @@

 SECTION_RODATA

-pw_128: times 8 dw 128
-pw_256: times 8 dw 256
+pw_127: times 8 dw 127
+pw_255: times 8 dw 255
+pw_32897: times 8 dw 32897

 SECTION .text

 INIT_XMM sse2
 %if ARCH_X86_64
-cglobal maskedmerge8, 8, 11, 7, bsrc, osrc, msrc, dst, blinesize, olinesize, mlinesize, dlinesize, w, h, x
+cglobal maskedmerge8, 8, 11, 8, bsrc, osrc, msrc, dst, blinesize, olinesize, mlinesize, dlinesize, w, h, x
    mov         wd, dword wm
    mov         hd, dword hm
 %else
-cglobal maskedmerge8, 5, 7, 7, bsrc, osrc, msrc, dst, blinesize, w, x
+cglobal maskedmerge8, 5, 7, 8, bsrc, osrc, msrc, dst, blinesize, w, x
    mov         wd, r8m
 %define olinesizeq r5mp
 %define mlinesizeq r6mp
 %define dlinesizeq r7mp
 %define hd r9mp
 %endif
-    mova        m4, [pw_256]
-    mova        m5, [pw_128]
+    mova        m4, [pw_255]
+    mova        m5, [pw_127]
+    mova        m7, [pw_32897]
    pxor        m6, m6
    add      bsrcq, wq
    add      osrcq, wq
@@ -66,7 +68,8 @@ cglobal maskedmerge8, 5, 7, 7, bsrc, osrc, msrc, dst, blinesize, w, x
        pmullw          m1, m3
        paddw           m1, m2
        paddw           m1, m5
-        psrlw           m1, 8
+        pmulhuw         m1, m7
+        psrlw           m1, 7
        packuswb        m1, m1
        movh   [dstq + xq], m1
        add             xq, mmsize / 2