avfilter/vf_ciescope: minor optimizations (use float instead of double, replace repeated divisions with a single reciprocal)

2025-08-10 06:10:52 +02:00 · 2022-04-19 19:50:49 +02:00
parent 8b717a4044
commit da4b7c2be4
1 changed file with 164 additions and 134 deletions
--- a/libavfilter/vf_ciescope.c
+++ b/libavfilter/vf_ciescope.c
@@ -62,12 +62,14 @@ typedef struct CiescopeContext {
    float contrast;
    int background;
-    double log2lin[65536];
+    float log2lin[65536];
-    double igamma;
+    float igamma;
-    double i[3][3];
+    float i[3][3];
-    double m[3][3];
+    float m[3][3];
    AVFrame *f;
-    void (*filter)(AVFilterContext *ctx, AVFrame *in, double *cx, double *cy, int x, int y);
+    void (*filter)(AVFilterContext *ctx, const uint8_t *ptr,
                   ptrdiff_t linesize,
                   float *cx, float *cy, int x, int y);
 } CiescopeContext;
 #define OFFSET(x) offsetof(CiescopeContext, x)
@@ -163,11 +165,11 @@ static int config_output(AVFilterLink *outlink)
   point. */
 struct ColorSystem {
-    double xRed, yRed,                /* Red primary illuminant */
+    float xRed, yRed,        /* Red primary illuminant */
-           xGreen, yGreen,            /* Green primary illuminant */
+          xGreen, yGreen,    /* Green primary illuminant */
-           xBlue, yBlue,              /* Blue primary illuminant */
+          xBlue, yBlue,      /* Blue primary illuminant */
-           xWhite, yWhite,            /* White point */
+          xWhite, yWhite,    /* White point */
-           gamma;             /* gamma of nonlinear correction */
+          gamma;             /* gamma of nonlinear correction */
 };
 static float const spectral_chromaticity[][3] = {
@@ -712,23 +714,23 @@ static struct ColorSystem CustomSystem = {
 */
 static void
-uv_to_xy(double   const u,
+uv_to_xy(float  const u,
-         double   const v,
+         float  const v,
-         double * const xc,
+         float *const xc,
-         double * const yc)
+         float *const yc)
 {
 /*
    Given 1970 coordinates u, v, determine 1931 chromaticities x, y
 */
-    *xc = 3*u / (2*u - 8*v + 4);
+    *xc = 3.f*u / (2.f*u - 8.f*v + 4.f);
-    *yc = 2*v / (2*u - 8*v + 4);
+    *yc = 2.f*v / (2.f*u - 8.f*v + 4.f);
 }
 static void
-upvp_to_xy(double   const up,
+upvp_to_xy(float   const up,
-           double   const vp,
+           float   const vp,
-           double * const xc,
+           float * const xc,
-           double * const yc)
+           float * const yc)
 {
 /*
    Given 1976 coordinates u', v', determine 1931 chromaticities x, y
@@ -738,48 +740,50 @@ upvp_to_xy(double   const up,
 }
 static void
-xy_to_upvp(double xc,
+xy_to_upvp(float xc,
-           double yc,
+           float yc,
-           double * const up,
+           float * const up,
-           double * const vp)
+           float * const vp)
 {
 /*
    Given 1931 chromaticities x, y, determine 1976 coordinates u', v'
 */
-    *up = 4*xc / (- 2*xc + 12*yc + 3);
+    const float scale = 1.f / (-2.f*xc + 12.f*yc + 3.f);
-    *vp = 9*yc / (- 2*xc + 12*yc + 3);
+    *up = 4.f*xc * scale;
    *vp = 9.f*yc * scale;
 }
 static void
-xy_to_uv(double xc,
+xy_to_uv(float xc,
-         double yc,
+         float yc,
-         double * const u,
+         float * const u,
-         double * const v)
+         float * const v)
 {
 /*
    Given 1931 chromaticities x, y, determine 1960 coordinates u, v
 */
-    *u = 4*xc / (- 2*xc + 12*yc + 3);
+    const float scale = 1.f / (-2.f*xc + 12.f*yc + 3.f);
-    *v = 6*yc / (- 2*xc + 12*yc + 3);
+    *u = 4.f*xc * scale;
    *v = 6.f*yc * scale;
 }
 static void
-xyz_to_rgb(const double m[3][3],
+xyz_to_rgb(const float m[3][3],
-           double xc, double yc, double zc,
+           float xc, float yc, float zc,
-           double * const r, double * const g, double * const b)
+           float * const r, float * const g, float * const b)
 {
    *r = m[0][0]*xc + m[0][1]*yc + m[0][2]*zc;
    *g = m[1][0]*xc + m[1][1]*yc + m[1][2]*zc;
    *b = m[2][0]*xc + m[2][1]*yc + m[2][2]*zc;
 }
-static void invert_matrix3x3(double in[3][3], double out[3][3])
+static void invert_matrix3x3(float in[3][3], float out[3][3])
 {
-    double m00 = in[0][0], m01 = in[0][1], m02 = in[0][2],
+    float m00 = in[0][0], m01 = in[0][1], m02 = in[0][2],
           m10 = in[1][0], m11 = in[1][1], m12 = in[1][2],
           m20 = in[2][0], m21 = in[2][1], m22 = in[2][2];
    int i, j;
-    double det;
+    float det;
    out[0][0] =  (m11 * m22 - m21 * m12);
    out[0][1] = -(m01 * m22 - m21 * m02);
@@ -800,9 +804,9 @@ static void invert_matrix3x3(double in[3][3], double out[3][3])
    }
 }
-static void get_rgb2xyz_matrix(struct ColorSystem system, double m[3][3])
+static void get_rgb2xyz_matrix(struct ColorSystem system, float m[3][3])
 {
-    double S[3], X[4], Z[4];
+    float S[3], X[4], Z[4];
    int i;
    X[0] = system.xRed   / system.yRed;
@@ -834,31 +838,32 @@ static void get_rgb2xyz_matrix(struct ColorSystem system, double m[3][3])
 }
 static void
-rgb_to_xy(double rc,
+rgb_to_xy(float rc,
-          double gc,
+          float gc,
-          double bc,
+          float bc,
-          double * const x,
+          float * const x,
-          double * const y,
+          float * const y,
-          double * const z,
+          float * const z,
-          const double m[3][3])
+          const float m[3][3])
 {
-    double sum;
+    float scale;
    *x = m[0][0] * rc + m[0][1] * gc + m[0][2] * bc;
    *y = m[1][0] * rc + m[1][1] * gc + m[1][2] * bc;
    *z = m[2][0] * rc + m[2][1] * gc + m[2][2] * bc;
-    sum = *x + *y + *z;
+    scale = *x + *y + *z;
-    if (sum == 0)
+    if (scale == 0.f)
-        sum = 1;
+        scale = 1.f;
-    *x = *x / sum;
+    scale = 1.f / scale;
-    *y = *y / sum;
+    *x = *x * scale;
    *y = *y * scale;
 }
 static int
-constrain_rgb(double * const r,
+constrain_rgb(float * const r,
-              double * const g,
+              float * const g,
-              double * const b)
+              float * const b)
 {
 /*----------------------------------------------------------------------------
    If  the  requested RGB shade contains a negative weight for one of
@@ -866,7 +871,7 @@ constrain_rgb(double * const r,
    the  given  triple  of  primaries.  Desaturate it by adding white,
    equal quantities of R, G, and B, enough to make RGB all positive.
 -----------------------------------------------------------------------------*/
-    double w;
+    float w;
    /* Amount of white needed is w = - min(0, *r, *g, *b) */
    w = (0 < *r) ? 0 : *r;
@@ -886,7 +891,7 @@ constrain_rgb(double * const r,
 static void
 gamma_correct(const struct ColorSystem * const cs,
-              double *                   const c)
+              float *                   const c)
 {
 /*----------------------------------------------------------------------------
    Transform linear RGB values to nonlinear RGB values.
@@ -899,8 +904,8 @@ gamma_correct(const struct ColorSystem * const cs,
       http://www.inforamp.net/~poynton/ColorFAQ.html
       http://www.inforamp.net/~poynton/GammaFAQ.html
 -----------------------------------------------------------------------------*/
-    double gamma;
+    float gamma;
-    double cc;
+    float cc;
    gamma = cs->gamma;
@@ -922,9 +927,9 @@ gamma_correct(const struct ColorSystem * const cs,
 static void
 gamma_correct_rgb(const struct ColorSystem * const cs,
-                  double * const r,
+                  float * const r,
-                  double * const g,
+                  float * const g,
-                  double * const b)
+                  float * const b)
 {
    gamma_correct(cs, r);
    gamma_correct(cs, g);
@@ -938,24 +943,24 @@ gamma_correct_rgb(const struct ColorSystem * const cs,
 #define Sz(x) (((x) * (int)FFMIN(w, h)) / 512)
 static void
-monochrome_color_location(double waveLength, int w, int h,
+monochrome_color_location(float waveLength, int w, int h,
                          int cie, int *xP, int *yP)
 {
    const int ix = waveLength - 360;
-    const double pX = spectral_chromaticity[ix][0];
+    const float pX = spectral_chromaticity[ix][0];
-    const double pY = spectral_chromaticity[ix][1];
+    const float pY = spectral_chromaticity[ix][1];
-    const double pZ = spectral_chromaticity[ix][2];
+    const float pZ = spectral_chromaticity[ix][2];
-    const double px = pX / (pX + pY + pZ);
+    const float px = pX / (pX + pY + pZ);
-    const double py = pY / (pX + pY + pZ);
+    const float py = pY / (pX + pY + pZ);
    if (cie == LUV) {
-        double up, vp;
+        float up, vp;
        xy_to_upvp(px, py, &up, &vp);
        *xP = up * (w - 1);
        *yP = (h - 1) - vp * (h - 1);
    } else if (cie == UCS) {
-        double u, v;
+        float u, v;
        xy_to_uv(px, py, &u, &v);
        *xP = u * (w - 1);
@@ -1099,7 +1104,7 @@ fill_in_tongue(uint16_t*                  const pixels,
               int                        const h,
               uint16_t                   const maxval,
               const struct ColorSystem * const cs,
-               double                     const m[3][3],
+               float                      const m[3][3],
               int                        const cie,
               int                        const correct_gamma,
               float                      const contrast)
@@ -1122,24 +1127,24 @@ fill_in_tongue(uint16_t*                  const pixels,
            int x;
            for (x = leftEdge; x <= rightEdge; ++x) {
-                double cx, cy, cz, jr, jg, jb, jmax;
+                float cx, cy, cz, jr, jg, jb, jmax;
                int r, g, b, mx = maxval;
                if (cie == LUV) {
-                    double up, vp;
+                    float up, vp;
-                    up = ((double) x) / (w - 1);
+                    up = ((float) x) / (w - 1);
-                    vp = 1.0 - ((double) y) / (h - 1);
+                    vp = 1.0 - ((float) y) / (h - 1);
                    upvp_to_xy(up, vp, &cx, &cy);
                    cz = 1.0 - (cx + cy);
                } else if (cie == UCS) {
-                    double u, v;
+                    float u, v;
-                    u = ((double) x) / (w - 1);
+                    u = ((float) x) / (w - 1);
-                    v = 1.0 - ((double) y) / (h - 1);
+                    v = 1.0 - ((float) y) / (h - 1);
                    uv_to_xy(u, v, &cx, &cy);
                    cz = 1.0 - (cx + cy);
                } else if (cie == XYY) {
-                    cx = ((double) x) / (w - 1);
+                    cx = ((float) x) / (w - 1);
-                    cy = 1.0 - ((double) y) / (h - 1);
+                    cy = 1.0 - ((float) y) / (h - 1);
                    cz = 1.0 - (cx + cy);
                } else {
                    av_assert0(0);
@@ -1189,12 +1194,12 @@ plot_white_point(uint16_t*      pixels,
    int wx, wy;
    if (cie == LUV) {
-        double wup, wvp;
+        float wup, wvp;
        xy_to_upvp(cs->xWhite, cs->yWhite, &wup, &wvp);
        wx = (w - 1) * wup;
        wy = (h - 1) - ((int) ((h - 1) * wvp));
    } else if (cie == UCS) {
-        double wu, wv;
+        float wu, wv;
        xy_to_uv(cs->xWhite, cs->yWhite, &wu, &wv);
        wx = (w - 1) * wu;
        wy = (h - 1) - ((int) ((h - 1) * wv));
@@ -1234,68 +1239,82 @@ static int draw_background(AVFilterContext *ctx)
    tongue_outline(pixels, s->f->linesize[0] / 2, w, h, 65535, s->cie);
-    fill_in_tongue(pixels, s->f->linesize[0] / 2, w, h, 65535, cs, (const double (*)[3])s->i, s->cie,
+    fill_in_tongue(pixels, s->f->linesize[0] / 2, w, h, 65535, cs, (const float (*)[3])s->i, s->cie,
                   s->correct_gamma, s->contrast);
    return 0;
 }
-static void filter_rgb48(AVFilterContext *ctx, AVFrame *in, double *cx, double *cy, int x, int y)
+static void filter_rgb48(AVFilterContext *ctx, const uint8_t *ptr,
                         ptrdiff_t linesize,
                         float *cx, float *cy, int x, int y)
 {
    CiescopeContext *s = ctx->priv;
-    const uint16_t* src = (const uint16_t*)(in->data[0] + in->linesize[0] * y + x * 6);
+    const float scale = 1. / 65535.;
-    double r = src[0] / 65535.;
+    const uint16_t *src = (const uint16_t*)(ptr + linesize * y + x * 6);
-    double g = src[1] / 65535.;
+    float r = src[0] * scale;
-    double b = src[2] / 65535.;
+    float g = src[1] * scale;
-    double cz;
+    float b = src[2] * scale;
    float cz;
-    rgb_to_xy(r, g, b, cx, cy, &cz, (const double (*)[3])s->m);
+    rgb_to_xy(r, g, b, cx, cy, &cz, (const float (*)[3])s->m);
 }
-static void filter_rgba64(AVFilterContext *ctx, AVFrame *in, double *cx, double *cy, int x, int y)
+static void filter_rgba64(AVFilterContext *ctx, const uint8_t *ptr,
                          ptrdiff_t linesize,
                          float *cx, float *cy, int x, int y)
 {
    CiescopeContext *s = ctx->priv;
-    const uint16_t* src = (const uint16_t*)(in->data[0] + in->linesize[0] * y + x * 8);
+    const float scale = 1. / 65535.;
-    double r = src[0] / 65535.;
+    const uint16_t *src = (const uint16_t*)(ptr + linesize * y + x * 8);
-    double g = src[1] / 65535.;
+    float r = src[0] * scale;
-    double b = src[2] / 65535.;
+    float g = src[1] * scale;
-    double cz;
+    float b = src[2] * scale;
    float cz;
-    rgb_to_xy(r, g, b, cx, cy, &cz, (const double (*)[3])s->m);
+    rgb_to_xy(r, g, b, cx, cy, &cz, (const float (*)[3])s->m);
 }
-static void filter_rgb24(AVFilterContext *ctx, AVFrame *in, double *cx, double *cy, int x, int y)
+static void filter_rgb24(AVFilterContext *ctx, const uint8_t *ptr,
                         ptrdiff_t linesize,
                         float *cx, float *cy, int x, int y)
 {
    CiescopeContext *s = ctx->priv;
-    const uint8_t* src = in->data[0] + in->linesize[0] * y + x * 3;
+    const float scale = 1. / 255.;
-    double r = src[0] / 255.;
+    const uint8_t *src = ptr + linesize * y + x * 3;
-    double g = src[1] / 255.;
+    float r = src[0] * scale;
-    double b = src[2] / 255.;
+    float g = src[1] * scale;
-    double cz;
+    float b = src[2] * scale;
    float cz;
-    rgb_to_xy(r, g, b, cx, cy, &cz, (const double (*)[3])s->m);
+    rgb_to_xy(r, g, b, cx, cy, &cz, (const float (*)[3])s->m);
 }
-static void filter_rgba(AVFilterContext *ctx, AVFrame *in, double *cx, double *cy, int x, int y)
+static void filter_rgba(AVFilterContext *ctx, const uint8_t *ptr,
                        ptrdiff_t linesize,
                        float *cx, float *cy, int x, int y)
 {
    CiescopeContext *s = ctx->priv;
-    const uint8_t* src = in->data[0] + in->linesize[0] * y + x * 4;
+    const float scale = 1. / 255.;
-    double r = src[0] / 255.;
+    const uint8_t *src = ptr + linesize * y + x * 4;
-    double g = src[1] / 255.;
+    float r = src[0] * scale;
-    double b = src[2] / 255.;
+    float g = src[1] * scale;
-    double cz;
+    float b = src[2] * scale;
    float cz;
-    rgb_to_xy(r, g, b, cx, cy, &cz, (const double (*)[3])s->m);
+    rgb_to_xy(r, g, b, cx, cy, &cz, (const float (*)[3])s->m);
 }
-static void filter_xyz(AVFilterContext *ctx, AVFrame *in, double *cx, double *cy, int x, int y)
+static void filter_xyz(AVFilterContext *ctx, const uint8_t *ptr,
                       ptrdiff_t linesize,
                       float *cx, float *cy, int x, int y)
 {
    CiescopeContext *s = ctx->priv;
-    const uint16_t* src = (uint16_t *)(in->data[0] + in->linesize[0] * y + x * 6);
+    const uint16_t* src = (uint16_t *)(ptr + linesize * y + x * 6);
-    double lx = s->log2lin[src[0]];
+    float lx = s->log2lin[src[0]];
-    double ly = s->log2lin[src[1]];
+    float ly = s->log2lin[src[1]];
-    double lz = s->log2lin[src[2]];
+    float lz = s->log2lin[src[2]];
-    double sum = lx + ly + lz;
+    float sum = lx + ly + lz;
    if (sum == 0)
        sum = 1;
@@ -1315,7 +1334,7 @@ static void plot_gamuts(uint16_t *pixels, int linesize, int w, int h,
        if (!((1 << i) & gamuts))
            continue;
        if (cie == LUV) {
-            double wup, wvp;
+            float wup, wvp;
            xy_to_upvp(cs->xRed, cs->yRed, &wup, &wvp);
            rx = (w - 1) * wup;
            ry = (h - 1) - ((int) ((h - 1) * wvp));
@@ -1326,7 +1345,7 @@ static void plot_gamuts(uint16_t *pixels, int linesize, int w, int h,
            bx = (w - 1) * wup;
            by = (h - 1) - ((int) ((h - 1) * wvp));
        } else if (cie == UCS) {
-            double wu, wv;
+            float wu, wv;
            xy_to_uv(cs->xRed, cs->yRed, &wu, &wv);
            rx = (w - 1) * wu;
            ry = (h - 1) - ((int) ((h - 1) * wv));
@@ -1384,37 +1403,48 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
    }
    for (y = 0; y < in->height; y++) {
-        for (x = 0; x < in->width; x++) {
+        const uint8_t *src = in->data[0];
-            double cx, cy;
+        const ptrdiff_t src_linesize = in->linesize[0];
-            uint16_t *dst;
+        uint16_t *dst = (uint16_t *)out->data[0];
-            int wx, wy;
+        const ptrdiff_t linesize = out->linesize[0] / 2;
        const int w_1 = w - 1;
        const int h_1 = h - 1;
-            s->filter(ctx, in, &cx, &cy, x, y);
+        for (x = 0; x < in->width; x++) {
            float cx, cy;
            int wx, wy, pos;
            int r, g, b;
            s->filter(ctx, src, src_linesize, &cx, &cy, x, y);
            if (s->cie == LUV) {
-                double up, vp;
+                float up, vp;
                xy_to_upvp(cx, cy, &up, &vp);
                cx = up;
                cy = vp;
            } else if (s->cie == UCS) {
-                double u, v;
+                float u, v;
                xy_to_uv(cx, cy, &u, &v);
                cx = u;
                cy = v;
            }
-            wx = (w - 1) * cx;
+            wx = w_1 * cx;
-            wy = (h - 1) - ((h - 1) * cy);
+            wy = h_1 - h_1 * cy;
            if (wx < 0 || wx >= w ||
                wy < 0 || wy >= h)
                continue;
-            dst = (uint16_t *)(out->data[0] + wy * out->linesize[0] + wx * 8 + 0);
+            pos = wy * linesize + wx * 4;
-            dst[0] = FFMIN(dst[0] + i, 65535);
+            r = dst[pos + 0] + i;
-            dst[1] = FFMIN(dst[1] + i, 65535);
+            g = dst[pos + 1] + i;
-            dst[2] = FFMIN(dst[2] + i, 65535);
+            b = dst[pos + 2] + i;
-            dst[3] = 65535;
+
            dst[pos + 0] = FFMIN(r, 65535);
            dst[pos + 1] = FFMIN(g, 65535);
            dst[pos + 2] = FFMIN(b, 65535);
            dst[pos + 3] = 65535;
        }
    }