diff --git a/libswscale/Makefile b/libswscale/Makefile index c4e45d494e..267952d870 100644 --- a/libswscale/Makefile +++ b/libswscale/Makefile @@ -14,6 +14,7 @@ OBJS = alphablend.o \ graph.o \ half2float.o \ input.o \ + lut3d.o \ options.o \ output.o \ rgb2rgb.o \ diff --git a/libswscale/lut3d.c b/libswscale/lut3d.c new file mode 100644 index 0000000000..db8e5f6f84 --- /dev/null +++ b/libswscale/lut3d.c @@ -0,0 +1,290 @@ +/* + * Copyright (C) 2024 Niklas Haas + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include + +#include "libavutil/attributes.h" +#include "libavutil/avassert.h" +#include "libavutil/mem.h" + +#include "cms.h" +#include "csputils.h" +#include "lut3d.h" + +SwsLut3D *sws_lut3d_alloc(void) +{ + SwsLut3D *lut3d = av_malloc(sizeof(*lut3d)); + if (!lut3d) + return NULL; + + lut3d->dynamic = false; + return lut3d; +} + +void sws_lut3d_free(SwsLut3D **plut3d) +{ + av_freep(plut3d); +} + +bool sws_lut3d_test_fmt(enum AVPixelFormat fmt, int output) +{ + return fmt == AV_PIX_FMT_RGBA64; +} + +enum AVPixelFormat sws_lut3d_pick_pixfmt(SwsFormat fmt, int output) +{ + return AV_PIX_FMT_RGBA64; +} + +/** + * v0 and v1 are 'black' and 'white' + * v2 and v3 are closest RGB/CMY vertices + * x >= y >= z are relative weights + */ +static av_always_inline +v3u16_t barycentric(int shift, int x, int y, int z, + v3u16_t v0, v3u16_t v1, v3u16_t v2, v3u16_t v3) +{ + const int a = (1 << shift) - x; + const int b = x - y; + const int c = y - z; + const int d = z; + av_assert2(x >= y); + av_assert2(y >= z); + + return (v3u16_t) { + (a * v0.x + b * v1.x + c * v2.x + d * v3.x) >> shift, + (a * v0.y + b * v1.y + c * v2.y + d * v3.y) >> shift, + (a * v0.z + b * v1.z + c * v2.z + d * v3.z) >> shift, + }; +} + +static av_always_inline +v3u16_t tetrahedral(const SwsLut3D *lut3d, int Rx, int Gx, int Bx, + int Rf, int Gf, int Bf) +{ + const int shift = 16 - INPUT_LUT_BITS; + const int Rn = FFMIN(Rx + 1, INPUT_LUT_SIZE - 1); + const int Gn = FFMIN(Gx + 1, INPUT_LUT_SIZE - 1); + const int Bn = FFMIN(Bx + 1, INPUT_LUT_SIZE - 1); + + const v3u16_t c000 = lut3d->input[Bx][Gx][Rx]; + const v3u16_t c111 = lut3d->input[Bn][Gn][Rn]; + if (Rf > Gf) { + if (Gf > Bf) { + const v3u16_t c100 = lut3d->input[Bx][Gx][Rn]; + const v3u16_t c110 = lut3d->input[Bx][Gn][Rn]; + return barycentric(shift, Rf, Gf, Bf, c000, c100, c110, c111); + } else if (Rf > Bf) { + const v3u16_t c100 = lut3d->input[Bx][Gx][Rn]; + const v3u16_t c101 = lut3d->input[Bn][Gx][Rn]; + return barycentric(shift, Rf, Bf, Gf, c000, c100, c101, c111); + } else { + const v3u16_t c001 = lut3d->input[Bn][Gx][Rx]; + const v3u16_t c101 = lut3d->input[Bn][Gx][Rn]; + return barycentric(shift, Bf, Rf, Gf, c000, c001, c101, c111); + } + } else { + if (Bf > Gf) { + const v3u16_t c001 = lut3d->input[Bn][Gx][Rx]; + const v3u16_t c011 = lut3d->input[Bn][Gn][Rx]; + return barycentric(shift, Bf, Gf, Rf, c000, c001, c011, c111); + } else if (Bf > Rf) { + const v3u16_t c010 = lut3d->input[Bx][Gn][Rx]; + const v3u16_t c011 = lut3d->input[Bn][Gn][Rx]; + return barycentric(shift, Gf, Bf, Rf, c000, c010, c011, c111); + } else { + const v3u16_t c010 = lut3d->input[Bx][Gn][Rx]; + const v3u16_t c110 = lut3d->input[Bx][Gn][Rn]; + return barycentric(shift, Gf, Rf, Bf, c000, c010, c110, c111); + } + } +} + +static av_always_inline v3u16_t lookup_input16(const SwsLut3D *lut3d, v3u16_t rgb) +{ + const int shift = 16 - INPUT_LUT_BITS; + const int Rx = rgb.x >> shift; + const int Gx = rgb.y >> shift; + const int Bx = rgb.z >> shift; + const int Rf = rgb.x & ((1 << shift) - 1); + const int Gf = rgb.y & ((1 << shift) - 1); + const int Bf = rgb.z & ((1 << shift) - 1); + return tetrahedral(lut3d, Rx, Gx, Bx, Rf, Gf, Bf); +} + +static av_always_inline v3u16_t lookup_input8(const SwsLut3D *lut3d, v3u8_t rgb) +{ + static_assert(INPUT_LUT_BITS <= 8, "INPUT_LUT_BITS must be <= 8"); + const int shift = 8 - INPUT_LUT_BITS; + const int Rx = rgb.x >> shift; + const int Gx = rgb.y >> shift; + const int Bx = rgb.z >> shift; + const int Rf = rgb.x & ((1 << shift) - 1); + const int Gf = rgb.y & ((1 << shift) - 1); + const int Bf = rgb.z & ((1 << shift) - 1); + return tetrahedral(lut3d, Rx, Gx, Bx, Rf, Gf, Bf); +} + +/** + * Note: These functions are scaled such that x == (1 << shift) corresponds to + * a value of 1.0. This makes them suitable for use when interpolation LUT + * entries with a fractional part that is just masked away from the index, + * since a fractional coordinate of e.g. 0xFFFF corresponds to a mix weight of + * just slightly *less* than 1.0. + */ +static av_always_inline v2u16_t lerp2u16(v2u16_t a, v2u16_t b, int x, int shift) +{ + const int xi = (1 << shift) - x; + return (v2u16_t) { + (a.x * xi + b.x * x) >> shift, + (a.y * xi + b.y * x) >> shift, + }; +} + +static av_always_inline v3u16_t lerp3u16(v3u16_t a, v3u16_t b, int x, int shift) +{ + const int xi = (1 << shift) - x; + return (v3u16_t) { + (a.x * xi + b.x * x) >> shift, + (a.y * xi + b.y * x) >> shift, + (a.z * xi + b.z * x) >> shift, + }; +} + +static av_always_inline v3u16_t lookup_output(const SwsLut3D *lut3d, v3u16_t ipt) +{ + const int Ishift = 16 - OUTPUT_LUT_BITS_I; + const int Cshift = 16 - OUTPUT_LUT_BITS_PT; + const int Ix = ipt.x >> Ishift; + const int Px = ipt.y >> Cshift; + const int Tx = ipt.z >> Cshift; + const int If = ipt.x & ((1 << Ishift) - 1); + const int Pf = ipt.y & ((1 << Cshift) - 1); + const int Tf = ipt.z & ((1 << Cshift) - 1); + const int In = FFMIN(Ix + 1, OUTPUT_LUT_SIZE_I - 1); + const int Pn = FFMIN(Px + 1, OUTPUT_LUT_SIZE_PT - 1); + const int Tn = FFMIN(Tx + 1, OUTPUT_LUT_SIZE_PT - 1); + + /* Trilinear interpolation */ + const v3u16_t c000 = lut3d->output[Tx][Px][Ix]; + const v3u16_t c001 = lut3d->output[Tx][Px][In]; + const v3u16_t c010 = lut3d->output[Tx][Pn][Ix]; + const v3u16_t c011 = lut3d->output[Tx][Pn][In]; + const v3u16_t c100 = lut3d->output[Tn][Px][Ix]; + const v3u16_t c101 = lut3d->output[Tn][Px][In]; + const v3u16_t c110 = lut3d->output[Tn][Pn][Ix]; + const v3u16_t c111 = lut3d->output[Tn][Pn][In]; + const v3u16_t c00 = lerp3u16(c000, c100, Tf, Cshift); + const v3u16_t c10 = lerp3u16(c010, c110, Tf, Cshift); + const v3u16_t c01 = lerp3u16(c001, c101, Tf, Cshift); + const v3u16_t c11 = lerp3u16(c011, c111, Tf, Cshift); + const v3u16_t c0 = lerp3u16(c00, c10, Pf, Cshift); + const v3u16_t c1 = lerp3u16(c01, c11, Pf, Cshift); + const v3u16_t c = lerp3u16(c0, c1, If, Ishift); + return c; +} + +static av_always_inline v3u16_t apply_tone_map(const SwsLut3D *lut3d, v3u16_t ipt) +{ + const int shift = 16 - TONE_LUT_BITS; + const int Ix = ipt.x >> shift; + const int If = ipt.x & ((1 << shift) - 1); + const int In = FFMIN(Ix + 1, TONE_LUT_SIZE - 1); + + const v2u16_t w0 = lut3d->tone_map[Ix]; + const v2u16_t w1 = lut3d->tone_map[In]; + const v2u16_t w = lerp2u16(w0, w1, If, shift); + const int base = (1 << 15) - w.y; + + ipt.x = w.x; + ipt.y = base + (ipt.y * w.y >> 15); + ipt.z = base + (ipt.z * w.y >> 15); + return ipt; +} + +int sws_lut3d_generate(SwsLut3D *lut3d, enum AVPixelFormat fmt_in, + enum AVPixelFormat fmt_out, const SwsColorMap *map) +{ + int ret; + + if (!sws_lut3d_test_fmt(fmt_in, 0) || !sws_lut3d_test_fmt(fmt_out, 1)) + return AVERROR(EINVAL); + + lut3d->dynamic = map->src.frame_peak.num > 0; + lut3d->map = *map; + + if (lut3d->dynamic) { + ret = sws_color_map_generate_dynamic(&lut3d->input[0][0][0], + &lut3d->output[0][0][0], + INPUT_LUT_SIZE, OUTPUT_LUT_SIZE_I, + OUTPUT_LUT_SIZE_PT, map); + if (ret < 0) + return ret; + + /* Make sure initial state is valid */ + sws_lut3d_update(lut3d, &map->src); + return 0; + } else { + return sws_color_map_generate_static(&lut3d->input[0][0][0], + INPUT_LUT_SIZE, map); + } +} + +void sws_lut3d_update(SwsLut3D *lut3d, const SwsColor *new_src) +{ + if (!new_src || !lut3d->dynamic) + return; + + lut3d->map.src.frame_peak = new_src->frame_peak; + lut3d->map.src.frame_avg = new_src->frame_avg; + + sws_tone_map_generate(lut3d->tone_map, TONE_LUT_SIZE, &lut3d->map); +} + +void sws_lut3d_apply(const SwsLut3D *lut3d, const uint8_t *in, int in_stride, + uint8_t *out, int out_stride, int w, int h) +{ + while (h--) { + const uint16_t *in16 = (const uint16_t *) in; + uint16_t *out16 = (uint16_t *) out; + + for (int x = 0; x < w; x++) { + v3u16_t c = { in16[0], in16[1], in16[2] }; + c = lookup_input16(lut3d, c); + + if (lut3d->dynamic) { + c = apply_tone_map(lut3d, c); + c = lookup_output(lut3d, c); + } + + out16[0] = c.x; + out16[1] = c.y; + out16[2] = c.z; + out16[3] = in16[3]; + in16 += 4; + out16 += 4; + } + + in += in_stride; + out += out_stride; + } +} diff --git a/libswscale/lut3d.h b/libswscale/lut3d.h new file mode 100644 index 0000000000..1933552830 --- /dev/null +++ b/libswscale/lut3d.h @@ -0,0 +1,98 @@ +/* + * Copyright (C) 2024 Niklas Haas + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef SWSCALE_LUT3D_H +#define SWSCALE_LUT3D_H + +#include + +#include "libavutil/csp.h" +#include "libavutil/pixfmt.h" + +#include "cms.h" +#include "csputils.h" +#include "utils.h" + +enum { + /* Input LUT size. This is only calculated once. */ + INPUT_LUT_BITS = 6, + INPUT_LUT_SIZE = (1 << INPUT_LUT_BITS) + 1, /* +1 to simplify interpolation */ + + /* Tone mapping LUT size. This is regenerated possibly per frame. */ + TONE_LUT_BITS = 8, + TONE_LUT_SIZE = (1 << TONE_LUT_BITS) + 1, + + /* Output LUT size (for dynamic tone mapping). This is only calculated once. */ + OUTPUT_LUT_BITS_I = 6, + OUTPUT_LUT_BITS_PT = 7, + + OUTPUT_LUT_SIZE_I = (1 << OUTPUT_LUT_BITS_I) + 1, + OUTPUT_LUT_SIZE_PT = (1 << OUTPUT_LUT_BITS_PT) + 1, +}; + +typedef struct SwsLut3D { + SwsColorMap map; + bool dynamic; + + /* Gamut mapping 3DLUT(s) */ + v3u16_t input[INPUT_LUT_SIZE][INPUT_LUT_SIZE][INPUT_LUT_SIZE]; + v3u16_t output[OUTPUT_LUT_SIZE_PT][OUTPUT_LUT_SIZE_PT][OUTPUT_LUT_SIZE_I]; + + /* Split tone mapping LUT (for dynamic tone mapping) */ + v2u16_t tone_map[TONE_LUT_SIZE]; /* new luma, desaturation */ +} SwsLut3D; + +SwsLut3D *sws_lut3d_alloc(void); +void sws_lut3d_free(SwsLut3D **lut3d); + +/** + * Test to see if a given format is supported by the 3DLUT input/output code. + */ +bool sws_lut3d_test_fmt(enum AVPixelFormat fmt, int output); + +/** + * Pick the best compatible pixfmt for a given SwsFormat. + */ +enum AVPixelFormat sws_lut3d_pick_pixfmt(SwsFormat fmt, int output); + +/** + * Recalculate the (static) 3DLUT state with new settings. This will recompute + * everything. To only update per-frame tone mapping state, instead call + * sws_lut3d_update(). + * + * Returns 0 or a negative error code. + */ +int sws_lut3d_generate(SwsLut3D *lut3d, enum AVPixelFormat fmt_in, + enum AVPixelFormat fmt_out, const SwsColorMap *map); + +/** + * Update the tone mapping state. This will only use per-frame metadata. The + * static metadata is ignored. + */ +void sws_lut3d_update(SwsLut3D *lut3d, const SwsColor *new_src); + +/** + * Applies a color transformation to a plane. The format must match the format + * provided during sws_lut3d_update(). + */ +void sws_lut3d_apply(const SwsLut3D *lut3d, const uint8_t *in, int in_stride, + uint8_t *out, int out_stride, int w, int h); + +#endif /* SWSCALE_LUT3D_H */