From 6bc29a6b571c83058d04dc7b8b0f827dfee31b2c Mon Sep 17 00:00:00 2001 From: Niklas Haas Date: Tue, 17 Aug 2021 21:25:32 +0200 Subject: [PATCH] avcodec/h274: add film grain synthesis routine This could arguably also be a vf, but I decided to put it here since decoders are technically required to apply film grain during the output step, and I would rather want to avoid requiring users insert the correct film grain synthesis filter on their own. The code, while in C, is written in a way that unrolls/vectorizes fairly well under -O3, and is reasonably cache friendly. On my CPU, a single thread pushes about 400 FPS at 1080p. Apart from hand-written assembly, one possible avenue of improvement would be to change the access order to compute the grain row-by-row rather than in 8x8 blocks. This requires some redundant PRNG calls, but would make the algorithm more cache-oblivious. The implementation has been written to the wording of SMPTE RDD 5-2006 as faithfully as I can manage. However, apart from passing a visual inspection, no guarantee of correctness can be made due to the lack of any publicly available reference implementation against which to compare it. Signed-off-by: Niklas Haas Signed-off-by: James Almer --- libavcodec/Makefile | 2 +- libavcodec/h274.c | 811 ++++++++++++++++++++++++++++++++++++++++++++ libavcodec/h274.h | 52 +++ 3 files changed, 864 insertions(+), 1 deletion(-) create mode 100644 libavcodec/h274.c create mode 100644 libavcodec/h274.h diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 0dc564f5dc..68d808de42 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -367,7 +367,7 @@ OBJS-$(CONFIG_H264_DECODER) += h264dec.o h264_cabac.o h264_cavlc.o \ h264_direct.o h264_loopfilter.o \ h264_mb.o h264_picture.o \ h264_refs.o h264_sei.o \ - h264_slice.o h264data.o + h264_slice.o h264data.o h274.o OBJS-$(CONFIG_H264_AMF_ENCODER) += amfenc_h264.o OBJS-$(CONFIG_H264_CUVID_DECODER) += cuviddec.o OBJS-$(CONFIG_H264_MEDIACODEC_DECODER) += mediacodecdec.o diff --git a/libavcodec/h274.c b/libavcodec/h274.c new file mode 100644 index 0000000000..0efc00ca1d --- /dev/null +++ b/libavcodec/h274.c @@ -0,0 +1,811 @@ +/* + * H.274 film grain synthesis + * Copyright (c) 2021 Niklas Haas + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * H.274 film grain synthesis. + * @author Niklas Haas + */ + +#include "libavutil/avassert.h" +#include "libavutil/imgutils.h" + +#include "h274.h" + +// The code in this file has a lot of loops that vectorize very well, this is +// about a 40% speedup for no obvious downside. +#pragma GCC optimize("tree-vectorize") + +static const int8_t Gaussian_LUT[2048+256]; +static const uint32_t Seed_LUT[256]; +static const int8_t R64T[64][64]; + +static void prng_shift(uint32_t *state) +{ + // Primitive polynomial x^31 + x^3 + 1 (modulo 2) + uint32_t x = *state; + uint8_t feedback = (x >> 2) ^ (x >> 30); + *state = (x << 1) | (feedback & 1u); +} + +static void init_slice_c(int8_t out[64][64], uint8_t h, uint8_t v, + int16_t tmp[64][64]) +{ + static const uint8_t deblock_factors[13] = { + 64, 71, 77, 84, 90, 96, 103, 109, 116, 122, 128, 128, 128 + }; + + const uint8_t deblock_coeff = deblock_factors[v]; + const uint8_t freq_h = ((h + 3) << 2) - 1; + const uint8_t freq_v = ((v + 3) << 2) - 1; + uint32_t seed = Seed_LUT[h + v * 13]; + + // Initialize with random gaussian values, using the output array as a + // temporary buffer for these intermediate values. + // + // Note: To make the subsequent matrix multiplication cache friendlier, we + // store each *column* of the starting image in a *row* of `out` + for (int y = 0; y <= freq_v; y++) { + for (int x = 0; x <= freq_h; x += 4) { + uint16_t offset = seed % 2048; + out[x + 0][y] = Gaussian_LUT[offset + 0]; + out[x + 1][y] = Gaussian_LUT[offset + 1]; + out[x + 2][y] = Gaussian_LUT[offset + 2]; + out[x + 3][y] = Gaussian_LUT[offset + 3]; + prng_shift(&seed); + } + } + + out[0][0] = 0; + + // 64x64 inverse integer transform + for (int y = 0; y < 64; y++) { + for (int x = 0; x < 64; x++) { + int32_t sum = 0; + for (int p = 0; p < 64; p++) + sum += R64T[y][p] * out[x][p]; + tmp[y][x] = (sum + 128) >> 8; + } + } + + for (int y = 0; y < 64; y++) { + for (int x = 0; x < 64; x++) { + int32_t sum = 0; + for (int p = 0; p < 64; p++) + sum += tmp[y][p] * R64T[x][p]; // R64T^T = R64 + // Renormalize and clip to [-127, 127] + out[y][x] = av_clip((sum + 128) >> 8, -127, 127); + } + } + + // Deblock horizontal edges by simple attentuation of values + for (int y = 0; y < 64; y += 8) { + for (int x = 0; x < 64; x++) { + out[y + 0][x] = (out[y + 0][x] * deblock_coeff) >> 7; + out[y + 7][x] = (out[y + 7][x] * deblock_coeff) >> 7; + } + } +} + +static void init_slice(H274FilmGrainDatabase *database, uint8_t h, uint8_t v) +{ + if (database->residency[h] & (1 << v)) + return; + + database->residency[h] |= (1 << v); + init_slice_c(database->db[h][v], h, v, database->slice_tmp); +} + +// Computes the average of an 8x8 block, right-shifted by 6 +static uint16_t avg_8x8_c(const uint8_t *in, int in_stride) +{ + uint16_t avg[8] = {0}; // summing over an array vectorizes better + + for (int y = 0; y < 8; y++) { + for (int x = 0; x < 8; x++) + avg[x] += in[x]; + in += in_stride; + } + + return (avg[0] + avg[1] + avg[2] + avg[3] + + avg[4] + avg[5] + avg[6] + avg[7]) >> 6; +} + +// Synthesize an 8x8 block of film grain by copying the pattern from `db` +static void synth_grain_8x8_c(int8_t *out, const int out_stride, + const int16_t scale, const uint8_t shift, + const int8_t *db) +{ + for (int y = 0; y < 8; y++) { + for (int x = 0; x < 8; x++) + out[x] = (scale * db[x]) >> shift; + + out += out_stride; + db += 64; + } +} + +// Deblock vertical edges of an 8x8 block, mixing with the previous block +static void deblock_8x8_c(int8_t *out, const int out_stride) +{ + for (int y = 0; y < 8; y++) { + const int8_t l1 = out[-2], l0 = out[-1]; + const int8_t r0 = out[0], r1 = out[1]; + out[0] = (l0 + (r0 << 1) + r1) >> 2; + out[-1] = (r0 + (l0 << 1) + l1) >> 2; + out += out_stride; + } +} + +// Generates a single 8x8 block of grain, optionally also applying the +// deblocking step (note that this implies writing to the previous block). +static av_always_inline void generate(int8_t *out, int out_stride, + const uint8_t *in, int in_stride, + H274FilmGrainDatabase *database, + const AVFilmGrainH274Params *h274, + int c, int invert, int deblock, + int y_offset, int x_offset) +{ + const uint8_t shift = h274->log2_scale_factor + 6; + const uint16_t avg = avg_8x8_c(in, in_stride); + int16_t scale; + uint8_t h, v; + int8_t s = -1; + + // FIXME: This logic only generates grain with a single + // intensity interval. Strictly speaking, the H.274 specification allows + // for overlapping intensity intervals, however SMPTE RDD 5-2006 (which + // concerns the implementation of H.274 for H.264) forbids this as it + // requires a nontrivial grain synthesis process (FFT). + // + // In principle, we should detect this possibility ahead of time and warn + // the user that the output is unlikely to be correct, or alternatively + // return an AVERROR_PATCHWELCOME. + for (int i = 0; i < h274->num_intensity_intervals[c]; i++) { + if (avg >= h274->intensity_interval_lower_bound[c][i] && + avg <= h274->intensity_interval_upper_bound[c][i]) + { + s = i; + break; + } + } + + if (s < 0) { + // No matching intensity interval, synthesize blank film grain + for (int y = 0; y < 8; y++) + memset(out + y * out_stride, 0, sizeof(int8_t[8])); + return; + } + + h = av_clip(h274->comp_model_value[c][s][1], 2, 14) - 2; + v = av_clip(h274->comp_model_value[c][s][2], 2, 14) - 2; + init_slice(database, h, v); + + scale = h274->comp_model_value[c][s][0]; + if (invert) + scale = -scale; + + synth_grain_8x8_c(out, out_stride, scale, shift, + &database->db[h][v][y_offset][x_offset]); + + if (deblock) + deblock_8x8_c(out, out_stride); +} + +// Saturating 8-bit sum of a+b +static void add_8x8_clip_c(uint8_t *out, const uint8_t *a, const int8_t *b, + int n) +{ + for (int i = 0; i < n; i++) + out[i] = av_clip_uint8(a[i] + b[i]); +} + +int ff_h274_apply_film_grain(AVFrame *out_frame, const AVFrame *in_frame, + H274FilmGrainDatabase *database, + const AVFilmGrainParams *params) +{ + AVFilmGrainH274Params h274 = params->codec.h274; + av_assert1(params->type == AV_FILM_GRAIN_PARAMS_H274); + if (h274.model_id != 0) + return AVERROR_PATCHWELCOME; + + av_assert1(out_frame->format == in_frame->format); + if (in_frame->format != AV_PIX_FMT_YUV420P) + return AVERROR_PATCHWELCOME; + + for (int c = 0; c < 3; c++) { + static const uint8_t color_offset[3] = { 0, 85, 170 }; + uint32_t seed = Seed_LUT[(params->seed + color_offset[c]) % 256]; + const int width = c > 0 ? AV_CEIL_RSHIFT(out_frame->width, 1) : out_frame->width; + const int height = c > 0 ? AV_CEIL_RSHIFT(out_frame->height, 1) : out_frame->height; + + uint8_t * const out = out_frame->data[c]; + const int out_stride = out_frame->linesize[c]; + int8_t * const grain = out_frame->data[c]; // re-use output buffer for grain + const int grain_stride = out_stride; + const uint8_t * const in = in_frame->data[c]; + const int in_stride = in_frame->linesize[c]; + + if (!h274.component_model_present[c]) { + av_image_copy_plane(out, out_stride, in, in_stride, + width * sizeof(uint8_t), height); + continue; + } + + if (c > 0) { + // Adaptation for 4:2:0 chroma subsampling + for (int i = 0; i < h274.num_intensity_intervals[c]; i++) { + h274.comp_model_value[c][i][0] >>= 1; + h274.comp_model_value[c][i][1] <<= 1; + h274.comp_model_value[c][i][2] <<= 1; + } + } + + // Film grain synthesis is done in 8x8 blocks, but the PRNG state is + // only advanced in 16x16 blocks, so use a nested loop + for (int y = 0; y < height; y += 16) { + for (int x = 0; x < width; x += 16) { + uint16_t y_offset = (seed >> 16) % 52; + uint16_t x_offset = (seed & 0xFFFF) % 56; + const int invert = (seed & 0x1); + y_offset &= 0xFFFC; + x_offset &= 0xFFF8; + prng_shift(&seed); + + for (int yy = 0; yy < 16 && y+yy < height; yy += 8) { + for (int xx = 0; xx < 16 && x+xx < width; xx += 8) { + generate(grain + (y+yy) * grain_stride + (x+xx), grain_stride, + in + (y+yy) * in_stride + (x+xx), in_stride, + database, &h274, c, invert, (x+xx) > 0, + y_offset + yy, x_offset + xx); + } + } + } + } + + // Final output blend pass, done after grain synthesis is complete + // because deblocking depends on previous grain values + for (int y = 0; y < height; y++) { + add_8x8_clip_c(out + y * out_stride, in + y * in_stride, + grain + y * grain_stride, width); + } + } + + return 0; +} + +// These tables are all taken from the SMPTE RDD 5-2006 specification +static const int8_t Gaussian_LUT[2048+256] = { + -11, 12, 103, -11, 42, -35, 12, 59, 77, 98, -87, 3, 65, -78, 45, 56, -51, 21, + 13, -11, -20, -19, 33, -127, 17, -6, -105, 18, 19, 71, 48, -10, -38, 42, + -2, 75, -67, 52, -90, 33, -47, 21, -3, -56, 49, 1, -57, -42, -1, 120, -127, + -108, -49, 9, 14, 127, 122, 109, 52, 127, 2, 7, 114, 19, 30, 12, 77, 112, + 82, -61, -127, 111, -52, -29, 2, -49, -24, 58, -29, -73, 12, 112, 67, 79, + -3, -114, -87, -6, -5, 40, 58, -81, 49, -27, -31, -34, -105, 50, 16, -24, + -35, -14, -15, -127, -55, -22, -55, -127, -112, 5, -26, -72, 127, 127, -2, + 41, 87, -65, -16, 55, 19, 91, -81, -65, -64, 35, -7, -54, 99, -7, 88, 125, + -26, 91, 0, 63, 60, -14, -23, 113, -33, 116, 14, 26, 51, -16, 107, -8, 53, + 38, -34, 17, -7, 4, -91, 6, 63, 63, -15, 39, -36, 19, 55, 17, -51, 40, 33, + -37, 126, -39, -118, 17, -30, 0, 19, 98, 60, 101, -12, -73, -17, -52, 98, + 3, 3, 60, 33, -3, -2, 10, -42, -106, -38, 14, 127, 16, -127, -31, -86, -39, + -56, 46, -41, 75, 23, -19, -22, -70, 74, -54, -2, 32, -45, 17, -92, 59, + -64, -67, 56, -102, -29, -87, -34, -92, 68, 5, -74, -61, 93, -43, 14, -26, + -38, -126, -17, 16, -127, 64, 34, 31, 93, 17, -51, -59, 71, 77, 81, 127, + 127, 61, 33, -106, -93, 0, 0, 75, -69, 71, 127, -19, -111, 30, 23, 15, 2, + 39, 92, 5, 42, 2, -6, 38, 15, 114, -30, -37, 50, 44, 106, 27, 119, 7, -80, + 25, -68, -21, 92, -11, -1, 18, 41, -50, 79, -127, -43, 127, 18, 11, -21, + 32, -52, 27, -88, -90, -39, -19, -10, 24, -118, 72, -24, -44, 2, 12, 86, + -107, 39, -33, -127, 47, 51, -24, -22, 46, 0, 15, -35, -69, -2, -74, 24, + -6, 0, 29, -3, 45, 32, -32, 117, -45, 79, -24, -17, -109, -10, -70, 88, + -48, 24, -91, 120, -37, 50, -127, 58, 32, -82, -10, -17, -7, 46, -127, -15, + 89, 127, 17, 98, -39, -33, 37, 42, -40, -32, -21, 105, -19, 19, 19, -59, + -9, 30, 0, -127, 34, 127, -84, 75, 24, -40, -49, -127, -107, -14, 45, -75, + 1, 30, -20, 41, -68, -40, 12, 127, -3, 5, 20, -73, -59, -127, -3, -3, -53, + -6, -119, 93, 120, -80, -50, 0, 20, -46, 67, 78, -12, -22, -127, 36, -41, + 56, 119, -5, -116, -22, 68, -14, -90, 24, -82, -44, -127, 107, -25, -37, + 40, -7, -7, -82, 5, -87, 44, -34, 9, -127, 39, 70, 49, -63, 74, -49, 109, + -27, -89, -47, -39, 44, 49, -4, 60, -42, 80, 9, -127, -9, -56, -49, 125, + -66, 47, 36, 117, 15, -11, -96, 109, 94, -17, -56, 70, 8, -14, -5, 50, 37, + -45, 120, -30, -76, 40, -46, 6, 3, 69, 17, -78, 1, -79, 6, 127, 43, 26, + 127, -127, 28, -55, -26, 55, 112, 48, 107, -1, -77, -1, 53, -9, -22, -43, + 123, 108, 127, 102, 68, 46, 5, 1, 123, -13, -55, -34, -49, 89, 65, -105, + -5, 94, -53, 62, 45, 30, 46, 18, -35, 15, 41, 47, -98, -24, 94, -75, 127, + -114, 127, -68, 1, -17, 51, -95, 47, 12, 34, -45, -75, 89, -107, -9, -58, + -29, -109, -24, 127, -61, -13, 77, -45, 17, 19, 83, -24, 9, 127, -66, 54, + 4, 26, 13, 111, 43, -113, -22, 10, -24, 83, 67, -14, 75, -123, 59, 127, + -12, 99, -19, 64, -38, 54, 9, 7, 61, -56, 3, -57, 113, -104, -59, 3, -9, + -47, 74, 85, -55, -34, 12, 118, 28, 93, -72, 13, -99, -72, -20, 30, 72, + -94, 19, -54, 64, -12, -63, -25, 65, 72, -10, 127, 0, -127, 103, -20, -73, + -112, -103, -6, 28, -42, -21, -59, -29, -26, 19, -4, -51, 94, -58, -95, + -37, 35, 20, -69, 127, -19, -127, -22, -120, -53, 37, 74, -127, -1, -12, + -119, -53, -28, 38, 69, 17, 16, -114, 89, 62, 24, 37, -23, 49, -101, -32, + -9, -95, -53, 5, 93, -23, -49, -8, 51, 3, -75, -90, -10, -39, 127, -86, + -22, 20, 20, 113, 75, 52, -31, 92, -63, 7, -12, 46, 36, 101, -43, -17, -53, + -7, -38, -76, -31, -21, 62, 31, 62, 20, -127, 31, 64, 36, 102, -85, -10, + 77, 80, 58, -79, -8, 35, 8, 80, -24, -9, 3, -17, 72, 127, 83, -87, 55, 18, + -119, -123, 36, 10, 127, 56, -55, 113, 13, 26, 32, -13, -48, 22, -13, 5, + 58, 27, 24, 26, -11, -36, 37, -92, 78, 81, 9, 51, 14, 67, -13, 0, 32, 45, + -76, 32, -39, -22, -49, -127, -27, 31, -9, 36, 14, 71, 13, 57, 12, -53, + -86, 53, -44, -35, 2, 127, 12, -66, -44, 46, -115, 3, 10, 56, -35, 119, + -19, -61, 52, -59, -127, -49, -23, 4, -5, 17, -82, -6, 127, 25, 79, 67, 64, + -25, 14, -64, -37, -127, -28, 21, -63, 66, -53, -41, 109, -62, 15, -22, 13, + 29, -63, 20, 27, 95, -44, -59, -116, -10, 79, -49, 22, -43, -16, 46, -47, + -120, -36, -29, -52, -44, 29, 127, -13, 49, -9, -127, 75, -28, -23, 88, 59, + 11, -95, 81, -59, 58, 60, -26, 40, -92, -3, -22, -58, -45, -59, -22, -53, + 71, -29, 66, -32, -23, 14, -17, -66, -24, -28, -62, 47, 38, 17, 16, -37, + -24, -11, 8, -27, -19, 59, 45, -49, -47, -4, -22, -81, 30, -67, -127, 74, + 102, 5, -18, 98, 34, -66, 42, -52, 7, -59, 24, -58, -19, -24, -118, -73, + 91, 15, -16, 79, -32, -79, -127, -36, 41, 77, -83, 2, 56, 22, -75, 127, + -16, -21, 12, 31, 56, -113, -127, 90, 55, 61, 12, 55, -14, -113, -14, 32, + 49, -67, -17, 91, -10, 1, 21, 69, -70, 99, -19, -112, 66, -90, -10, -9, + -71, 127, 50, -81, -49, 24, 61, -61, -111, 7, -41, 127, 88, -66, 108, -127, + -6, 36, -14, 41, -50, 14, 14, 73, -101, -28, 77, 127, -8, -100, 88, 38, + 121, 88, -125, -60, 13, -94, -115, 20, -67, -87, -94, -119, 44, -28, -30, + 18, 5, -53, -61, 20, -43, 11, -77, -60, 13, 29, 3, 6, -72, 38, -60, -11, + 108, -53, 41, 66, -12, -127, -127, -49, 24, 29, 46, 36, 91, 34, -33, 116, + -51, -34, -52, 91, 7, -83, 73, -26, -103, 24, -10, 76, 84, 5, 68, -80, -13, + -17, -32, -48, 20, 50, 26, 10, 63, -104, -14, 37, 127, 114, 97, 35, 1, -33, + -55, 127, -124, -33, 61, -7, 119, -32, -127, -53, -42, 63, 3, -5, -26, 70, + -58, -33, -44, -43, 34, -56, -127, 127, 25, -35, -11, 16, -81, 29, -58, 40, + -127, -127, 20, -47, -11, -36, -63, -52, -32, -82, 78, -76, -73, 8, 27, + -72, -9, -74, -85, -86, -57, 25, 78, -10, -97, 35, -65, 8, -59, 14, 1, -42, + 32, -88, -44, 17, -3, -9, 59, 40, 12, -108, -40, 24, 34, 18, -28, 2, 51, + -110, -4, 100, 1, 65, 22, 0, 127, 61, 45, 25, -31, 6, 9, -7, -48, 99, 16, + 44, -2, -40, 32, -39, -52, 10, -110, -19, 56, -127, 69, 26, 51, 92, 40, 61, + -52, 45, -38, 13, 85, 122, 27, 66, 45, -111, -83, -3, 31, 37, 19, -36, 58, + 71, 39, -78, -47, 58, -78, 8, -62, -36, -14, 61, 42, -127, 71, -4, 24, -54, + 52, -127, 67, -4, -42, 30, -63, 59, -3, -1, -18, -46, -92, -81, -96, -14, + -53, -10, -11, -77, 13, 1, 8, -67, -127, 127, -28, 26, -14, 18, -13, -26, + 2, 10, -46, -32, -15, 27, -31, -59, 59, 77, -121, 28, 40, -54, -62, -31, + -21, -37, -32, -6, -127, -25, -60, 70, -127, 112, -127, 127, 88, -7, 116, + 110, 53, 87, -127, 3, 16, 23, 74, -106, -51, 3, 74, -82, -112, -74, 65, 81, + 25, 53, 127, -45, -50, -103, -41, -65, -29, 79, -67, 64, -33, -30, -8, 127, + 0, -13, -51, 67, -14, 5, -92, 29, -35, -8, -90, -57, -3, 36, 43, 44, -31, + -69, -7, 36, 39, -51, 43, -81, 58, 6, 127, 12, 57, 66, 46, 59, -43, -42, + 41, -15, -120, 24, 3, -11, 19, -13, 51, 28, 3, 55, -48, -12, -1, 2, 97, + -19, 29, 42, 13, 43, 78, -44, 56, -108, -43, -19, 127, 15, -11, -18, -81, + 83, -37, 77, -109, 15, 65, -50, 43, 12, 13, 27, 28, 61, 57, 30, 26, 106, + -18, 56, 13, 97, 4, -8, -62, -103, 94, 108, -44, 52, 27, -47, -9, 105, -53, + 46, 89, 103, -33, 38, -34, 55, 51, 70, -94, -35, -87, -107, -19, -31, 9, + -19, 79, -14, 77, 5, -19, -107, 85, 21, -45, -39, -42, 9, -29, 74, 47, -75, + 60, -127, 120, -112, -57, -32, 41, 7, 79, 76, 66, 57, 41, -25, 31, 37, -47, + -36, 43, -73, -37, 63, 127, -69, -52, 90, -33, -61, 60, -55, 44, 15, 4, + -67, 13, -92, 64, 29, -39, -3, 83, -2, -38, -85, -86, 58, 35, -69, -61, 29, + -37, -95, -78, 4, 30, -4, -32, -80, -22, -9, -77, 46, 7, -93, -71, 65, 9, + -50, 127, -70, 26, -12, -39, -114, 63, -127, -100, 4, -32, 111, 22, -60, + 65, -101, 26, -42, 21, -59, -27, -74, 2, -94, 6, 126, 5, 76, -88, -9, -43, + -101, 127, 1, 125, 92, -63, 52, 56, 4, 81, -127, 127, 80, 127, -29, 30, + 116, -74, -17, -57, 105, 48, 45, 25, -72, 48, -38, -108, 31, -34, 4, -11, + 41, -127, 52, -104, -43, -37, 52, 2, 47, 87, -9, 77, 27, -41, -25, 90, 86, + -56, 75, 10, 33, 78, 58, 127, 127, -7, -73, 49, -33, -106, -35, 38, 57, 53, + -17, -4, 83, 52, -108, 54, -125, 28, 23, 56, -43, -88, -17, -6, 47, 23, -9, + 0, -13, 111, 75, 27, -52, -38, -34, 39, 30, 66, 39, 38, -64, 38, 3, 21, + -32, -51, -28, 54, -38, -87, 20, 52, 115, 18, -81, -70, 0, -14, -46, -46, + -3, 125, 16, -14, 23, -82, -84, -69, -20, -65, -127, 9, 81, -49, 61, 7, + -36, -45, -42, 57, -26, 47, 20, -85, 46, -13, 41, -37, -75, -60, 86, -78, + -127, 12, 50, 2, -3, 13, 47, 5, 19, -78, -55, -27, 65, -71, 12, -108, 20, + -16, 11, -31, 63, -55, 37, 75, -17, 127, -73, -33, -28, -120, 105, 68, 106, + -103, -106, 71, 61, 2, 23, -3, 33, -5, -15, -67, -15, -23, -54, 15, -63, + 76, 58, -110, 1, 83, -27, 22, 75, -39, -17, -11, 64, -17, -127, -54, -66, + 31, 96, 116, 3, -114, -7, -108, -63, 97, 9, 50, 8, 75, -28, 72, 112, -36, + -112, 95, -50, 23, -13, -19, 55, 21, 23, 92, 91, 22, -49, 16, -75, 23, 9, + -49, -97, -37, 49, -36, 36, -127, -86, 43, 127, -24, -24, 84, 83, -35, -34, + -12, 109, 102, -38, 51, -68, 34, 19, -22, 49, -32, 127, 40, 24, -93, -4, + -3, 105, 3, -58, -18, 8, 127, -18, 125, 68, 69, -62, 30, -36, 54, -57, -24, + 17, 43, -36, -27, -57, -67, -21, -10, -49, 68, 12, 65, 4, 48, 55, 127, -75, + 44, 89, -66, -13, -78, -82, -91, 22, 30, 33, -40, -87, -34, 96, -91, 39, + 10, -64, -3, -12, 127, -50, -37, -56, 23, -35, -36, -54, 90, -91, 2, 50, + 77, -6, -127, 16, 46, -5, -73, 0, -56, -18, -72, 28, 93, 60, 49, 20, 18, + 111, -111, 32, -83, 47, 47, -10, 35, -88, 43, 57, -98, 127, -17, 0, 1, -39, + -127, -2, 0, 63, 93, 0, 36, -66, -61, -19, 39, -127, 58, 50, -17, 127, 88, + -43, -108, -51, -16, 7, -36, 68, 46, -14, 107, 40, 57, 7, 19, 8, 3, 88, + -90, -92, -18, -21, -24, 13, 7, -4, -78, -91, -4, 8, -35, -5, 19, 2, -111, + 4, -66, -81, 122, -20, -34, -37, -84, 127, 68, 46, 17, 47, + + // Repeat the beginning of the array to allow wrapping reads + -11, 12, 103, -11, 42, -35, 12, 59, 77, 98, -87, 3, 65, -78, 45, 56, -51, 21, + 13, -11, -20, -19, 33, -127, 17, -6, -105, 18, 19, 71, 48, -10, -38, 42, + -2, 75, -67, 52, -90, 33, -47, 21, -3, -56, 49, 1, -57, -42, -1, 120, -127, + -108, -49, 9, 14, 127, 122, 109, 52, 127, 2, 7, 114, 19, 30, 12, 77, 112, + 82, -61, -127, 111, -52, -29, 2, -49, -24, 58, -29, -73, 12, 112, 67, 79, + -3, -114, -87, -6, -5, 40, 58, -81, 49, -27, -31, -34, -105, 50, 16, -24, + -35, -14, -15, -127, -55, -22, -55, -127, -112, 5, -26, -72, 127, 127, -2, + 41, 87, -65, -16, 55, 19, 91, -81, -65, -64, 35, -7, -54, 99, -7, 88, 125, + -26, 91, 0, 63, 60, -14, -23, 113, -33, 116, 14, 26, 51, -16, 107, -8, 53, + 38, -34, 17, -7, 4, -91, 6, 63, 63, -15, 39, -36, 19, 55, 17, -51, 40, 33, + -37, 126, -39, -118, 17, -30, 0, 19, 98, 60, 101, -12, -73, -17, -52, 98, + 3, 3, 60, 33, -3, -2, 10, -42, -106, -38, 14, 127, 16, -127, -31, -86, -39, + -56, 46, -41, 75, 23, -19, -22, -70, 74, -54, -2, 32, -45, 17, -92, 59, + -64, -67, 56, -102, -29, -87, -34, -92, 68, 5, -74, -61, 93, -43, 14, -26, + -38, -126, -17, 16, -127, 64, 34, 31, 93, 17, -51, -59, 71, 77, 81, 127, + 127, 61, 33, -106, -93, 0, 0, 75, +}; + +static const uint32_t Seed_LUT[256] = { + 747538460, 1088979410, 1744950180, 1767011913, 1403382928, + 521866116, 1060417601, 2110622736, 1557184770, 105289385, 585624216, + 1827676546, 1191843873, 1018104344, 1123590530, 663361569, 2023850500, + 76561770, 1226763489, 80325252, 1992581442, 502705249, 740409860, + 516219202, 557974537, 1883843076, 720112066, 1640137737, 1820967556, + 40667586, 155354121, 1820967557, 1115949072, 1631803309, 98284748, + 287433856, 2119719977, 988742797, 1827432592, 579378475, 1017745956, + 1309377032, 1316535465, 2074315269, 1923385360, 209722667, 1546228260, + 168102420, 135274561, 355958469, 248291472, 2127839491, 146920100, + 585982612, 1611702337, 696506029, 1386498192, 1258072451, 1212240548, + 1043171860, 1217404993, 1090770605, 1386498193, 169093201, 541098240, + 1468005469, 456510673, 1578687785, 1838217424, 2010752065, 2089828354, + 1362717428, 970073673, 854129835, 714793201, 1266069081, 1047060864, + 1991471829, 1098097741, 913883585, 1669598224, 1337918685, 1219264706, + 1799741108, 1834116681, 683417731, 1120274457, 1073098457, 1648396544, + 176642749, 31171789, 718317889, 1266977808, 1400892508, 549749008, + 1808010512, 67112961, 1005669825, 903663673, 1771104465, 1277749632, + 1229754427, 950632997, 1979371465, 2074373264, 305357524, 1049387408, + 1171033360, 1686114305, 2147468765, 1941195985, 117709841, 809550080, + 991480851, 1816248997, 1561503561, 329575568, 780651196, 1659144592, + 1910793616, 604016641, 1665084765, 1530186961, 1870928913, 809550081, + 2079346113, 71307521, 876663040, 1073807360, 832356664, 1573927377, + 204073344, 2026918147, 1702476788, 2043881033, 57949587, 2001393952, + 1197426649, 1186508931, 332056865, 950043140, 890043474, 349099312, + 148914948, 236204097, 2022643605, 1441981517, 498130129, 1443421481, + 924216797, 1817491777, 1913146664, 1411989632, 929068432, 495735097, + 1684636033, 1284520017, 432816184, 1344884865, 210843729, 676364544, + 234449232, 12112337, 1350619139, 1753272996, 2037118872, 1408560528, + 533334916, 1043640385, 357326099, 201376421, 110375493, 541106497, + 416159637, 242512193, 777294080, 1614872576, 1535546636, 870600145, + 910810409, 1821440209, 1605432464, 1145147393, 951695441, 1758494976, + 1506656568, 1557150160, 608221521, 1073840384, 217672017, 684818688, + 1750138880, 16777217, 677990609, 953274371, 1770050213, 1359128393, + 1797602707, 1984616737, 1865815816, 2120835200, 2051677060, 1772234061, + 1579794881, 1652821009, 1742099468, 1887260865, 46468113, 1011925248, + 1134107920, 881643832, 1354774993, 472508800, 1892499769, 1752793472, + 1962502272, 687898625, 883538000, 1354355153, 1761673473, 944820481, + 2020102353, 22020353, 961597696, 1342242816, 964808962, 1355809701, + 17016649, 1386540177, 647682692, 1849012289, 751668241, 1557184768, + 127374604, 1927564752, 1045744913, 1614921984, 43588881, 1016185088, + 1544617984, 1090519041, 136122424, 215038417, 1563027841, 2026918145, + 1688778833, 701530369, 1372639488, 1342242817, 2036945104, 953274369, + 1750192384, 16842753, 964808960, 1359020032, 1358954497 +}; + +// Note: This is pre-transposed, i.e. stored column-major order +static const int8_t R64T[64][64] = { + { + 32, 45, 45, 45, 45, 45, 45, 45, 44, 44, 44, 44, 43, 43, 43, 42, + 42, 41, 41, 40, 40, 39, 39, 38, 38, 37, 36, 36, 35, 34, 34, 33, + 32, 31, 30, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, + 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 4, 3, 2, 1, + }, { + 32, 45, 45, 44, 43, 42, 41, 39, 38, 36, 34, 31, 29, 26, 23, 20, + 17, 14, 11, 8, 4, 1, -2, -6, -9, -12, -15, -18, -21, -24, -27, -30, + -32, -34, -36, -38, -40, -41, -43, -44, -44, -45, -45, -45, -45, -45, -44, -43, + -42, -40, -39, -37, -35, -33, -30, -28, -25, -22, -19, -16, -13, -10, -7, -3, + }, { + 32, 45, 44, 42, 40, 37, 34, 30, 25, 20, 15, 10, 4, -1, -7, -12, + -17, -22, -27, -31, -35, -38, -41, -43, -44, -45, -45, -45, -43, -41, -39, -36, + -32, -28, -23, -18, -13, -8, -2, 3, 9, 14, 19, 24, 29, 33, 36, 39, + 42, 44, 45, 45, 45, 44, 43, 40, 38, 34, 30, 26, 21, 16, 11, 6, + }, { + 32, 45, 43, 39, 35, 30, 23, 16, 9, 1, -7, -14, -21, -28, -34, -38, + -42, -44, -45, -45, -43, -40, -36, -31, -25, -18, -11, -3, 4, 12, 19, 26, + 32, 37, 41, 44, 45, 45, 44, 41, 38, 33, 27, 20, 13, 6, -2, -10, + -17, -24, -30, -36, -40, -43, -45, -45, -44, -42, -39, -34, -29, -22, -15, -8, + }, { + 32, 44, 41, 36, 29, 20, 11, 1, -9, -18, -27, -34, -40, -44, -45, -45, + -42, -37, -30, -22, -13, -3, 7, 16, 25, 33, 39, 43, 45, 45, 43, 38, + 32, 24, 15, 6, -4, -14, -23, -31, -38, -42, -45, -45, -43, -39, -34, -26, + -17, -8, 2, 12, 21, 30, 36, 41, 44, 45, 44, 40, 35, 28, 19, 10, + }, { + 32, 44, 39, 31, 21, 10, -2, -14, -25, -34, -41, -45, -45, -42, -36, -28, + -17, -6, 7, 18, 29, 37, 43, 45, 44, 40, 34, 24, 13, 1, -11, -22, + -32, -39, -44, -45, -43, -38, -30, -20, -9, 3, 15, 26, 35, 41, 45, 45, + 42, 36, 27, 16, 4, -8, -19, -30, -38, -43, -45, -44, -40, -33, -23, -12, + }, { + 32, 43, 36, 26, 13, -1, -15, -28, -38, -44, -45, -42, -35, -24, -11, 3, + 17, 30, 39, 44, 45, 41, 34, 22, 9, -6, -19, -31, -40, -45, -45, -40, + -32, -20, -7, 8, 21, 33, 41, 45, 44, 39, 30, 18, 4, -10, -23, -34, + -42, -45, -44, -38, -29, -16, -2, 12, 25, 36, 43, 45, 43, 37, 27, 14, + }, { + 32, 42, 34, 20, 4, -12, -27, -38, -44, -45, -39, -28, -13, 3, 19, 33, + 42, 45, 43, 34, 21, 6, -11, -26, -38, -44, -45, -39, -29, -14, 2, 18, + 32, 41, 45, 43, 35, 22, 7, -10, -25, -37, -44, -45, -40, -30, -15, 1, + 17, 31, 41, 45, 43, 36, 23, 8, -9, -24, -36, -44, -45, -40, -30, -16, + }, { + 32, 41, 30, 14, -4, -22, -36, -44, -44, -37, -23, -6, 13, 30, 41, 45, + 42, 31, 15, -3, -21, -36, -44, -45, -38, -24, -7, 12, 29, 40, 45, 42, + 32, 16, -2, -20, -35, -44, -45, -38, -25, -8, 11, 28, 40, 45, 43, 33, + 17, -1, -19, -34, -43, -45, -39, -26, -9, 10, 27, 39, 45, 43, 34, 18, + }, { + 32, 40, 27, 8, -13, -31, -43, -45, -38, -22, -2, 18, 35, 44, 44, 34, + 17, -3, -23, -38, -45, -42, -30, -12, 9, 28, 41, 45, 40, 26, 7, -14, + -32, -43, -45, -37, -21, -1, 19, 36, 44, 44, 34, 16, -4, -24, -39, -45, + -42, -30, -11, 10, 29, 41, 45, 39, 25, 6, -15, -33, -43, -45, -36, -20, + }, { + 32, 39, 23, 1, -21, -38, -45, -40, -25, -3, 19, 37, 45, 41, 27, 6, + -17, -36, -45, -42, -29, -8, 15, 34, 44, 43, 30, 10, -13, -33, -44, -44, + -32, -12, 11, 31, 43, 44, 34, 14, -9, -30, -43, -45, -35, -16, 7, 28, + 42, 45, 36, 18, -4, -26, -41, -45, -38, -20, 2, 24, 40, 45, 39, 22, + }, { + 32, 38, 19, -6, -29, -43, -44, -31, -9, 16, 36, 45, 40, 22, -2, -26, + -42, -45, -34, -12, 13, 34, 45, 41, 25, 1, -23, -40, -45, -36, -15, 10, + 32, 44, 43, 28, 4, -20, -39, -45, -38, -18, 7, 30, 43, 44, 30, 8, + -17, -37, -45, -39, -21, 3, 27, 42, 44, 33, 11, -14, -35, -45, -41, -24, + }, { + 32, 37, 15, -12, -35, -45, -39, -18, 9, 33, 45, 40, 21, -6, -30, -44, + -42, -24, 2, 28, 43, 43, 27, 1, -25, -42, -44, -30, -4, 22, 41, 45, + 32, 8, -19, -39, -45, -34, -11, 16, 38, 45, 36, 14, -13, -36, -45, -38, + -17, 10, 34, 45, 40, 20, -7, -31, -44, -41, -23, 3, 29, 44, 43, 26, + }, { + 32, 36, 11, -18, -40, -45, -30, -3, 25, 43, 43, 24, -4, -31, -45, -39, + -17, 12, 36, 45, 35, 10, -19, -40, -44, -30, -2, 26, 43, 42, 23, -6, + -32, -45, -39, -16, 13, 37, 45, 34, 9, -20, -41, -44, -29, -1, 27, 44, + 42, 22, -7, -33, -45, -38, -15, 14, 38, 45, 34, 8, -21, -41, -44, -28, + }, { + 32, 34, 7, -24, -43, -41, -19, 12, 38, 45, 30, 1, -29, -45, -39, -14, + 17, 40, 44, 26, -4, -33, -45, -36, -9, 22, 43, 42, 21, -10, -36, -45, + -32, -3, 27, 44, 40, 16, -15, -39, -44, -28, 2, 31, 45, 37, 11, -20, + -42, -43, -23, 8, 35, 45, 34, 6, -25, -44, -41, -18, 13, 38, 45, 30, + }, { + 32, 33, 2, -30, -45, -36, -7, 26, 44, 38, 11, -22, -43, -40, -15, 18, + 42, 42, 19, -14, -40, -44, -23, 10, 38, 45, 27, -6, -35, -45, -30, 1, + 32, 45, 34, 3, -29, -45, -36, -8, 25, 44, 39, 12, -21, -43, -41, -16, + 17, 41, 43, 20, -13, -39, -44, -24, 9, 37, 45, 28, -4, -34, -45, -31, + }, { + 32, 31, -2, -34, -45, -28, 7, 37, 44, 24, -11, -39, -43, -20, 15, 41, + 42, 16, -19, -43, -40, -12, 23, 44, 38, 8, -27, -45, -35, -3, 30, 45, + 32, -1, -34, -45, -29, 6, 36, 45, 25, -10, -39, -44, -21, 14, 41, 42, + 17, -18, -43, -40, -13, 22, 44, 38, 9, -26, -45, -36, -4, 30, 45, 33, + }, { + 32, 30, -7, -38, -43, -18, 19, 44, 38, 6, -30, -45, -29, 8, 39, 43, + 17, -20, -44, -37, -4, 31, 45, 28, -9, -39, -43, -16, 21, 44, 36, 3, + -32, -45, -27, 10, 40, 42, 15, -22, -44, -36, -2, 33, 45, 26, -11, -40, + -42, -14, 23, 45, 35, 1, -34, -45, -25, 12, 41, 41, 13, -24, -45, -34, + }, { + 32, 28, -11, -41, -40, -8, 30, 45, 25, -14, -43, -38, -4, 33, 45, 22, + -17, -44, -36, -1, 35, 44, 19, -20, -44, -34, 2, 37, 43, 16, -23, -45, + -32, 6, 39, 42, 13, -26, -45, -30, 9, 40, 41, 10, -29, -45, -27, 12, + 42, 39, 7, -31, -45, -24, 15, 43, 38, 3, -34, -45, -21, 18, 44, 36, + }, { + 32, 26, -15, -44, -35, 3, 39, 41, 9, -31, -45, -20, 21, 45, 30, -10, + -42, -38, -2, 36, 43, 14, -27, -45, -25, 16, 44, 34, -4, -39, -41, -8, + 32, 45, 19, -22, -45, -30, 11, 42, 38, 1, -36, -43, -13, 28, 45, 24, + -17, -44, -34, 6, 40, 40, 7, -33, -44, -18, 23, 45, 29, -12, -43, -37, + }, { + 32, 24, -19, -45, -29, 14, 44, 33, -9, -42, -36, 3, 40, 39, 2, -37, + -42, -8, 34, 44, 13, -30, -45, -18, 25, 45, 23, -20, -45, -28, 15, 44, + 32, -10, -43, -36, 4, 40, 39, 1, -38, -41, -7, 34, 43, 12, -30, -45, + -17, 26, 45, 22, -21, -45, -27, 16, 44, 31, -11, -43, -35, 6, 41, 38, + }, { + 32, 22, -23, -45, -21, 24, 45, 20, -25, -45, -19, 26, 45, 18, -27, -45, + -17, 28, 45, 16, -29, -45, -15, 30, 44, 14, -30, -44, -13, 31, 44, 12, + -32, -44, -11, 33, 43, 10, -34, -43, -9, 34, 43, 8, -35, -42, -7, 36, + 42, 6, -36, -41, -4, 37, 41, 3, -38, -40, -2, 38, 40, 1, -39, -39, + }, { + 32, 20, -27, -45, -13, 33, 43, 6, -38, -39, 2, 41, 35, -10, -44, -30, + 17, 45, 23, -24, -45, -16, 30, 44, 9, -36, -41, -1, 40, 37, -7, -43, + -32, 14, 45, 26, -21, -45, -19, 28, 44, 12, -34, -42, -4, 38, 39, -3, + -42, -34, 11, 44, 29, -18, -45, -22, 25, 45, 15, -31, -43, -8, 36, 40, + }, { + 32, 18, -30, -43, -4, 39, 36, -10, -44, -26, 23, 45, 13, -34, -41, 1, + 42, 33, -15, -45, -21, 28, 44, 8, -38, -38, 7, 44, 29, -20, -45, -16, + 32, 42, 2, -40, -35, 12, 45, 24, -25, -45, -11, 36, 40, -3, -43, -31, + 17, 45, 19, -30, -43, -6, 39, 37, -9, -44, -27, 22, 45, 14, -34, -41, + }, { + 32, 16, -34, -40, 4, 44, 27, -24, -44, -8, 39, 36, -13, -45, -19, 31, + 42, -1, -43, -30, 21, 45, 11, -37, -38, 10, 45, 22, -29, -43, -2, 41, + 32, -18, -45, -14, 35, 39, -7, -44, -25, 26, 44, 6, -40, -34, 15, 45, + 17, -33, -41, 3, 43, 28, -23, -45, -9, 38, 36, -12, -45, -20, 30, 42, + }, { + 32, 14, -36, -37, 13, 45, 15, -36, -38, 12, 45, 16, -35, -38, 11, 45, + 17, -34, -39, 10, 45, 18, -34, -39, 9, 45, 19, -33, -40, 8, 45, 20, + -32, -40, 7, 45, 21, -31, -41, 6, 44, 22, -30, -41, 4, 44, 23, -30, + -42, 3, 44, 24, -29, -42, 2, 44, 25, -28, -43, 1, 43, 26, -27, -43, + }, { + 32, 12, -39, -33, 21, 44, 2, -43, -25, 30, 41, -8, -45, -16, 36, 36, + -17, -45, -7, 41, 29, -26, -43, 3, 44, 20, -34, -38, 13, 45, 11, -39, + -32, 22, 44, 1, -43, -24, 30, 40, -9, -45, -15, 37, 35, -18, -45, -6, + 42, 28, -27, -42, 4, 45, 19, -34, -38, 14, 45, 10, -40, -31, 23, 44, + }, { + 32, 10, -41, -28, 29, 40, -11, -45, -9, 41, 27, -30, -40, 12, 45, 8, + -42, -26, 30, 39, -13, -45, -7, 42, 25, -31, -39, 14, 45, 6, -43, -24, + 32, 38, -15, -45, -4, 43, 23, -33, -38, 16, 45, 3, -43, -22, 34, 37, + -17, -45, -2, 44, 21, -34, -36, 18, 44, 1, -44, -20, 35, 36, -19, -44, + }, { + 32, 8, -43, -22, 35, 34, -23, -42, 9, 45, 7, -43, -21, 36, 34, -24, + -42, 10, 45, 6, -43, -20, 36, 33, -25, -41, 11, 45, 4, -44, -19, 37, + 32, -26, -41, 12, 45, 3, -44, -18, 38, 31, -27, -40, 13, 45, 2, -44, + -17, 38, 30, -28, -40, 14, 45, 1, -44, -16, 39, 30, -29, -39, 15, 45, + }, { + 32, 6, -44, -16, 40, 26, -34, -34, 25, 40, -15, -44, 4, 45, 7, -44, + -17, 39, 27, -33, -35, 24, 41, -14, -44, 3, 45, 8, -43, -18, 39, 28, + -32, -36, 23, 41, -13, -45, 2, 45, 9, -43, -19, 38, 29, -31, -36, 22, + 42, -12, -45, 1, 45, 10, -43, -20, 38, 30, -30, -37, 21, 42, -11, -45, + }, { + 32, 3, -45, -10, 43, 16, -41, -22, 38, 28, -34, -33, 29, 37, -23, -40, + 17, 43, -11, -45, 4, 45, 2, -45, -9, 44, 15, -41, -21, 38, 27, -34, + -32, 30, 36, -24, -40, 18, 43, -12, -44, 6, 45, 1, -45, -8, 44, 14, + -42, -20, 39, 26, -35, -31, 30, 36, -25, -39, 19, 42, -13, -44, 7, 45, + }, { + 32, 1, -45, -3, 45, 6, -45, -8, 44, 10, -44, -12, 43, 14, -43, -16, + 42, 18, -41, -20, 40, 22, -39, -24, 38, 26, -36, -28, 35, 30, -34, -31, + 32, 33, -30, -34, 29, 36, -27, -37, 25, 38, -23, -39, 21, 40, -19, -41, + 17, 42, -15, -43, 13, 44, -11, -44, 9, 45, -7, -45, 4, 45, -2, -45, + }, { + 32, -1, -45, 3, 45, -6, -45, 8, 44, -10, -44, 12, 43, -14, -43, 16, + 42, -18, -41, 20, 40, -22, -39, 24, 38, -26, -36, 28, 35, -30, -34, 31, + 32, -33, -30, 34, 29, -36, -27, 37, 25, -38, -23, 39, 21, -40, -19, 41, + 17, -42, -15, 43, 13, -44, -11, 44, 9, -45, -7, 45, 4, -45, -2, 45, + }, { + 32, -3, -45, 10, 43, -16, -41, 22, 38, -28, -34, 33, 29, -37, -23, 40, + 17, -43, -11, 45, 4, -45, 2, 45, -9, -44, 15, 41, -21, -38, 27, 34, + -32, -30, 36, 24, -40, -18, 43, 12, -44, -6, 45, -1, -45, 8, 44, -14, + -42, 20, 39, -26, -35, 31, 30, -36, -25, 39, 19, -42, -13, 44, 7, -45, + }, { + 32, -6, -44, 16, 40, -26, -34, 34, 25, -40, -15, 44, 4, -45, 7, 44, + -17, -39, 27, 33, -35, -24, 41, 14, -44, -3, 45, -8, -43, 18, 39, -28, + -32, 36, 23, -41, -13, 45, 2, -45, 9, 43, -19, -38, 29, 31, -36, -22, + 42, 12, -45, -1, 45, -10, -43, 20, 38, -30, -30, 37, 21, -42, -11, 45, + }, { + 32, -8, -43, 22, 35, -34, -23, 42, 9, -45, 7, 43, -21, -36, 34, 24, + -42, -10, 45, -6, -43, 20, 36, -33, -25, 41, 11, -45, 4, 44, -19, -37, + 32, 26, -41, -12, 45, -3, -44, 18, 38, -31, -27, 40, 13, -45, 2, 44, + -17, -38, 30, 28, -40, -14, 45, -1, -44, 16, 39, -30, -29, 39, 15, -45, + }, { + 32, -10, -41, 28, 29, -40, -11, 45, -9, -41, 27, 30, -40, -12, 45, -8, + -42, 26, 30, -39, -13, 45, -7, -42, 25, 31, -39, -14, 45, -6, -43, 24, + 32, -38, -15, 45, -4, -43, 23, 33, -38, -16, 45, -3, -43, 22, 34, -37, + -17, 45, -2, -44, 21, 34, -36, -18, 44, -1, -44, 20, 35, -36, -19, 44, + }, { + 32, -12, -39, 33, 21, -44, 2, 43, -25, -30, 41, 8, -45, 16, 36, -36, + -17, 45, -7, -41, 29, 26, -43, -3, 44, -20, -34, 38, 13, -45, 11, 39, + -32, -22, 44, -1, -43, 24, 30, -40, -9, 45, -15, -37, 35, 18, -45, 6, + 42, -28, -27, 42, 4, -45, 19, 34, -38, -14, 45, -10, -40, 31, 23, -44, + }, { + 32, -14, -36, 37, 13, -45, 15, 36, -38, -12, 45, -16, -35, 38, 11, -45, + 17, 34, -39, -10, 45, -18, -34, 39, 9, -45, 19, 33, -40, -8, 45, -20, + -32, 40, 7, -45, 21, 31, -41, -6, 44, -22, -30, 41, 4, -44, 23, 30, + -42, -3, 44, -24, -29, 42, 2, -44, 25, 28, -43, -1, 43, -26, -27, 43, + }, { + 32, -16, -34, 40, 4, -44, 27, 24, -44, 8, 39, -36, -13, 45, -19, -31, + 42, 1, -43, 30, 21, -45, 11, 37, -38, -10, 45, -22, -29, 43, -2, -41, + 32, 18, -45, 14, 35, -39, -7, 44, -25, -26, 44, -6, -40, 34, 15, -45, + 17, 33, -41, -3, 43, -28, -23, 45, -9, -38, 36, 12, -45, 20, 30, -42, + }, { + 32, -18, -30, 43, -4, -39, 36, 10, -44, 26, 23, -45, 13, 34, -41, -1, + 42, -33, -15, 45, -21, -28, 44, -8, -38, 38, 7, -44, 29, 20, -45, 16, + 32, -42, 2, 40, -35, -12, 45, -24, -25, 45, -11, -36, 40, 3, -43, 31, + 17, -45, 19, 30, -43, 6, 39, -37, -9, 44, -27, -22, 45, -14, -34, 41, + }, { + 32, -20, -27, 45, -13, -33, 43, -6, -38, 39, 2, -41, 35, 10, -44, 30, + 17, -45, 23, 24, -45, 16, 30, -44, 9, 36, -41, 1, 40, -37, -7, 43, + -32, -14, 45, -26, -21, 45, -19, -28, 44, -12, -34, 42, -4, -38, 39, 3, + -42, 34, 11, -44, 29, 18, -45, 22, 25, -45, 15, 31, -43, 8, 36, -40, + }, { + 32, -22, -23, 45, -21, -24, 45, -20, -25, 45, -19, -26, 45, -18, -27, 45, + -17, -28, 45, -16, -29, 45, -15, -30, 44, -14, -30, 44, -13, -31, 44, -12, + -32, 44, -11, -33, 43, -10, -34, 43, -9, -34, 43, -8, -35, 42, -7, -36, + 42, -6, -36, 41, -4, -37, 41, -3, -38, 40, -2, -38, 40, -1, -39, 39, + }, { + 32, -24, -19, 45, -29, -14, 44, -33, -9, 42, -36, -3, 40, -39, 2, 37, + -42, 8, 34, -44, 13, 30, -45, 18, 25, -45, 23, 20, -45, 28, 15, -44, + 32, 10, -43, 36, 4, -40, 39, -1, -38, 41, -7, -34, 43, -12, -30, 45, + -17, -26, 45, -22, -21, 45, -27, -16, 44, -31, -11, 43, -35, -6, 41, -38, + }, { + 32, -26, -15, 44, -35, -3, 39, -41, 9, 31, -45, 20, 21, -45, 30, 10, + -42, 38, -2, -36, 43, -14, -27, 45, -25, -16, 44, -34, -4, 39, -41, 8, + 32, -45, 19, 22, -45, 30, 11, -42, 38, -1, -36, 43, -13, -28, 45, -24, + -17, 44, -34, -6, 40, -40, 7, 33, -44, 18, 23, -45, 29, 12, -43, 37, + }, { + 32, -28, -11, 41, -40, 8, 30, -45, 25, 14, -43, 38, -4, -33, 45, -22, + -17, 44, -36, 1, 35, -44, 19, 20, -44, 34, 2, -37, 43, -16, -23, 45, + -32, -6, 39, -42, 13, 26, -45, 30, 9, -40, 41, -10, -29, 45, -27, -12, + 42, -39, 7, 31, -45, 24, 15, -43, 38, -3, -34, 45, -21, -18, 44, -36, + }, { + 32, -30, -7, 38, -43, 18, 19, -44, 38, -6, -30, 45, -29, -8, 39, -43, + 17, 20, -44, 37, -4, -31, 45, -28, -9, 39, -43, 16, 21, -44, 36, -3, + -32, 45, -27, -10, 40, -42, 15, 22, -44, 36, -2, -33, 45, -26, -11, 40, + -42, 14, 23, -45, 35, -1, -34, 45, -25, -12, 41, -41, 13, 24, -45, 34, + }, { + 32, -31, -2, 34, -45, 28, 7, -37, 44, -24, -11, 39, -43, 20, 15, -41, + 42, -16, -19, 43, -40, 12, 23, -44, 38, -8, -27, 45, -35, 3, 30, -45, + 32, 1, -34, 45, -29, -6, 36, -45, 25, 10, -39, 44, -21, -14, 41, -42, + 17, 18, -43, 40, -13, -22, 44, -38, 9, 26, -45, 36, -4, -30, 45, -33, + }, { + 32, -33, 2, 30, -45, 36, -7, -26, 44, -38, 11, 22, -43, 40, -15, -18, + 42, -42, 19, 14, -40, 44, -23, -10, 38, -45, 27, 6, -35, 45, -30, -1, + 32, -45, 34, -3, -29, 45, -36, 8, 25, -44, 39, -12, -21, 43, -41, 16, + 17, -41, 43, -20, -13, 39, -44, 24, 9, -37, 45, -28, -4, 34, -45, 31, + }, { + 32, -34, 7, 24, -43, 41, -19, -12, 38, -45, 30, -1, -29, 45, -39, 14, + 17, -40, 44, -26, -4, 33, -45, 36, -9, -22, 43, -42, 21, 10, -36, 45, + -32, 3, 27, -44, 40, -16, -15, 39, -44, 28, 2, -31, 45, -37, 11, 20, + -42, 43, -23, -8, 35, -45, 34, -6, -25, 44, -41, 18, 13, -38, 45, -30, + }, { + 32, -36, 11, 18, -40, 45, -30, 3, 25, -43, 43, -24, -4, 31, -45, 39, + -17, -12, 36, -45, 35, -10, -19, 40, -44, 30, -2, -26, 43, -42, 23, 6, + -32, 45, -39, 16, 13, -37, 45, -34, 9, 20, -41, 44, -29, 1, 27, -44, + 42, -22, -7, 33, -45, 38, -15, -14, 38, -45, 34, -8, -21, 41, -44, 28, + }, { + 32, -37, 15, 12, -35, 45, -39, 18, 9, -33, 45, -40, 21, 6, -30, 44, + -42, 24, 2, -28, 43, -43, 27, -1, -25, 42, -44, 30, -4, -22, 41, -45, + 32, -8, -19, 39, -45, 34, -11, -16, 38, -45, 36, -14, -13, 36, -45, 38, + -17, -10, 34, -45, 40, -20, -7, 31, -44, 41, -23, -3, 29, -44, 43, -26, + }, { + 32, -38, 19, 6, -29, 43, -44, 31, -9, -16, 36, -45, 40, -22, -2, 26, + -42, 45, -34, 12, 13, -34, 45, -41, 25, -1, -23, 40, -45, 36, -15, -10, + 32, -44, 43, -28, 4, 20, -39, 45, -38, 18, 7, -30, 43, -44, 30, -8, + -17, 37, -45, 39, -21, -3, 27, -42, 44, -33, 11, 14, -35, 45, -41, 24, + }, { + 32, -39, 23, -1, -21, 38, -45, 40, -25, 3, 19, -37, 45, -41, 27, -6, + -17, 36, -45, 42, -29, 8, 15, -34, 44, -43, 30, -10, -13, 33, -44, 44, + -32, 12, 11, -31, 43, -44, 34, -14, -9, 30, -43, 45, -35, 16, 7, -28, + 42, -45, 36, -18, -4, 26, -41, 45, -38, 20, 2, -24, 40, -45, 39, -22, + }, { + 32, -40, 27, -8, -13, 31, -43, 45, -38, 22, -2, -18, 35, -44, 44, -34, + 17, 3, -23, 38, -45, 42, -30, 12, 9, -28, 41, -45, 40, -26, 7, 14, + -32, 43, -45, 37, -21, 1, 19, -36, 44, -44, 34, -16, -4, 24, -39, 45, + -42, 30, -11, -10, 29, -41, 45, -39, 25, -6, -15, 33, -43, 45, -36, 20, + }, { + 32, -41, 30, -14, -4, 22, -36, 44, -44, 37, -23, 6, 13, -30, 41, -45, + 42, -31, 15, 3, -21, 36, -44, 45, -38, 24, -7, -12, 29, -40, 45, -42, + 32, -16, -2, 20, -35, 44, -45, 38, -25, 8, 11, -28, 40, -45, 43, -33, + 17, 1, -19, 34, -43, 45, -39, 26, -9, -10, 27, -39, 45, -43, 34, -18, + }, { + 32, -42, 34, -20, 4, 12, -27, 38, -44, 45, -39, 28, -13, -3, 19, -33, + 42, -45, 43, -34, 21, -6, -11, 26, -38, 44, -45, 39, -29, 14, 2, -18, + 32, -41, 45, -43, 35, -22, 7, 10, -25, 37, -44, 45, -40, 30, -15, -1, + 17, -31, 41, -45, 43, -36, 23, -8, -9, 24, -36, 44, -45, 40, -30, 16, + }, { + 32, -43, 36, -26, 13, 1, -15, 28, -38, 44, -45, 42, -35, 24, -11, -3, + 17, -30, 39, -44, 45, -41, 34, -22, 9, 6, -19, 31, -40, 45, -45, 40, + -32, 20, -7, -8, 21, -33, 41, -45, 44, -39, 30, -18, 4, 10, -23, 34, + -42, 45, -44, 38, -29, 16, -2, -12, 25, -36, 43, -45, 43, -37, 27, -14, + }, { + 32, -44, 39, -31, 21, -10, -2, 14, -25, 34, -41, 45, -45, 42, -36, 28, + -17, 6, 7, -18, 29, -37, 43, -45, 44, -40, 34, -24, 13, -1, -11, 22, + -32, 39, -44, 45, -43, 38, -30, 20, -9, -3, 15, -26, 35, -41, 45, -45, + 42, -36, 27, -16, 4, 8, -19, 30, -38, 43, -45, 44, -40, 33, -23, 12, + }, { + 32, -44, 41, -36, 29, -20, 11, -1, -9, 18, -27, 34, -40, 44, -45, 45, + -42, 37, -30, 22, -13, 3, 7, -16, 25, -33, 39, -43, 45, -45, 43, -38, + 32, -24, 15, -6, -4, 14, -23, 31, -38, 42, -45, 45, -43, 39, -34, 26, + -17, 8, 2, -12, 21, -30, 36, -41, 44, -45, 44, -40, 35, -28, 19, -10, + }, { + 32, -45, 43, -39, 35, -30, 23, -16, 9, -1, -7, 14, -21, 28, -34, 38, + -42, 44, -45, 45, -43, 40, -36, 31, -25, 18, -11, 3, 4, -12, 19, -26, + 32, -37, 41, -44, 45, -45, 44, -41, 38, -33, 27, -20, 13, -6, -2, 10, + -17, 24, -30, 36, -40, 43, -45, 45, -44, 42, -39, 34, -29, 22, -15, 8, + }, { + 32, -45, 44, -42, 40, -37, 34, -30, 25, -20, 15, -10, 4, 1, -7, 12, + -17, 22, -27, 31, -35, 38, -41, 43, -44, 45, -45, 45, -43, 41, -39, 36, + -32, 28, -23, 18, -13, 8, -2, -3, 9, -14, 19, -24, 29, -33, 36, -39, + 42, -44, 45, -45, 45, -44, 43, -40, 38, -34, 30, -26, 21, -16, 11, -6, + }, { + 32, -45, 45, -44, 43, -42, 41, -39, 38, -36, 34, -31, 29, -26, 23, -20, + 17, -14, 11, -8, 4, -1, -2, 6, -9, 12, -15, 18, -21, 24, -27, 30, + -32, 34, -36, 38, -40, 41, -43, 44, -44, 45, -45, 45, -45, 45, -44, 43, + -42, 40, -39, 37, -35, 33, -30, 28, -25, 22, -19, 16, -13, 10, -7, 3, + }, { + 32, -45, 45, -45, 45, -45, 45, -45, 44, -44, 44, -44, 43, -43, 43, -42, + 42, -41, 41, -40, 40, -39, 39, -38, 38, -37, 36, -36, 35, -34, 34, -33, + 32, -31, 30, -30, 29, -28, 27, -26, 25, -24, 23, -22, 21, -20, 19, -18, + 17, -16, 15, -14, 13, -12, 11, -10, 9, -8, 7, -6, 4, -3, 2, -1, + } +}; diff --git a/libavcodec/h274.h b/libavcodec/h274.h new file mode 100644 index 0000000000..807b3a016a --- /dev/null +++ b/libavcodec/h274.h @@ -0,0 +1,52 @@ +/* + * H.274 film grain synthesis + * Copyright (c) 2021 Niklas Haas + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * H.274 film grain synthesis. + * @author Niklas Haas + */ + +#ifndef AVCODEC_H274_H +#define AVCODEC_H274_H + +#include + +// Must be initialized to {0} prior to first usage +typedef struct H274FilmGrainDatabase { + // Database of film grain patterns, lazily computed as-needed + int8_t db[13 /* h */][13 /* v */][64][64]; + uint16_t residency[13 /* h */]; // bit field of v + + // Temporary buffer for slice generation + int16_t slice_tmp[64][64]; +} H274FilmGrainDatabase; + +// Synthesizes film grain on top of `in` and stores the result to `out`. `out` +// must already have been allocated and set to the same size and format as +// `in`. +// +// Returns a negative error code on error, such as invalid params. +int ff_h274_apply_film_grain(AVFrame *out, const AVFrame *in, + H274FilmGrainDatabase *db, + const AVFilmGrainParams *params); + +#endif /* AVCODEC_H274_H */