vc2enc: replace quantization LUT with a smaller division LUT

This commit replaces the huge and impractical LUT, which converted a coefficient and a quantizer into the bits to encode, with a standard multiplication and a shift that stand in for the division; the quantized values are then coded using the regular Golomb coding functions. Signed-off-by: Rostislav Pehlivanov <atomnuker@gmail.com>
2025-08-15 14:13:16 +02:00 · 2018-02-27 23:12:33 +00:00
parent 950170bd3b
commit ea6973a573
1 changed files with 31 additions and 87 deletions
--- a/libavcodec/vc2enc.c
+++ b/libavcodec/vc2enc.c
@@ -29,10 +29,6 @@
 #include "vc2enc_dwt.h"
 #include "diractab.h"
 /* Total range is -COEF_LUT_TAB to +COEF_LUT_TAB, but total tab size is half
 * (COEF_LUT_TAB*DIRAC_MAX_QUANT_INDEX), as the sign is appended during encoding */
 #define COEF_LUT_TAB 2048
 /* The limited size resolution of each slice forces us to do this */
 #define SSIZE_ROUND(b) (FFALIGN((b), s->size_scaler) + 4 + s->prefix_bytes)
@@ -152,9 +148,8 @@ typedef struct VC2EncContext {
    uint8_t quant[MAX_DWT_LEVELS][4];
    int custom_quant_matrix;
-    /* Coefficient LUT */
+    /* Division LUT */
-    uint32_t *coef_lut_val;
+    uint32_t qmagic_lut[116][2];
    uint8_t  *coef_lut_len;
    int num_x; /* #slices horizontally */
    int num_y; /* #slices vertically */
@@ -229,37 +224,6 @@ static av_always_inline int count_vc2_ue_uint(uint32_t val)
    return ff_log2(topbit)*2 + 1;
 }
 /*
  * Precompute the unsigned variable-length codeword for val instead of
  * writing it to a bitstream.
  *
  * val    value to code; the codeword is derived from val + 1
  * nbits  receives the codeword length in bits
  * eval   receives the codeword bits, right-aligned
  *
  * The codeword is built as one (0, data bit) pair for every bit of val + 1
  * below its leading one, most significant first, followed by a single
  * terminating 1 bit.
  */
 static av_always_inline void get_vc2_ue_uint(int val, uint8_t *nbits,
                                              uint32_t *eval)
 {
    int code = 0, msb = 1, limit = 1;
    int nb_data, k;

    /* Zero maps to the lone terminating bit; nonzero inputs keep the +1 */
    if (val++ == 0) {
        *nbits = 1;
        *eval  = 1;
        return;
    }

    /* Double msb until the all-ones mask in limit is no smaller than val */
    for (; val > limit; limit = (limit << 1) | 1)
        msb <<= 1;

    /* Number of data bits that follow the (implicit) leading one */
    nb_data = ff_log2(msb);

    /* Walk the data bits downwards, appending a "0 b" pair for each one */
    for (k = nb_data; k > 0; k--) {
        msb >>= 1;
        code = (code << 2) | !!(val & msb);
    }

    /* Two bits per data bit plus the final terminating 1 */
    *nbits = 2*nb_data + 1;
    *eval  = (code << 1) | 1;
 }
 /* VC-2 10.4 - parse_info() */
 static void encode_parse_info(VC2EncContext *s, enum DiracParseCodes pcode)
 {
@@ -557,7 +521,7 @@ static void encode_picture_start(VC2EncContext *s)
    encode_wavelet_transform(s);
 }
-#define QUANT(c, qf) (((c) << 2)/(qf))
+#define QUANT(c, mul, add, shift) (((mul) * (c) + (add)) >> (shift))
 /* VC-2 13.5.5.2 - slice_band() */
 static void encode_subband(VC2EncContext *s, PutBitContext *pb, int sx, int sy,
@@ -570,24 +534,17 @@ static void encode_subband(VC2EncContext *s, PutBitContext *pb, int sx, int sy,
    const int top    = b->height * (sy+0) / s->num_y;
    const int bottom = b->height * (sy+1) / s->num_y;
    const int qfactor = ff_dirac_qscale_tab[quant];
    const uint8_t  *len_lut = &s->coef_lut_len[quant*COEF_LUT_TAB];
    const uint32_t *val_lut = &s->coef_lut_val[quant*COEF_LUT_TAB];
    dwtcoef *coeff = b->buf + top * b->stride;
    const uint64_t q_m = ((uint64_t)(s->qmagic_lut[quant][0])) << 2;
    const uint64_t q_a = s->qmagic_lut[quant][1];
    const int q_s = av_log2(ff_dirac_qscale_tab[quant]) + 32;
    for (y = top; y < bottom; y++) {
        for (x = left; x < right; x++) {
-            const int neg = coeff[x] < 0;
+            uint32_t c_abs = QUANT(FFABS(coeff[x]), q_m, q_a, q_s);
-            uint32_t c_abs = FFABS(coeff[x]);
+            put_vc2_ue_uint(pb, c_abs);
-            if (c_abs < COEF_LUT_TAB) {
+            if (c_abs)
-                put_bits(pb, len_lut[c_abs], val_lut[c_abs] | neg);
+                put_bits(pb, 1, coeff[x] < 0);
            } else {
                c_abs = QUANT(c_abs, qfactor);
                put_vc2_ue_uint(pb, c_abs);
                if (c_abs)
                    put_bits(pb, 1, neg);
            }
        }
        coeff += b->stride;
    }
@@ -619,8 +576,9 @@ static int count_hq_slice(SliceArgs *slice, int quant_idx)
                SubBand *b = &s->plane[p].band[level][orientation];
                const int q_idx = quants[level][orientation];
-                const uint8_t *len_lut = &s->coef_lut_len[q_idx*COEF_LUT_TAB];
+                const uint64_t q_m = ((uint64_t)s->qmagic_lut[q_idx][0]) << 2;
-                const int qfactor = ff_dirac_qscale_tab[q_idx];
+                const uint64_t q_a = s->qmagic_lut[q_idx][1];
                const int q_s = av_log2(ff_dirac_qscale_tab[q_idx]) + 32;
                const int left   = b->width  * slice->x    / s->num_x;
                const int right  = b->width  *(slice->x+1) / s->num_x;
@@ -631,14 +589,9 @@ static int count_hq_slice(SliceArgs *slice, int quant_idx)
                for (y = top; y < bottom; y++) {
                    for (x = left; x < right; x++) {
-                        uint32_t c_abs = FFABS(buf[x]);
+                        uint32_t c_abs = QUANT(FFABS(buf[x]), q_m, q_a, q_s);
-                        if (c_abs < COEF_LUT_TAB) {
+                        bits += count_vc2_ue_uint(c_abs);
-                            bits += len_lut[c_abs];
+                        bits += !!c_abs;
                        } else {
                            c_abs = QUANT(c_abs, qfactor);
                            bits += count_vc2_ue_uint(c_abs);
                            bits += !!c_abs;
                        }
                    }
                    buf += b->stride;
                }
@@ -1059,8 +1012,6 @@ static av_cold int vc2_encode_end(AVCodecContext *avctx)
    }
    av_freep(&s->slice_args);
    av_freep(&s->coef_lut_len);
    av_freep(&s->coef_lut_val);
    return 0;
 }
@@ -1069,7 +1020,7 @@ static av_cold int vc2_encode_init(AVCodecContext *avctx)
 {
    Plane *p;
    SubBand *b;
-    int i, j, level, o, shift, ret;
+    int i, level, o, shift, ret;
    const AVPixFmtDescriptor *fmt = av_pix_fmt_desc_get(avctx->pix_fmt);
    const int depth = fmt->comp[0].depth;
    VC2EncContext *s = avctx->priv_data;
@@ -1211,27 +1162,20 @@ static av_cold int vc2_encode_init(AVCodecContext *avctx)
    if (!s->slice_args)
        goto alloc_fail;
-    /* Lookup tables */
+    for (i = 0; i < 116; i++) {
-    s->coef_lut_len = av_malloc(COEF_LUT_TAB*(s->q_ceil+1)*sizeof(*s->coef_lut_len));
+        const uint32_t qf = ff_dirac_qscale_tab[i];
-    if (!s->coef_lut_len)
+        const int m = av_log2(qf);
-        goto alloc_fail;
+        const uint32_t t = (1UL << (m + 32)) / qf;
-
+        const uint32_t r = (t*qf + qf) & ((1UL << 32) - 1);
-    s->coef_lut_val = av_malloc(COEF_LUT_TAB*(s->q_ceil+1)*sizeof(*s->coef_lut_val));
+        if (!(qf & (qf - 1))) {
-    if (!s->coef_lut_val)
+            s->qmagic_lut[i][0] = 0xFFFFFFFF;
-        goto alloc_fail;
+            s->qmagic_lut[i][1] = 0xFFFFFFFF;
-
+        } else if (r <= 1UL << m) {
-    for (i = 0; i < s->q_ceil; i++) {
+            s->qmagic_lut[i][0] = t + 1;
-        uint8_t  *len_lut = &s->coef_lut_len[i*COEF_LUT_TAB];
+            s->qmagic_lut[i][1] = 0;
-        uint32_t *val_lut = &s->coef_lut_val[i*COEF_LUT_TAB];
+        } else {
-        for (j = 0; j < COEF_LUT_TAB; j++) {
+            s->qmagic_lut[i][0] = t;
-            get_vc2_ue_uint(QUANT(j, ff_dirac_qscale_tab[i]),
+            s->qmagic_lut[i][1] = t;
                            &len_lut[j], &val_lut[j]);
            if (len_lut[j] != 1) {
                len_lut[j] += 1;
                val_lut[j] <<= 1;
            } else {
                val_lut[j] = 1;
            }
        }
    }