Merge commit 'ad9d3384de08f02967d6eb11196ee8c78e8b2dba'

* commit 'ad9d3384de08f02967d6eb11196ee8c78e8b2dba':
  svq3: move the dequant buffer to SVQ3Context

Merged-by: Derek Buitenhuis <derek.buitenhuis@gmail.com>
2025-08-10 06:10:52 +02:00 · 2016-05-03 10:15:22 +01:00
parent 80d14de52d ad9d3384de
commit 297e2768da
4 changed files with 69 additions and 56 deletions
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -43,23 +43,6 @@
 #include "rectangle.h"
 #include "thread.h"

-
-static const uint8_t rem6[QP_MAX_NUM + 1] = {
-    0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
-    3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
-    0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
-    3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
-    0, 1, 2, 3,
-};
-
-static const uint8_t div6[QP_MAX_NUM + 1] = {
-    0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3,  3,  3,
-    3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6,  6,  6,
-    7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10,
-   10,10,10,11,11,11,11,11,11,12,12,12,12,12,12,13,13,13, 13, 13, 13,
-   14,14,14,14,
-};
-
 static const uint8_t field_scan[16+1] = {
    0 + 0 * 4, 0 + 1 * 4, 1 + 0 * 4, 0 + 2 * 4,
    0 + 3 * 4, 1 + 1 * 4, 1 + 2 * 4, 1 + 3 * 4,
@@ -125,29 +108,6 @@ static const uint8_t zigzag_scan8x8_cavlc[64+1] = {
    5 + 5 * 8, 6 + 5 * 8, 6 + 6 * 8, 7 + 7 * 8,
 };

-static const uint8_t dequant4_coeff_init[6][3] = {
-    { 10, 13, 16 },
-    { 11, 14, 18 },
-    { 13, 16, 20 },
-    { 14, 18, 23 },
-    { 16, 20, 25 },
-    { 18, 23, 29 },
-};
-
-static const uint8_t dequant8_coeff_init_scan[16] = {
-    0, 3, 4, 3, 3, 1, 5, 1, 4, 5, 2, 5, 3, 1, 5, 1
-};
-
-static const uint8_t dequant8_coeff_init[6][6] = {
-    { 20, 18, 32, 19, 25, 24 },
-    { 22, 19, 35, 21, 28, 26 },
-    { 26, 23, 42, 24, 33, 31 },
-    { 28, 25, 45, 26, 35, 33 },
-    { 32, 28, 51, 30, 40, 38 },
-    { 36, 32, 58, 34, 46, 43 },
-};
-
-
 static void release_unused_pictures(H264Context *h, int remove_current)
 {
    int i;
@@ -328,11 +288,11 @@ static void init_dequant8_coeff_table(H264Context *h)
            continue;

        for (q = 0; q < max_qp + 1; q++) {
-            int shift = div6[q];
-            int idx   = rem6[q];
+            int shift = ff_h264_quant_div6[q];
+            int idx   = ff_h264_quant_rem6[q];
            for (x = 0; x < 64; x++)
                h->dequant8_coeff[i][q][(x >> 3) | ((x & 7) << 3)] =
-                    ((uint32_t)dequant8_coeff_init[idx][dequant8_coeff_init_scan[((x >> 1) & 12) | (x & 3)]] *
+                    ((uint32_t)ff_h264_dequant8_coeff_init[idx][ff_h264_dequant8_coeff_init_scan[((x >> 1) & 12) | (x & 3)]] *
                     h->pps.scaling_matrix8[i][x]) << shift;
        }
    }
@@ -354,11 +314,11 @@ static void init_dequant4_coeff_table(H264Context *h)
            continue;

        for (q = 0; q < max_qp + 1; q++) {
-            int shift = div6[q] + 2;
-            int idx   = rem6[q];
+            int shift = ff_h264_quant_div6[q] + 2;
+            int idx   = ff_h264_quant_rem6[q];
            for (x = 0; x < 16; x++)
                h->dequant4_coeff[i][q][(x >> 2) | ((x << 2) & 0xF)] =
-                    ((uint32_t)dequant4_coeff_init[idx][(x & 1) + ((x >> 2) & 1)] *
+                    ((uint32_t)ff_h264_dequant4_coeff_init[idx][(x & 1) + ((x >> 2) & 1)] *
                     h->pps.scaling_matrix4[i][x]) << shift;
        }
    }
--- a/libavcodec/h264data.c
+++ b/libavcodec/h264data.c
@@ -147,3 +147,41 @@ const PMbInfo ff_h264_b_sub_mb_type_info[13] = {
    { MB_TYPE_8x8   | MB_TYPE_P0L1 | MB_TYPE_P1L1,                               4, },
    { MB_TYPE_8x8   | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 4, },
 };
+
+const uint8_t ff_h264_dequant4_coeff_init[6][3] = {
+    { 10, 13, 16 },
+    { 11, 14, 18 },
+    { 13, 16, 20 },
+    { 14, 18, 23 },
+    { 16, 20, 25 },
+    { 18, 23, 29 },
+};
+
+const uint8_t ff_h264_dequant8_coeff_init_scan[16] = {
+    0, 3, 4, 3, 3, 1, 5, 1, 4, 5, 2, 5, 3, 1, 5, 1
+};
+
+const uint8_t ff_h264_dequant8_coeff_init[6][6] = {
+    { 20, 18, 32, 19, 25, 24 },
+    { 22, 19, 35, 21, 28, 26 },
+    { 26, 23, 42, 24, 33, 31 },
+    { 28, 25, 45, 26, 35, 33 },
+    { 32, 28, 51, 30, 40, 38 },
+    { 36, 32, 58, 34, 46, 43 },
+};
+
+const uint8_t ff_h264_quant_rem6[QP_MAX_NUM + 1] = {
+    0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
+    3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
+    0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
+    3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
+    0, 1, 2, 3,
+};
+
+const uint8_t ff_h264_quant_div6[QP_MAX_NUM + 1] = {
+    0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3,  3,  3,
+    3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6,  6,  6,
+    7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10,
+   10,10,10,11,11,11,11,11,11,12,12,12,12,12,12,13,13,13, 13, 13, 13,
+   14,14,14,14,
+};
--- a/libavcodec/h264data.h
+++ b/libavcodec/h264data.h
@@ -67,4 +67,11 @@ static const AVRational ff_h264_pixel_aspect[17] = {
    {   3,  2 },
    {   2,  1 },
 };
+
+extern const uint8_t ff_h264_dequant4_coeff_init[6][3];
+extern const uint8_t ff_h264_dequant8_coeff_init_scan[16];
+extern const uint8_t ff_h264_dequant8_coeff_init[6][6];
+extern const uint8_t ff_h264_quant_rem6[QP_MAX_NUM + 1];
+extern const uint8_t ff_h264_quant_div6[QP_MAX_NUM + 1];
+
 #endif /* AVCODEC_H264DATA_H */
--- a/libavcodec/svq3.c
+++ b/libavcodec/svq3.c
@@ -113,6 +113,7 @@ typedef struct SVQ3Context {

    DECLARE_ALIGNED(16, int16_t, mv_cache)[2][5 * 8][2];
    DECLARE_ALIGNED(8,  int8_t, ref_cache)[2][5 * 8];
+    uint32_t dequant4_coeff[QP_MAX_NUM + 1][16];
 } SVQ3Context;

 #define FULLPEL_MODE  1
@@ -663,8 +664,6 @@ static void hl_decode_mb(SVQ3Context *s, const H264Context *h, H264SliceContext
    s->vdsp.prefetch(dest_y  + (s->mb_x & 3) * 4 * sl->linesize   + 64, sl->linesize,      4);
    s->vdsp.prefetch(dest_cb + (s->mb_x & 7)     * sl->uvlinesize + 64, dest_cr - dest_cb, 2);

-    h->list_counts[mb_xy] = sl->list_count;
-
    linesize   = sl->mb_linesize   = sl->linesize;
    uvlinesize = sl->mb_uvlinesize = sl->uvlinesize;

@@ -680,9 +679,9 @@ static void hl_decode_mb(SVQ3Context *s, const H264Context *h, H264SliceContext
    if (sl->cbp & 0x30) {
        uint8_t *dest[2] = { dest_cb, dest_cr };
        s->h264dsp.h264_chroma_dc_dequant_idct(sl->mb + 16 * 16 * 1,
-                                               h->dequant4_coeff[IS_INTRA(mb_type) ? 1 : 4][sl->chroma_qp[0]][0]);
+                                               s->dequant4_coeff[sl->chroma_qp[0]][0]);
        s->h264dsp.h264_chroma_dc_dequant_idct(sl->mb + 16 * 16 * 2,
-                                               h->dequant4_coeff[IS_INTRA(mb_type) ? 2 : 5][sl->chroma_qp[1]][0]);
+                                               s->dequant4_coeff[sl->chroma_qp[1]][0]);
        for (j = 1; j < 3; j++) {
            for (i = j * 16; i < j * 16 + 4; i++)
                if (sl->non_zero_count_cache[scan8[i]] || sl->mb[i * 16]) {
@@ -1093,6 +1092,20 @@ static int svq3_decode_slice_header(AVCodecContext *avctx)
    return 0;
 }

+static void init_dequant4_coeff_table(SVQ3Context *s)
+{
+    int q, x;
+    const int max_qp = 51;
+
+    for (q = 0; q < max_qp + 1; q++) {
+        int shift = ff_h264_quant_div6[q] + 2;
+        int idx   = ff_h264_quant_rem6[q];
+        for (x = 0; x < 16; x++)
+            s->dequant4_coeff[q][(x >> 2) | ((x << 2) & 0xF)] =
+                ((uint32_t)ff_h264_dequant4_coeff_init[idx][(x & 1) + ((x >> 2) & 1)] * 16) << shift;
+    }
+}
+
 static av_cold int svq3_decode_init(AVCodecContext *avctx)
 {
    SVQ3Context *s = avctx->priv_data;
@@ -1132,8 +1145,6 @@ static av_cold int svq3_decode_init(AVCodecContext *avctx)
    ff_h264_pred_init(&s->hpc, AV_CODEC_ID_SVQ3, 8, 1);
    ff_videodsp_init(&s->vdsp, 8);

-    memset(h->pps.scaling_matrix4, 16, 6 * 16 * sizeof(uint8_t));
-    memset(h->pps.scaling_matrix8, 16, 2 * 64 * sizeof(uint8_t));

    avctx->bits_per_raw_sample = 8;
    h->sps.bit_depth_luma = 8;
@@ -1322,10 +1333,7 @@ static av_cold int svq3_decode_init(AVCodecContext *avctx)
            s->mb2br_xy[mb_xy] = 8 * (mb_xy % (2 * h->mb_stride));
        }

-    if ((ret = ff_h264_alloc_tables(h)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "svq3 memory allocation failed\n");
-        goto fail;
-    }
+    init_dequant4_coeff_table(s);

    return 0;
 fail: