hevc/pps: optimized size of min_tb_addr_zs

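Size the MinTbAddrZS table for a single CTB (plus a leading border row and column set to -1) instead of the whole picture, and index it with coordinates masked to the current CTB. This also reduces the computation needed to fill the table.

(cherry picked from commit 39c4d45c7788081c45c7fae51b7c5d0bcbaece9d)
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>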
2025-07-16 22:42:38 +02:00 · 2014-06-24 08:27:16 +02:00
parent f7f1f4c7ce
commit ba70563d55
4 changed files with 36 additions and 24 deletions
--- a/libavcodec/hevc.h
+++ b/libavcodec/hevc.h
@ -463,6 +463,7 @@ typedef struct HEVCSPS {
    int min_tb_height;
    int min_pu_width;
    int min_pu_height;
+    int tb_mask;

    int hshift[3];
    int vshift[3];
@ -532,6 +533,7 @@ typedef struct HEVCPPS {
    int *tile_id;           ///< TileId
    int *tile_pos_rs;       ///< TilePosRS
    int *min_tb_addr_zs;    ///< MinTbAddrZS
+    int *min_tb_addr_zs_tab;///< MinTbAddrZS
 } HEVCPPS;

 typedef struct SliceHeader {
--- a/libavcodec/hevc_mvs.c
+++ b/libavcodec/hevc_mvs.c
@ -65,20 +65,27 @@ static int z_scan_block_avail(HEVCContext *s, int xCurr, int yCurr,
                              int xN, int yN)
 {
 #define MIN_TB_ADDR_ZS(x, y)                                            \
-    s->pps->min_tb_addr_zs[(y) * s->sps->min_tb_width + (x)]
-    int Curr = MIN_TB_ADDR_ZS(xCurr >> s->sps->log2_min_tb_size,
-                              yCurr >> s->sps->log2_min_tb_size);
-    int N;
+    s->pps->min_tb_addr_zs[(y) * (s->sps->tb_mask+2) + (x)]
+
+    int xCurr_ctb = xCurr >> s->sps->log2_ctb_size;
+    int yCurr_ctb = yCurr >> s->sps->log2_ctb_size;
+    int xN_ctb    = xN    >> s->sps->log2_ctb_size;
+    int yN_ctb    = yN    >> s->sps->log2_ctb_size;

    if (xN < 0 || yN < 0 ||
        xN >= s->sps->width ||
        yN >= s->sps->height)
        return 0;

-    N = MIN_TB_ADDR_ZS(xN >> s->sps->log2_min_tb_size,
-                       yN >> s->sps->log2_min_tb_size);
-
-    return N <= Curr;
+    if( yN_ctb < yCurr_ctb || xN_ctb < xCurr_ctb )
+        return 1;
+    else {
+        int Curr = MIN_TB_ADDR_ZS((xCurr >> s->sps->log2_min_tb_size) & s->sps->tb_mask,
+                (yCurr >> s->sps->log2_min_tb_size) & s->sps->tb_mask);
+        int N    = MIN_TB_ADDR_ZS((xN >> s->sps->log2_min_tb_size) & s->sps->tb_mask,
+                (yN >> s->sps->log2_min_tb_size) & s->sps->tb_mask);
+        return N <= Curr;
+    }
 }

 static int same_prediction_block(HEVCLocalContext *lc, int log2_cb_size,
--- a/libavcodec/hevc_ps.c
+++ b/libavcodec/hevc_ps.c
@ -907,6 +907,7 @@ int ff_hevc_decode_nal_sps(HEVCContext *s)
    sps->min_tb_height = sps->height >> sps->log2_min_tb_size;
    sps->min_pu_width  = sps->width  >> sps->log2_min_pu_size;
    sps->min_pu_height = sps->height >> sps->log2_min_pu_size;
+    sps->tb_mask       = (1 << (sps->log2_ctb_size - sps->log2_min_tb_size)) - 1;

    sps->qp_bd_offset = 6 * (sps->bit_depth - 8);

@ -981,7 +982,7 @@ static void hevc_pps_free(void *opaque, uint8_t *data)
    av_freep(&pps->ctb_addr_ts_to_rs);
    av_freep(&pps->tile_pos_rs);
    av_freep(&pps->tile_id);
-    av_freep(&pps->min_tb_addr_zs);
+    av_freep(&pps->min_tb_addr_zs_tab);

    av_freep(&pps);
 }
@ -990,7 +991,7 @@ int ff_hevc_decode_nal_pps(HEVCContext *s)
 {
    GetBitContext *gb = &s->HEVClc->gb;
    HEVCSPS      *sps = NULL;
-    int pic_area_in_ctbs, pic_area_in_min_cbs, pic_area_in_min_tbs;
+    int pic_area_in_ctbs;
    int log2_diff_ctb_min_tb_size;
    int i, j, x, y, ctb_addr_rs, tile_id;
    int ret = 0;
@ -1229,15 +1230,13 @@ int ff_hevc_decode_nal_pps(HEVCContext *s)
     * 6.5
     */
    pic_area_in_ctbs     = sps->ctb_width    * sps->ctb_height;
-    pic_area_in_min_cbs  = sps->min_cb_width * sps->min_cb_height;
-    pic_area_in_min_tbs  = sps->min_tb_width * sps->min_tb_height;

    pps->ctb_addr_rs_to_ts = av_malloc_array(pic_area_in_ctbs,    sizeof(*pps->ctb_addr_rs_to_ts));
    pps->ctb_addr_ts_to_rs = av_malloc_array(pic_area_in_ctbs,    sizeof(*pps->ctb_addr_ts_to_rs));
    pps->tile_id           = av_malloc_array(pic_area_in_ctbs,    sizeof(*pps->tile_id));
-    pps->min_tb_addr_zs    = av_malloc_array(pic_area_in_min_tbs, sizeof(*pps->min_tb_addr_zs));
+    pps->min_tb_addr_zs_tab = av_malloc_array((sps->tb_mask+2) * (sps->tb_mask+2), sizeof(*pps->min_tb_addr_zs_tab));
    if (!pps->ctb_addr_rs_to_ts || !pps->ctb_addr_ts_to_rs ||
-        !pps->tile_id || !pps->min_tb_addr_zs) {
+        !pps->tile_id || !pps->min_tb_addr_zs_tab) {
        ret = AVERROR(ENOMEM);
        goto err;
    }
@ -1292,8 +1291,13 @@ int ff_hevc_decode_nal_pps(HEVCContext *s)
            pps->tile_pos_rs[j * pps->num_tile_columns + i] = pps->row_bd[j] * sps->ctb_width + pps->col_bd[i];

    log2_diff_ctb_min_tb_size = sps->log2_ctb_size - sps->log2_min_tb_size;
-    for (y = 0; y < sps->min_tb_height; y++) {
-        for (x = 0; x < sps->min_tb_width; x++) {
+    pps->min_tb_addr_zs = &pps->min_tb_addr_zs_tab[1*(sps->tb_mask+2)+1];
+    for (y = 0; y < sps->tb_mask+2; y++) {
+        pps->min_tb_addr_zs_tab[y*(sps->tb_mask+2)] = -1;
+        pps->min_tb_addr_zs_tab[y]    = -1;
+    }
+    for (y = 0; y < sps->tb_mask+1; y++) {
+        for (x = 0; x < sps->tb_mask+1; x++) {
            int tb_x        = x >> log2_diff_ctb_min_tb_size;
            int tb_y        = y >> log2_diff_ctb_min_tb_size;
            int ctb_addr_rs = sps->ctb_width * tb_y + tb_x;
@ -1303,7 +1307,7 @@ int ff_hevc_decode_nal_pps(HEVCContext *s)
                int m = 1 << i;
                val += (m & x ? m * m : 0) + (m & y ? 2 * m * m : 0);
            }
-            pps->min_tb_addr_zs[y * sps->min_tb_width + x] = val;
+            pps->min_tb_addr_zs[y * (sps->tb_mask+2) + x] = val;
        }
    }

--- a/libavcodec/hevcpred_template.c
+++ b/libavcodec/hevcpred_template.c
@ -39,8 +39,7 @@ static av_always_inline void FUNC(intra_pred)(HEVCContext *s, int x0, int y0,
 #define IS_INTRA(x, y) \
    (MVF_PU(x, y).pred_flag == PF_INTRA)
 #define MIN_TB_ADDR_ZS(x, y) \
-    s->pps->min_tb_addr_zs[(y) * s->sps->min_tb_width + (x)]
-
+    s->pps->min_tb_addr_zs[(y) * (s->sps->tb_mask+2) + (x)]
 #define EXTEND(ptr, val, len)         \
 do {                                  \
    pixel4 pix = PIXEL_SPLAT_X4(val); \
@ -82,8 +81,9 @@ do {                                  \
    int size_in_tbs_v  = size_in_luma_v >> s->sps->log2_min_tb_size;
    int x = x0 >> hshift;
    int y = y0 >> vshift;
-    int x_tb = x0 >> s->sps->log2_min_tb_size;
-    int y_tb = y0 >> s->sps->log2_min_tb_size;
+    int x_tb = (x0 >> s->sps->log2_min_tb_size) & s->sps->tb_mask;
+    int y_tb = (y0 >> s->sps->log2_min_tb_size) & s->sps->tb_mask;
+
    int cur_tb_addr = MIN_TB_ADDR_ZS(x_tb, y_tb);

    ptrdiff_t stride = s->frame->linesize[c_idx] / sizeof(pixel);
@ -103,12 +103,11 @@ do {                                  \
    pixel  *top           = top_array  + 1;
    pixel  *filtered_left = filtered_left_array + 1;
    pixel  *filtered_top  = filtered_top_array  + 1;
-
-    int cand_bottom_left = lc->na.cand_bottom_left && cur_tb_addr > MIN_TB_ADDR_ZS(x_tb - 1, y_tb + size_in_tbs_v);
+    int cand_bottom_left = lc->na.cand_bottom_left && cur_tb_addr > MIN_TB_ADDR_ZS( x_tb - 1, (y_tb + size_in_tbs_v) & s->sps->tb_mask);
    int cand_left        = lc->na.cand_left;
    int cand_up_left     = lc->na.cand_up_left;
    int cand_up          = lc->na.cand_up;
-    int cand_up_right    = lc->na.cand_up_right && cur_tb_addr > MIN_TB_ADDR_ZS(x_tb + size_in_tbs_h, y_tb - 1);
+    int cand_up_right    = lc->na.cand_up_right    && cur_tb_addr > MIN_TB_ADDR_ZS((x_tb + size_in_tbs_h) & s->sps->tb_mask, y_tb - 1);

    int bottom_left_size = (FFMIN(y0 + 2 * size_in_luma_v, s->sps->height) -
                           (y0 + size_in_luma_v)) >> vshift;