dsputil: create 16/32-bit dctcoef versions of some functions

High bitdepth H.264 needs 32-bit transform coefficients, whereas dnxhd does not. This creates a conflict with the templated functions operating on DCTELEM data. This patch adds a field allowing the caller to choose the element size in dsputil_init() and adds the required functions. Signed-off-by: Mans Rullgard <mans@mansr.com>
2025-08-10 06:10:52 +02:00 · 2011-07-21 12:39:41 +01:00
parent 0a72533e98
commit 5cc2600964
4 changed files with 80 additions and 56 deletions
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -3159,13 +3159,13 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
    c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)


-#define BIT_DEPTH_FUNCS(depth)\
+#define BIT_DEPTH_FUNCS(depth, dct)\
    c->draw_edges                    = FUNCC(draw_edges            , depth);\
    c->emulated_edge_mc              = FUNC (ff_emulated_edge_mc   , depth);\
-    c->clear_block                   = FUNCC(clear_block           , depth);\
-    c->clear_blocks                  = FUNCC(clear_blocks          , depth);\
-    c->add_pixels8                   = FUNCC(add_pixels8           , depth);\
-    c->add_pixels4                   = FUNCC(add_pixels4           , depth);\
+    c->clear_block                   = FUNCC(clear_block  ## dct   , depth);\
+    c->clear_blocks                  = FUNCC(clear_blocks ## dct   , depth);\
+    c->add_pixels8                   = FUNCC(add_pixels8  ## dct   , depth);\
+    c->add_pixels4                   = FUNCC(add_pixels4  ## dct   , depth);\
    c->put_no_rnd_pixels_l2[0]       = FUNCC(put_no_rnd_pixels16_l2, depth);\
    c->put_no_rnd_pixels_l2[1]       = FUNCC(put_no_rnd_pixels8_l2 , depth);\
 \
@@ -3199,15 +3199,23 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)

    switch (avctx->bits_per_raw_sample) {
    case 9:
-        BIT_DEPTH_FUNCS(9);
+        if (c->dct_bits == 32) {
+            BIT_DEPTH_FUNCS(9, _32);
+        } else {
+            BIT_DEPTH_FUNCS(9, _16);
+        }
        break;
    case 10:
-        BIT_DEPTH_FUNCS(10);
+        if (c->dct_bits == 32) {
+            BIT_DEPTH_FUNCS(10, _32);
+        } else {
+            BIT_DEPTH_FUNCS(10, _16);
+        }
        break;
    default:
        av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample);
    case 8:
-        BIT_DEPTH_FUNCS(8);
+        BIT_DEPTH_FUNCS(8, _16);
        break;
    }

--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -219,6 +219,11 @@ void ff_put_signed_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int lin
 * DSPContext.
 */
 typedef struct DSPContext {
+    /**
+     * Size of DCT coefficients.
+     */
+    int dct_bits;
+
    /* pixel ops : interface with DCT */
    void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size);
    void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride);
--- a/libavcodec/dsputil_template.c
+++ b/libavcodec/dsputil_template.c
@@ -192,43 +192,66 @@ void FUNC(ff_emulated_edge_mc)(uint8_t *buf, const uint8_t *src, int linesize, i
    }
 }

-static void FUNCC(add_pixels8)(uint8_t *restrict _pixels, DCTELEM *_block, int line_size)
-{
-    int i;
-    pixel *restrict pixels = (pixel *restrict)_pixels;
-    dctcoef *block = (dctcoef*)_block;
-    line_size /= sizeof(pixel);
-
-    for(i=0;i<8;i++) {
-        pixels[0] += block[0];
-        pixels[1] += block[1];
-        pixels[2] += block[2];
-        pixels[3] += block[3];
-        pixels[4] += block[4];
-        pixels[5] += block[5];
-        pixels[6] += block[6];
-        pixels[7] += block[7];
-        pixels += line_size;
-        block += 8;
-    }
+#define DCTELEM_FUNCS(dctcoef, suffix)                                  \
+static void FUNCC(add_pixels8 ## suffix)(uint8_t *restrict _pixels,     \
+                                         DCTELEM *_block,               \
+                                         int line_size)                 \
+{                                                                       \
+    int i;                                                              \
+    pixel *restrict pixels = (pixel *restrict)_pixels;                  \
+    dctcoef *block = (dctcoef*)_block;                                  \
+    line_size /= sizeof(pixel);                                         \
+                                                                        \
+    for(i=0;i<8;i++) {                                                  \
+        pixels[0] += block[0];                                          \
+        pixels[1] += block[1];                                          \
+        pixels[2] += block[2];                                          \
+        pixels[3] += block[3];                                          \
+        pixels[4] += block[4];                                          \
+        pixels[5] += block[5];                                          \
+        pixels[6] += block[6];                                          \
+        pixels[7] += block[7];                                          \
+        pixels += line_size;                                            \
+        block += 8;                                                     \
+    }                                                                   \
+}                                                                       \
+                                                                        \
+static void FUNCC(add_pixels4 ## suffix)(uint8_t *restrict _pixels,     \
+                                         DCTELEM *_block,               \
+                                         int line_size)                 \
+{                                                                       \
+    int i;                                                              \
+    pixel *restrict pixels = (pixel *restrict)_pixels;                  \
+    dctcoef *block = (dctcoef*)_block;                                  \
+    line_size /= sizeof(pixel);                                         \
+                                                                        \
+    for(i=0;i<4;i++) {                                                  \
+        pixels[0] += block[0];                                          \
+        pixels[1] += block[1];                                          \
+        pixels[2] += block[2];                                          \
+        pixels[3] += block[3];                                          \
+        pixels += line_size;                                            \
+        block += 4;                                                     \
+    }                                                                   \
+}                                                                       \
+                                                                        \
+static void FUNCC(clear_block ## suffix)(DCTELEM *block)                \
+{                                                                       \
+    memset(block, 0, sizeof(dctcoef)*64);                               \
+}                                                                       \
+                                                                        \
+/**                                                                     \
+ * memset(blocks, 0, sizeof(DCTELEM)*6*64)                              \
+ */                                                                     \
+static void FUNCC(clear_blocks ## suffix)(DCTELEM *blocks)              \
+{                                                                       \
+    memset(blocks, 0, sizeof(dctcoef)*6*64);                            \
 }

-static void FUNCC(add_pixels4)(uint8_t *restrict _pixels, DCTELEM *_block, int line_size)
-{
-    int i;
-    pixel *restrict pixels = (pixel *restrict)_pixels;
-    dctcoef *block = (dctcoef*)_block;
-    line_size /= sizeof(pixel);
-
-    for(i=0;i<4;i++) {
-        pixels[0] += block[0];
-        pixels[1] += block[1];
-        pixels[2] += block[2];
-        pixels[3] += block[3];
-        pixels += line_size;
-        block += 4;
-    }
-}
+DCTELEM_FUNCS(DCTELEM, _16)
+#if BIT_DEPTH > 8
+DCTELEM_FUNCS(dctcoef, _32)
+#endif

 #define PIXOP2(OPNAME, OP) \
 static void FUNCC(OPNAME ## _pixels2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
@@ -1231,16 +1254,3 @@ void FUNCC(ff_put_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
 void FUNCC(ff_avg_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
    FUNCC(avg_pixels16)(dst, src, stride, 16);
 }
-
-static void FUNCC(clear_block)(DCTELEM *block)
-{
-    memset(block, 0, sizeof(dctcoef)*64);
-}
-
-/**
- * memset(blocks, 0, sizeof(DCTELEM)*6*64)
- */
-static void FUNCC(clear_blocks)(DCTELEM *blocks)
-{
-    memset(blocks, 0, sizeof(dctcoef)*6*64);
-}
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -3702,6 +3702,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){

                    ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma);
                    ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma);
+                    s->dsp.dct_bits = h->sps.bit_depth_luma > 8 ? 32 : 16;
                    dsputil_init(&s->dsp, s->avctx);
                } else {
                    av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", h->sps.bit_depth_luma);