lavc/ffv1: move sample_buffer to the per-slice context

2025-10-30 23:18:11 +02:00 · 2024-07-07 19:46:20 +02:00
parent 54aa33f116
commit 91d3c1ac47
6 changed files with 62 additions and 53 deletions
--- a/libavcodec/ffv1.c
+++ b/libavcodec/ffv1.c
@@ -112,6 +112,8 @@ av_cold int ff_ffv1_init_slice_contexts(FFV1Context *f)
    if (!f->slices)
        return AVERROR(ENOMEM);

+    f->max_slice_count = max_slice_count;
+
    for (i = 0; i < max_slice_count;) {
        FFV1SliceContext *sc = &f->slices[i];
        int sx          = i % f->num_h_slices;
@@ -123,7 +125,7 @@ av_cold int ff_ffv1_init_slice_contexts(FFV1Context *f)
        FFV1Context *fs = av_mallocz(sizeof(*fs));

        if (!fs)
-            goto memfail;
+            return AVERROR(ENOMEM);

        f->slice_context[i++] = fs;
        memcpy(fs, f, sizeof(*fs));
@@ -134,19 +136,15 @@ av_cold int ff_ffv1_init_slice_contexts(FFV1Context *f)
        sc->slice_x      = sxs;
        sc->slice_y      = sys;

-        fs->sample_buffer = av_malloc_array((fs->width + 6), 3 * MAX_PLANES *
-                                      sizeof(*fs->sample_buffer));
-        fs->sample_buffer32 = av_malloc_array((fs->width + 6), 3 * MAX_PLANES *
-                                        sizeof(*fs->sample_buffer32));
-        if (!fs->sample_buffer || !fs->sample_buffer32)
-            goto memfail;
+        sc->sample_buffer = av_malloc_array((fs->width + 6), 3 * MAX_PLANES *
+                                            sizeof(*sc->sample_buffer));
+        sc->sample_buffer32 = av_malloc_array((fs->width + 6), 3 * MAX_PLANES *
+                                              sizeof(*sc->sample_buffer32));
+        if (!sc->sample_buffer || !sc->sample_buffer32)
+            return AVERROR(ENOMEM);
    }
-    f->max_slice_count = max_slice_count;
-    return 0;

-memfail:
-    f->max_slice_count = i;
-    return AVERROR(ENOMEM);
+    return 0;
 }

 int ff_ffv1_allocate_initial_states(FFV1Context *f)
@@ -199,14 +197,20 @@ av_cold int ff_ffv1_close(AVCodecContext *avctx)

    for (j = 0; j < s->max_slice_count; j++) {
        FFV1Context *fs = s->slice_context[j];
+        FFV1SliceContext *sc = &s->slices[j];
+
+        av_freep(&sc->sample_buffer);
+        av_freep(&sc->sample_buffer32);
+
+        if (!fs)
+            continue;
+
        for (i = 0; i < s->plane_count; i++) {
            PlaneContext *p = &fs->plane[i];

            av_freep(&p->state);
            av_freep(&p->vlc_state);
        }
-        av_freep(&fs->sample_buffer);
-        av_freep(&fs->sample_buffer32);
    }

    av_freep(&avctx->stats_out);
@@ -214,7 +218,8 @@ av_cold int ff_ffv1_close(AVCodecContext *avctx)
        av_freep(&s->initial_states[j]);
        for (i = 0; i < s->max_slice_count; i++) {
            FFV1Context *sf = s->slice_context[i];
-            av_freep(&sf->rc_stat2[j]);
+            if (sf)
+                av_freep(&sf->rc_stat2[j]);
        }
        av_freep(&s->rc_stat2[j]);
    }
--- a/libavcodec/ffv1.h
+++ b/libavcodec/ffv1.h
@@ -70,6 +70,9 @@ typedef struct PlaneContext {
 #define MAX_SLICES 1024

 typedef struct FFV1SliceContext {
+    int16_t *sample_buffer;
+    int32_t *sample_buffer32;
+
    int slice_width;
    int slice_height;
    int slice_x;
@@ -108,8 +111,6 @@ typedef struct FFV1Context {
    uint8_t (*initial_states[MAX_QUANT_TABLES])[32];
    int run_index;
    int colorspace;
-    int16_t *sample_buffer;
-    int32_t *sample_buffer32;

    int use32bit;

--- a/libavcodec/ffv1dec.c
+++ b/libavcodec/ffv1dec.c
@@ -117,18 +117,18 @@ static int is_input_end(FFV1Context *s)
 #define RENAME(name) name ## 32
 #include "ffv1dec_template.c"

-static int decode_plane(FFV1Context *s, uint8_t *src,
-                         int w, int h, int stride, int plane_index,
+static int decode_plane(FFV1Context *s, FFV1SliceContext *sc,
+                        uint8_t *src, int w, int h, int stride, int plane_index,
                         int pixel_stride)
 {
    int x, y;
    int16_t *sample[2];
-    sample[0] = s->sample_buffer + 3;
-    sample[1] = s->sample_buffer + w + 6 + 3;
+    sample[0] = sc->sample_buffer + 3;
+    sample[1] = sc->sample_buffer + w + 6 + 3;

    s->run_index = 0;

-    memset(s->sample_buffer, 0, 2 * (w + 6) * sizeof(*s->sample_buffer));
+    memset(sc->sample_buffer, 0, 2 * (w + 6) * sizeof(*sc->sample_buffer));

    for (y = 0; y < h; y++) {
        int16_t *temp = sample[0]; // FIXME: try a normal buffer
@@ -333,29 +333,29 @@ static int decode_slice(AVCodecContext *c, void *arg)
        const int chroma_height = AV_CEIL_RSHIFT(height, f->chroma_v_shift);
        const int cx            = x >> f->chroma_h_shift;
        const int cy            = y >> f->chroma_v_shift;
-        decode_plane(fs, p->data[0] + ps*x + y*p->linesize[0], width, height, p->linesize[0], 0, 1);
+        decode_plane(fs, sc, p->data[0] + ps*x + y*p->linesize[0], width, height, p->linesize[0], 0, 1);

        if (f->chroma_planes) {
-            decode_plane(fs, p->data[1] + ps*cx+cy*p->linesize[1], chroma_width, chroma_height, p->linesize[1], 1, 1);
-            decode_plane(fs, p->data[2] + ps*cx+cy*p->linesize[2], chroma_width, chroma_height, p->linesize[2], 1, 1);
+            decode_plane(fs, sc, p->data[1] + ps*cx+cy*p->linesize[1], chroma_width, chroma_height, p->linesize[1], 1, 1);
+            decode_plane(fs, sc, p->data[2] + ps*cx+cy*p->linesize[2], chroma_width, chroma_height, p->linesize[2], 1, 1);
        }
        if (fs->transparency)
-            decode_plane(fs, p->data[3] + ps*x + y*p->linesize[3], width, height, p->linesize[3], (f->version >= 4 && !f->chroma_planes) ? 1 : 2, 1);
+            decode_plane(fs, sc, p->data[3] + ps*x + y*p->linesize[3], width, height, p->linesize[3], (f->version >= 4 && !f->chroma_planes) ? 1 : 2, 1);
    } else if (f->colorspace == 0) {
-         decode_plane(fs, p->data[0] + ps*x + y*p->linesize[0]    , width, height, p->linesize[0], 0, 2);
-         decode_plane(fs, p->data[0] + ps*x + y*p->linesize[0] + 1, width, height, p->linesize[0], 1, 2);
+         decode_plane(fs, sc, p->data[0] + ps*x + y*p->linesize[0]    , width, height, p->linesize[0], 0, 2);
+         decode_plane(fs, sc, p->data[0] + ps*x + y*p->linesize[0] + 1, width, height, p->linesize[0], 1, 2);
    } else if (f->use32bit) {
        uint8_t *planes[4] = { p->data[0] + ps * x + y * p->linesize[0],
                               p->data[1] + ps * x + y * p->linesize[1],
                               p->data[2] + ps * x + y * p->linesize[2],
                               p->data[3] + ps * x + y * p->linesize[3] };
-        decode_rgb_frame32(fs, planes, width, height, p->linesize);
+        decode_rgb_frame32(fs, sc, planes, width, height, p->linesize);
    } else {
        uint8_t *planes[4] = { p->data[0] + ps * x + y * p->linesize[0],
                               p->data[1] + ps * x + y * p->linesize[1],
                               p->data[2] + ps * x + y * p->linesize[2],
                               p->data[3] + ps * x + y * p->linesize[3] };
-        decode_rgb_frame(fs, planes, width, height, p->linesize);
+        decode_rgb_frame(fs, sc, planes, width, height, p->linesize);
    }
    if (fs->ac != AC_GOLOMB_RICE && f->version > 2) {
        int v;
@@ -1084,7 +1084,6 @@ static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
        }
    }
    av_assert0(!fdst->plane[0].state);
-    av_assert0(!fdst->sample_buffer);

    av_assert1(fdst->max_slice_count == fsrc->max_slice_count);

--- a/libavcodec/ffv1dec_template.c
+++ b/libavcodec/ffv1dec_template.c
@@ -127,7 +127,8 @@ static av_always_inline int RENAME(decode_line)(FFV1Context *s, int w,
    return 0;
 }

-static int RENAME(decode_rgb_frame)(FFV1Context *s, uint8_t *src[4], int w, int h, int stride[4])
+static int RENAME(decode_rgb_frame)(FFV1Context *s, FFV1SliceContext *sc,
+                                    uint8_t *src[4], int w, int h, int stride[4])
 {
    int x, y, p;
    TYPE *sample[4][2];
@@ -137,13 +138,13 @@ static int RENAME(decode_rgb_frame)(FFV1Context *s, uint8_t *src[4], int w, int
    int transparency = s->transparency;

    for (x = 0; x < 4; x++) {
-        sample[x][0] = RENAME(s->sample_buffer) +  x * 2      * (w + 6) + 3;
-        sample[x][1] = RENAME(s->sample_buffer) + (x * 2 + 1) * (w + 6) + 3;
+        sample[x][0] = RENAME(sc->sample_buffer) +  x * 2      * (w + 6) + 3;
+        sample[x][1] = RENAME(sc->sample_buffer) + (x * 2 + 1) * (w + 6) + 3;
    }

    s->run_index = 0;

-    memset(RENAME(s->sample_buffer), 0, 8 * (w + 6) * sizeof(*RENAME(s->sample_buffer)));
+    memset(RENAME(sc->sample_buffer), 0, 8 * (w + 6) * sizeof(*RENAME(sc->sample_buffer)));

    for (y = 0; y < h; y++) {
        for (p = 0; p < 3 + transparency; p++) {
--- a/libavcodec/ffv1enc.c
+++ b/libavcodec/ffv1enc.c
@@ -269,7 +269,8 @@ static inline void put_vlc_symbol(PutBitContext *pb, VlcState *const state,
 #define RENAME(name) name ## 32
 #include "ffv1enc_template.c"

-static int encode_plane(FFV1Context *s, const uint8_t *src, int w, int h,
+static int encode_plane(FFV1Context *s, FFV1SliceContext *sc,
+                        const uint8_t *src, int w, int h,
                         int stride, int plane_index, int pixel_stride)
 {
    int x, y, i, ret;
@@ -277,11 +278,11 @@ static int encode_plane(FFV1Context *s, const uint8_t *src, int w, int h,
    int16_t *sample[3];
    s->run_index = 0;

-    memset(s->sample_buffer, 0, ring_size * (w + 6) * sizeof(*s->sample_buffer));
+    memset(sc->sample_buffer, 0, ring_size * (w + 6) * sizeof(*sc->sample_buffer));

    for (y = 0; y < h; y++) {
        for (i = 0; i < ring_size; i++)
-            sample[i] = s->sample_buffer + (w + 6) * ((h + i - y) % ring_size) + 3;
+            sample[i] = sc->sample_buffer + (w + 6) * ((h + i - y) % ring_size) + 3;

        sample[0][-1]= sample[1][0  ];
        sample[1][ w]= sample[1][w-1];
@@ -938,7 +939,8 @@ static void encode_slice_header(FFV1Context *f, FFV1Context *fs,
    }
 }

-static void choose_rct_params(FFV1Context *fs, const uint8_t *src[3], const int stride[3], int w, int h)
+static void choose_rct_params(FFV1Context *fs, FFV1SliceContext *sc,
+                              const uint8_t *src[3], const int stride[3], int w, int h)
 {
 #define NB_Y_COEFF 15
    static const int rct_y_coeff[15][2] = {
@@ -968,7 +970,7 @@ static void choose_rct_params(FFV1Context *fs, const uint8_t *src[3], const int
    for (y = 0; y < h; y++) {
        int lastr=0, lastg=0, lastb=0;
        for (p = 0; p < 3; p++)
-            sample[p] = fs->sample_buffer + p*w;
+            sample[p] = sc->sample_buffer + p*w;

        for (x = 0; x < w; x++) {
            int b, g, r;
@@ -1041,7 +1043,7 @@ static int encode_slice(AVCodecContext *c, void *arg)

    fs->slice_coding_mode = 0;
    if (f->version > 3) {
-        choose_rct_params(fs, planes, p->linesize, width, height);
+        choose_rct_params(fs, sc, planes, p->linesize, width, height);
    } else {
        fs->slice_rct_by_coef = 1;
        fs->slice_rct_ry_coef = 1;
@@ -1066,21 +1068,21 @@ retry:
        const int cx            = x >> f->chroma_h_shift;
        const int cy            = y >> f->chroma_v_shift;

-        ret = encode_plane(fs, p->data[0] + ps*x + y*p->linesize[0], width, height, p->linesize[0], 0, 1);
+        ret = encode_plane(fs, sc, p->data[0] + ps*x + y*p->linesize[0], width, height, p->linesize[0], 0, 1);

        if (f->chroma_planes) {
-            ret |= encode_plane(fs, p->data[1] + ps*cx+cy*p->linesize[1], chroma_width, chroma_height, p->linesize[1], 1, 1);
-            ret |= encode_plane(fs, p->data[2] + ps*cx+cy*p->linesize[2], chroma_width, chroma_height, p->linesize[2], 1, 1);
+            ret |= encode_plane(fs, sc, p->data[1] + ps*cx+cy*p->linesize[1], chroma_width, chroma_height, p->linesize[1], 1, 1);
+            ret |= encode_plane(fs, sc, p->data[2] + ps*cx+cy*p->linesize[2], chroma_width, chroma_height, p->linesize[2], 1, 1);
        }
        if (fs->transparency)
-            ret |= encode_plane(fs, p->data[3] + ps*x + y*p->linesize[3], width, height, p->linesize[3], 2, 1);
+            ret |= encode_plane(fs, sc, p->data[3] + ps*x + y*p->linesize[3], width, height, p->linesize[3], 2, 1);
    } else if (c->pix_fmt == AV_PIX_FMT_YA8) {
-        ret  = encode_plane(fs, p->data[0] +     ps*x + y*p->linesize[0], width, height, p->linesize[0], 0, 2);
-        ret |= encode_plane(fs, p->data[0] + 1 + ps*x + y*p->linesize[0], width, height, p->linesize[0], 1, 2);
+        ret  = encode_plane(fs, sc, p->data[0] +     ps*x + y*p->linesize[0], width, height, p->linesize[0], 0, 2);
+        ret |= encode_plane(fs, sc, p->data[0] + 1 + ps*x + y*p->linesize[0], width, height, p->linesize[0], 1, 2);
    } else if (f->use32bit) {
-        ret = encode_rgb_frame32(fs, planes, width, height, p->linesize);
+        ret = encode_rgb_frame32(fs, sc, planes, width, height, p->linesize);
    } else {
-        ret = encode_rgb_frame(fs, planes, width, height, p->linesize);
+        ret = encode_rgb_frame(fs, sc, planes, width, height, p->linesize);
    }

    if (ret < 0) {
--- a/libavcodec/ffv1enc_template.c
+++ b/libavcodec/ffv1enc_template.c
@@ -124,7 +124,8 @@ static av_always_inline int RENAME(encode_line)(FFV1Context *s, int w,
    return 0;
 }

-static int RENAME(encode_rgb_frame)(FFV1Context *s, const uint8_t *src[4],
+static int RENAME(encode_rgb_frame)(FFV1Context *s, FFV1SliceContext *sc,
+                                    const uint8_t *src[4],
                                    int w, int h, const int stride[4])
 {
    int x, y, p, i;
@@ -139,13 +140,13 @@ static int RENAME(encode_rgb_frame)(FFV1Context *s, const uint8_t *src[4],

    s->run_index = 0;

-    memset(RENAME(s->sample_buffer), 0, ring_size * MAX_PLANES *
-           (w + 6) * sizeof(*RENAME(s->sample_buffer)));
+    memset(RENAME(sc->sample_buffer), 0, ring_size * MAX_PLANES *
+           (w + 6) * sizeof(*RENAME(sc->sample_buffer)));

    for (y = 0; y < h; y++) {
        for (i = 0; i < ring_size; i++)
            for (p = 0; p < MAX_PLANES; p++)
-                sample[p][i]= RENAME(s->sample_buffer) + p*ring_size*(w+6) + ((h+i-y)%ring_size)*(w+6) + 3;
+                sample[p][i]= RENAME(sc->sample_buffer) + p*ring_size*(w+6) + ((h+i-y)%ring_size)*(w+6) + 3;

        for (x = 0; x < w; x++) {
            int b, g, r, av_uninit(a);