vp8: Add hwaccel hooks
Also adds some extra fields to the main context structure that may be needed by a hwaccel decoder. The current behaviour of the WebP decoder is maintained by adding an additional field to the VP8 decoder private context to indicate that it is actually being used as WebP (no hwaccel is supported for that case).
parent efd0612fdc
commit 9f00fa5369
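For orientation before the diff: the hooks this patch starts calling are the standard FFmpeg hwaccel callbacks (start_frame, decode_slice, end_frame, plus the frame_priv_data_size allocation hint). No VP8 hwaccel exists yet at this commit, so the sketch below of a backend registering against the decoder is purely illustrative; the example_* functions, the ExamplePicture struct, and the "vp8_example" name are invented for the example, and only the AVHWAccel fields themselves are real. The call sites appear in the vp78_decode_frame hunk further down.

#include "avcodec.h"

/* Invented per-picture struct; its size is what the decoder would allocate
 * behind VP8Frame.hwaccel_picture_private for every frame it decodes. */
typedef struct ExamplePicture {
    uint32_t surface_handle;   /* hypothetical hardware surface id */
} ExamplePicture;

/* Called once per packet, before any decoding work is submitted. */
static int example_start_frame(AVCodecContext *avctx,
                               const uint8_t *buffer, uint32_t size)
{
    /* A real backend would pick a surface here and fill its picture
     * parameters from the VP8Context the software header parser filled in. */
    return 0;
}

/* VP8 has no slices; the decoder calls this once with the whole packet. */
static int example_decode_slice(AVCodecContext *avctx,
                                const uint8_t *buffer, uint32_t size)
{
    return 0;
}

/* Called last; a real backend would submit/finish the hardware decode here. */
static int example_end_frame(AVCodecContext *avctx)
{
    return 0;
}

/* Illustrative registration only; a real entry would also be listed in
 * ff_vp8_decoder.hw_configs (see the final vp8.c hunk below). */
AVHWAccel example_vp8_hwaccel = {
    .name                 = "vp8_example",
    .type                 = AVMEDIA_TYPE_VIDEO,
    .id                   = AV_CODEC_ID_VP8,
    .pix_fmt              = AV_PIX_FMT_VAAPI, /* whichever format the backend outputs */
    .start_frame          = example_start_frame,
    .decode_slice         = example_decode_slice,
    .end_frame            = example_end_frame,
    .frame_priv_data_size = sizeof(ExamplePicture),
};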
libavcodec/vp8.c (124 lines changed)
@@ -27,6 +27,7 @@
 #include "libavutil/imgutils.h"
 
 #include "avcodec.h"
+#include "hwaccel.h"
 #include "internal.h"
 #include "mathops.h"
 #include "rectangle.h"
@@ -72,16 +73,30 @@ static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
     if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
                                     ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
         return ret;
-    if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
-        ff_thread_release_buffer(s->avctx, &f->tf);
-        return AVERROR(ENOMEM);
+    if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height)))
+        goto fail;
+    if (s->avctx->hwaccel) {
+        const AVHWAccel *hwaccel = s->avctx->hwaccel;
+        if (hwaccel->frame_priv_data_size) {
+            f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
+            if (!f->hwaccel_priv_buf)
+                goto fail;
+            f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
+        }
     }
     return 0;
+
+fail:
+    av_buffer_unref(&f->seg_map);
+    ff_thread_release_buffer(s->avctx, &f->tf);
+    return AVERROR(ENOMEM);
 }
 
 static void vp8_release_frame(VP8Context *s, VP8Frame *f)
 {
     av_buffer_unref(&f->seg_map);
+    av_buffer_unref(&f->hwaccel_priv_buf);
+    f->hwaccel_picture_private = NULL;
     ff_thread_release_buffer(s->avctx, &f->tf);
 }
 
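For orientation between the hunks: hwaccel_priv_buf and hwaccel_picture_private give every VP8Frame a reference-counted blob of frame_priv_data_size bytes for the backend's per-picture state, and the vp8_ref_frame() hunk just below propagates that reference so duplicated frames share one blob. A minimal sketch of what a backend might keep there, with ExampleVP8Picture invented for the example:

#include <stdint.h>
#include "vp8.h"   /* VP8Frame, from the decoder's private header */

/* Hypothetical per-picture state a backend might keep; invented for illustration. */
typedef struct ExampleVP8Picture {
    uintptr_t surface_id;      /* handle of the hardware surface in use */
    int       bitstream_sent;  /* whether the packet was submitted yet  */
} ExampleVP8Picture;

/* vp8_alloc_frame() above sizes the buffer with frame_priv_data_size and points
 * hwaccel_picture_private at it, so a backend simply casts the pointer back. */
static ExampleVP8Picture *example_picture(const VP8Frame *frame)
{
    return frame->hwaccel_picture_private;
}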
@@ -99,6 +114,12 @@ static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
         vp8_release_frame(s, dst);
         return AVERROR(ENOMEM);
     }
+    if (src->hwaccel_picture_private) {
+        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
+        if (!dst->hwaccel_priv_buf)
+            return AVERROR(ENOMEM);
+        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
+    }
 
     return 0;
 }
@@ -140,7 +161,7 @@ static VP8Frame *vp8_find_free_buffer(VP8Context *s)
         av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
         abort();
     }
-    if (frame->tf.f->data[0])
+    if (frame->tf.f->buf[0])
         vp8_release_frame(s, frame);
 
     return frame;
@@ -218,8 +239,9 @@ static void parse_segment_info(VP8Context *s)
     int i;
 
     s->segmentation.update_map = vp8_rac_get(c);
+    s->segmentation.update_feature_data = vp8_rac_get(c);
 
-    if (vp8_rac_get(c)) { // update segment feature data
+    if (s->segmentation.update_feature_data) {
         s->segmentation.absolute_vals = vp8_rac_get(c);
 
         for (i = 0; i < 4; i++)
@@ -274,6 +296,7 @@ static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
         int size = AV_RL24(sizes + 3 * i);
         if (buf_size - size < 0)
             return -1;
+        s->coeff_partition_size[i] = size;
 
         ret = ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
         if (ret < 0)
@@ -281,7 +304,11 @@ static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
         buf += size;
         buf_size -= size;
     }
-    return ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
+
+    s->coeff_partition_size[i] = buf_size;
+    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
+
+    return 0;
 }
 
 static void vp7_get_quants(VP8Context *s)
@@ -308,28 +335,28 @@ static void vp8_get_quants(VP8Context *s)
     VP56RangeCoder *c = &s->c;
     int i, base_qi;
 
-    int yac_qi = vp8_rac_get_uint(c, 7);
-    int ydc_delta = vp8_rac_get_sint(c, 4);
-    int y2dc_delta = vp8_rac_get_sint(c, 4);
-    int y2ac_delta = vp8_rac_get_sint(c, 4);
-    int uvdc_delta = vp8_rac_get_sint(c, 4);
-    int uvac_delta = vp8_rac_get_sint(c, 4);
+    s->quant.yac_qi = vp8_rac_get_uint(c, 7);
+    s->quant.ydc_delta = vp8_rac_get_sint(c, 4);
+    s->quant.y2dc_delta = vp8_rac_get_sint(c, 4);
+    s->quant.y2ac_delta = vp8_rac_get_sint(c, 4);
+    s->quant.uvdc_delta = vp8_rac_get_sint(c, 4);
+    s->quant.uvac_delta = vp8_rac_get_sint(c, 4);
 
     for (i = 0; i < 4; i++) {
         if (s->segmentation.enabled) {
             base_qi = s->segmentation.base_quant[i];
             if (!s->segmentation.absolute_vals)
-                base_qi += yac_qi;
+                base_qi += s->quant.yac_qi;
         } else
-            base_qi = yac_qi;
+            base_qi = s->quant.yac_qi;
 
-        s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta, 7)];
+        s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.ydc_delta, 7)];
         s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi, 7)];
-        s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)] * 2;
+        s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.y2dc_delta, 7)] * 2;
         /* 101581>>16 is equivalent to 155/100 */
-        s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] * 101581 >> 16;
-        s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
-        s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
+        s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.y2ac_delta, 7)] * 101581 >> 16;
+        s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.uvdc_delta, 7)];
+        s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.uvac_delta, 7)];
 
         s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
         s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
@@ -661,6 +688,8 @@ static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
     buf += 3;
     buf_size -= 3;
 
+    s->header_partition_size = header_size;
+
     if (s->profile > 3)
         av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
 
@@ -726,9 +755,11 @@ static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
     s->filter.level = vp8_rac_get_uint(c, 6);
     s->filter.sharpness = vp8_rac_get_uint(c, 3);
 
-    if ((s->lf_delta.enabled = vp8_rac_get(c)))
-        if (vp8_rac_get(c))
+    if ((s->lf_delta.enabled = vp8_rac_get(c))) {
+        s->lf_delta.update = vp8_rac_get(c);
+        if (s->lf_delta.update)
             update_lf_deltas(s);
+    }
 
     if (setup_partitions(s, buf, buf_size)) {
         av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
@@ -768,6 +799,13 @@ static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
         vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
     }
 
+    // Record the entropy coder state here so that hwaccels can use it.
+    s->c.code_word = vp56_rac_renorm(&s->c);
+    s->coder_state_at_header_end.input = s->c.buffer - (-s->c.bits / 8);
+    s->coder_state_at_header_end.range = s->c.high;
+    s->coder_state_at_header_end.value = s->c.code_word >> 16;
+    s->coder_state_at_header_end.bit_count = -s->c.bits % 8;
+
     return 0;
 }
 
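A backend consuming this would copy the recorded state into whatever per-picture structure its API defines, so the hardware bool-decoder can resume exactly where the software header parse stopped. The struct and helper below are invented for illustration; only the VP8Context fields come from this patch:

#include <stdint.h>
#include "vp8.h"   /* VP8Context, from the decoder's private header */

/* Invented hardware picture-parameter struct; not a real API. */
typedef struct hw_vp8_bool_coder_state {
    const uint8_t *stream_start;   /* where macroblock-level data begins      */
    uint32_t       range;          /* arithmetic coder range at that point    */
    uint32_t       value;          /* arithmetic coder value at that point    */
    int            bits_consumed;  /* bits already consumed from stream_start */
} hw_vp8_bool_coder_state;

static void fill_bool_coder_state(const VP8Context *s, hw_vp8_bool_coder_state *bc)
{
    /* These are exactly the four values recorded at the end of the header. */
    bc->stream_start  = s->coder_state_at_header_end.input;
    bc->range         = s->coder_state_at_header_end.range;
    bc->value         = s->coder_state_at_header_end.value;
    bc->bits_consumed = s->coder_state_at_header_end.bit_count;
}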
@@ -2540,7 +2578,6 @@ static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
     return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
 }
 
-
 static av_always_inline
 int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                       AVPacket *avpkt, int is_vp7)
@@ -2550,8 +2587,6 @@ int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     enum AVDiscard skip_thresh;
     VP8Frame *av_uninit(curframe), *prev_frame;
 
-    av_assert0(avctx->pix_fmt == AV_PIX_FMT_YUVA420P || avctx->pix_fmt == AV_PIX_FMT_YUV420P);
-
     if (is_vp7)
         ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
     else
@@ -2560,6 +2595,22 @@ int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     if (ret < 0)
         goto err;
 
+    if (s->actually_webp) {
+        // avctx->pix_fmt already set in caller.
+    } else if (!is_vp7 && s->pix_fmt == AV_PIX_FMT_NONE) {
+        enum AVPixelFormat pix_fmts[] = {
+            AV_PIX_FMT_YUV420P,
+            AV_PIX_FMT_NONE,
+        };
+
+        s->pix_fmt = ff_get_format(s->avctx, pix_fmts);
+        if (s->pix_fmt < 0) {
+            ret = AVERROR(EINVAL);
+            goto err;
+        }
+        avctx->pix_fmt = s->pix_fmt;
+    }
+
     prev_frame = s->framep[VP56_FRAME_CURRENT];
 
     referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
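The pix_fmts list above offers only AV_PIX_FMT_YUV420P because this commit adds the plumbing without any actual VP8 hwaccel, so ff_get_format() always negotiates software YUV. Once a backend exists, this list is what advertises the hardware format to the caller. A minimal sketch of that later shape, assuming a VAAPI backend; the CONFIG_VP8_VAAPI_HWACCEL guard and the helper function are illustrative, not part of this patch:

/* Sketch of how the format list could look once a hwaccel backend exists. */
static int get_pixel_format(VP8Context *s)
{
    enum AVPixelFormat pix_fmts[] = {
#if CONFIG_VP8_VAAPI_HWACCEL
        AV_PIX_FMT_VAAPI,   /* hardware surfaces, offered ahead of software */
#endif
        AV_PIX_FMT_YUV420P, /* software fallback, as in the hunk above      */
        AV_PIX_FMT_NONE,
    };

    /* ff_get_format() lets the caller's get_format callback choose. */
    return ff_get_format(s->avctx, pix_fmts);
}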
@@ -2578,7 +2629,7 @@ int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
 
     // release no longer referenced frames
     for (i = 0; i < 5; i++)
-        if (s->frames[i].tf.f->data[0] &&
+        if (s->frames[i].tf.f->buf[0] &&
             &s->frames[i] != prev_frame &&
             &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
@@ -2631,9 +2682,22 @@ int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
 
     s->next_framep[VP56_FRAME_CURRENT] = curframe;
 
     if (avctx->codec->update_thread_context)
         ff_thread_finish_setup(avctx);
 
+    if (avctx->hwaccel) {
+        ret = avctx->hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
+        if (ret < 0)
+            goto err;
+
+        ret = avctx->hwaccel->decode_slice(avctx, avpkt->data, avpkt->size);
+        if (ret < 0)
+            goto err;
+
+        ret = avctx->hwaccel->end_frame(avctx);
+        if (ret < 0)
+            goto err;
+    } else {
     s->linesize = curframe->tf.f->linesize[0];
     s->uvlinesize = curframe->tf.f->linesize[1];
 
@@ -2680,6 +2744,7 @@ int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         else
             avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
                             num_jobs);
+    }
 
     ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
     memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
@@ -2750,6 +2815,7 @@ int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
 
     s->avctx = avctx;
     s->vp7 = avctx->codec->id == AV_CODEC_ID_VP7;
+    s->pix_fmt = AV_PIX_FMT_NONE;
     avctx->pix_fmt = AV_PIX_FMT_YUV420P;
     avctx->internal->allocate_progress = 1;
 
@@ -2823,13 +2889,14 @@ static int vp8_decode_update_thread_context(AVCodecContext *dst,
         s->mb_height = s_src->mb_height;
     }
 
+    s->pix_fmt = s_src->pix_fmt;
     s->prob[0] = s_src->prob[!s_src->update_probabilities];
     s->segmentation = s_src->segmentation;
     s->lf_delta = s_src->lf_delta;
     memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
 
     for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
-        if (s_src->frames[i].tf.f->data[0]) {
+        if (s_src->frames[i].tf.f->buf[0]) {
             int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
             if (ret < 0)
                 return ret;
@@ -2876,5 +2943,8 @@ AVCodec ff_vp8_decoder = {
     .flush = vp8_decode_flush,
     .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
     .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
+    .hw_configs = (const AVCodecHWConfigInternal*[]) {
+        NULL
+    },
 };
 #endif /* CONFIG_VP7_DECODER */
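The hw_configs array added above holds only its NULL terminator, since no VP8 hwaccel exists at this point; it is the attachment point for later backends. As a rough illustration of how later FFmpeg versions populate it (the VAAPI and NVDEC entries and their CONFIG_* guards come from subsequent work, not this commit), the designated initializer eventually reads along these lines:

    .hw_configs            = (const AVCodecHWConfigInternal*[]) {
#if CONFIG_VP8_VAAPI_HWACCEL
                                 HWACCEL_VAAPI(vp8),
#endif
#if CONFIG_VP8_NVDEC_HWACCEL
                                 HWACCEL_NVDEC(vp8),
#endif
                                 NULL
                             },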
libavcodec/vp8.h

@@ -138,12 +138,18 @@ typedef struct VP8ThreadData {
 typedef struct VP8Frame {
     ThreadFrame tf;
     AVBufferRef *seg_map;
+
+    AVBufferRef *hwaccel_priv_buf;
+    void *hwaccel_picture_private;
 } VP8Frame;
 
 #define MAX_THREADS 8
 typedef struct VP8Context {
     VP8ThreadData *thread_data;
     AVCodecContext *avctx;
+    enum AVPixelFormat pix_fmt;
+    int actually_webp;
 
     VP8Frame *framep[4];
     VP8Frame *next_framep[4];
     VP8Frame *curframe;
@@ -172,6 +178,7 @@ typedef struct VP8Context {
         uint8_t enabled;
         uint8_t absolute_vals;
         uint8_t update_map;
+        uint8_t update_feature_data;
         int8_t base_quant[4];
         int8_t filter_level[4];     ///< base loop filter level
     } segmentation;
@@ -199,8 +206,19 @@ typedef struct VP8Context {
         int16_t chroma_qmul[2];
     } qmat[4];
 
+    // Raw quantisation values, which may be needed by hwaccel decode.
+    struct {
+        int yac_qi;
+        int ydc_delta;
+        int y2dc_delta;
+        int y2ac_delta;
+        int uvdc_delta;
+        int uvac_delta;
+    } quant;
+
     struct {
         uint8_t enabled;    ///< whether each mb can have a different strength based on mode/ref
         uint8_t update;
 
        /**
        * filter strength adjustment for the following macroblock modes:
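The reason for keeping these raw index and delta values alongside the expanded qmat tables is that hardware APIs generally take the quantisation indices as coded in the frame header rather than the derived dequant factors. A small sketch of the kind of mapping a backend might do; hw_vp8_quant and its fields are invented for illustration:

#include "vp8.h"

/* Invented hardware quantisation description; not a real API. */
typedef struct hw_vp8_quant {
    int y_ac_index;    /* base AC quantiser index from the frame header */
    int y_dc_delta;
    int y2_dc_delta;
    int y2_ac_delta;
    int uv_dc_delta;
    int uv_ac_delta;
} hw_vp8_quant;

static void fill_hw_quant(const VP8Context *s, hw_vp8_quant *q)
{
    /* Straight copy of the raw header values stored by vp8_get_quants(). */
    q->y_ac_index  = s->quant.yac_qi;
    q->y_dc_delta  = s->quant.ydc_delta;
    q->y2_dc_delta = s->quant.y2dc_delta;
    q->y2_ac_delta = s->quant.y2ac_delta;
    q->uv_dc_delta = s->quant.uvdc_delta;
    q->uv_ac_delta = s->quant.uvac_delta;
}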
@@ -228,6 +246,20 @@ typedef struct VP8Context {
 
     VP56RangeCoder c;   ///< header context, includes mb modes and motion vectors
 
+    /* This contains the entropy coder state at the end of the header
+     * block, in the form specified by the standard. For use by
+     * hwaccels, so that a hardware decoder has the information to
+     * start decoding at the macroblock layer.
+     */
+    struct {
+        const uint8_t *input;
+        uint32_t range;
+        uint32_t value;
+        int bit_count;
+    } coder_state_at_header_end;
+
+    int header_partition_size;
+
     /**
      * These are all of the updatable probabilities for binary decisions.
      * They are only implicitly reset on keyframes, making it quite likely
@@ -265,6 +297,7 @@ typedef struct VP8Context {
      */
     int num_coeff_partitions;
     VP56RangeCoder coeff_partition[8];
+    int coeff_partition_size[8];
     VideoDSPContext vdsp;
     VP8DSPContext vp8dsp;
     H264PredContext hpc;
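Similarly, header_partition_size and coeff_partition_size[] record the byte lengths of the first (mode/header) partition and of each DCT token partition, which is the layout information a hardware decoder needs to locate the partitions inside the packet. A hypothetical example of packaging them; the hw_vp8_partitions struct is invented for illustration:

#include <stdint.h>
#include "vp8.h"

/* Invented partition-layout description; not a real API. */
typedef struct hw_vp8_partitions {
    uint32_t first_partition_size;     /* header/mode partition, after the frame tag */
    uint32_t token_partition_count;
    uint32_t token_partition_size[8];
} hw_vp8_partitions;

static void fill_hw_partitions(const VP8Context *s, hw_vp8_partitions *p)
{
    int i;

    /* header_partition_size and coeff_partition_size[] are the new fields
     * filled by vp8_decode_frame_header() and setup_partitions(). */
    p->first_partition_size  = s->header_partition_size;
    p->token_partition_count = s->num_coeff_partitions;
    for (i = 0; i < s->num_coeff_partitions; i++)
        p->token_partition_size[i] = s->coeff_partition_size[i];
}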
libavcodec/webp.c

@@ -1335,6 +1335,7 @@ static int vp8_lossy_decode_frame(AVCodecContext *avctx, AVFrame *p,
     if (!s->initialized) {
         ff_vp8_decode_init(avctx);
         s->initialized = 1;
+        s->v.actually_webp = 1;
     }
     avctx->pix_fmt = s->has_alpha ? AV_PIX_FMT_YUVA420P : AV_PIX_FMT_YUV420P;
     s->lossless = 0;