From 4e528206bc4d968706401206cf54471739250ec7 Mon Sep 17 00:00:00 2001 From: Mark Thompson Date: Sun, 4 Sep 2016 13:26:37 +0100 Subject: [PATCH 1/4] vp8: Add hwaccel hooks Also adds some extra fields to the main context structure that may be needed by a hwaccel decoder. --- libavcodec/vp8.c | 185 ++++++++++++++++++++++++++++++++--------------- libavcodec/vp8.h | 32 ++++++++ 2 files changed, 157 insertions(+), 60 deletions(-) diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c index 546124cdf2..ced49799bc 100644 --- a/libavcodec/vp8.c +++ b/libavcodec/vp8.c @@ -64,16 +64,30 @@ static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref) if ((ret = ff_thread_get_buffer(s->avctx, &f->tf, ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0) return ret; - if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) { - ff_thread_release_buffer(s->avctx, &f->tf); - return AVERROR(ENOMEM); + if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) + goto fail; + if (s->avctx->hwaccel) { + const AVHWAccel *hwaccel = s->avctx->hwaccel; + if (hwaccel->frame_priv_data_size) { + f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size); + if (!f->hwaccel_priv_buf) + goto fail; + f->hwaccel_picture_private = f->hwaccel_priv_buf->data; + } } return 0; + +fail: + av_buffer_unref(&f->seg_map); + ff_thread_release_buffer(s->avctx, &f->tf); + return AVERROR(ENOMEM); } static void vp8_release_frame(VP8Context *s, VP8Frame *f) { av_buffer_unref(&f->seg_map); + av_buffer_unref(&f->hwaccel_priv_buf); + f->hwaccel_picture_private = NULL; ff_thread_release_buffer(s->avctx, &f->tf); } @@ -91,6 +105,12 @@ static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src) vp8_release_frame(s, dst); return AVERROR(ENOMEM); } + if (src->hwaccel_picture_private) { + dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf); + if (!dst->hwaccel_priv_buf) + return AVERROR(ENOMEM); + dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data; + } return 0; } @@ -132,7 +152,7 @@ 
static VP8Frame *vp8_find_free_buffer(VP8Context *s) av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n"); abort(); } - if (frame->tf.f->data[0]) + if (frame->tf.f->buf[0]) vp8_release_frame(s, frame); return frame; @@ -209,8 +229,9 @@ static void parse_segment_info(VP8Context *s) int i; s->segmentation.update_map = vp8_rac_get(c); + s->segmentation.update_feature_data = vp8_rac_get(c); - if (vp8_rac_get(c)) { // update segment feature data + if (s->segmentation.update_feature_data) { s->segmentation.absolute_vals = vp8_rac_get(c); for (i = 0; i < 4; i++) @@ -264,11 +285,14 @@ static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size) int size = AV_RL24(sizes + 3 * i); if (buf_size - size < 0) return -1; + s->coeff_partition_size[i] = size; ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size); buf += size; buf_size -= size; } + + s->coeff_partition_size[i] = buf_size; ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size); return 0; @@ -298,28 +322,28 @@ static void get_quants(VP8Context *s) VP56RangeCoder *c = &s->c; int i, base_qi; - int yac_qi = vp8_rac_get_uint(c, 7); - int ydc_delta = vp8_rac_get_sint(c, 4); - int y2dc_delta = vp8_rac_get_sint(c, 4); - int y2ac_delta = vp8_rac_get_sint(c, 4); - int uvdc_delta = vp8_rac_get_sint(c, 4); - int uvac_delta = vp8_rac_get_sint(c, 4); + s->quant.yac_qi = vp8_rac_get_uint(c, 7); + s->quant.ydc_delta = vp8_rac_get_sint(c, 4); + s->quant.y2dc_delta = vp8_rac_get_sint(c, 4); + s->quant.y2ac_delta = vp8_rac_get_sint(c, 4); + s->quant.uvdc_delta = vp8_rac_get_sint(c, 4); + s->quant.uvac_delta = vp8_rac_get_sint(c, 4); for (i = 0; i < 4; i++) { if (s->segmentation.enabled) { base_qi = s->segmentation.base_quant[i]; if (!s->segmentation.absolute_vals) - base_qi += yac_qi; + base_qi += s->quant.yac_qi; } else - base_qi = yac_qi; + base_qi = s->quant.yac_qi; - s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta, 7)]; + s->qmat[i].luma_qmul[0] = 
vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.ydc_delta, 7)]; s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi, 7)]; - s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)] * 2; + s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.y2dc_delta, 7)] * 2; /* 101581>>16 is equivalent to 155/100 */ - s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] * 101581 >> 16; - s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)]; - s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)]; + s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.y2ac_delta, 7)] * 101581 >> 16; + s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.uvdc_delta, 7)]; + s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.uvac_delta, 7)]; s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8); s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132); @@ -637,6 +661,8 @@ static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_si buf += 3; buf_size -= 3; + s->header_partition_size = header_size; + if (s->profile > 3) av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile); @@ -700,9 +726,11 @@ static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_si s->filter.level = vp8_rac_get_uint(c, 6); s->filter.sharpness = vp8_rac_get_uint(c, 3); - if ((s->lf_delta.enabled = vp8_rac_get(c))) - if (vp8_rac_get(c)) + if ((s->lf_delta.enabled = vp8_rac_get(c))) { + s->lf_delta.update = vp8_rac_get(c); + if (s->lf_delta.update) update_lf_deltas(s); + } if (setup_partitions(s, buf, buf_size)) { av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n"); @@ -741,6 +769,13 @@ static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_si vp78_update_pred16x16_pred8x8_mvc_probabilities(s, 
VP8_MVC_SIZE); } + // Record the entropy coder state here so that hwaccels can use it. + s->c.code_word = vp56_rac_renorm(&s->c); + s->coder_state_at_header_end.input = s->c.buffer - (-s->c.bits / 8); + s->coder_state_at_header_end.range = s->c.high; + s->coder_state_at_header_end.value = s->c.code_word >> 16; + s->coder_state_at_header_end.bit_count = -s->c.bits % 8; + return 0; } @@ -2462,7 +2497,6 @@ static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8); } - static av_always_inline int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *avpkt, int is_vp7) @@ -2480,6 +2514,20 @@ int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, if (ret < 0) goto err; + if (!is_vp7 && s->pix_fmt == AV_PIX_FMT_NONE) { + enum AVPixelFormat pix_fmts[] = { + AV_PIX_FMT_YUV420P, + AV_PIX_FMT_NONE, + }; + + s->pix_fmt = ff_get_format(s->avctx, pix_fmts); + if (s->pix_fmt < 0) { + ret = AVERROR(EINVAL); + goto err; + } + avctx->pix_fmt = s->pix_fmt; + } + prev_frame = s->framep[VP56_FRAME_CURRENT]; referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT || @@ -2555,51 +2603,67 @@ int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, ff_thread_finish_setup(avctx); - s->linesize = curframe->tf.f->linesize[0]; - s->uvlinesize = curframe->tf.f->linesize[1]; + if (avctx->hwaccel) { + ret = avctx->hwaccel->start_frame(avctx, avpkt->data, avpkt->size); + if (ret < 0) + goto err; - memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz)); - /* Zero macroblock structures for top/top-left prediction - * from outside the frame. 
*/ - if (!s->mb_layout) - memset(s->macroblocks + s->mb_height * 2 - 1, 0, - (s->mb_width + 1) * sizeof(*s->macroblocks)); - if (!s->mb_layout && s->keyframe) - memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4); + ret = avctx->hwaccel->decode_slice(avctx, avpkt->data, avpkt->size); + if (ret < 0) + goto err; - memset(s->ref_count, 0, sizeof(s->ref_count)); + ret = avctx->hwaccel->end_frame(avctx); + if (ret < 0) + goto err; - if (s->mb_layout == 1) { - // Make sure the previous frame has read its segmentation map, - // if we re-use the same map. - if (prev_frame && s->segmentation.enabled && - !s->segmentation.update_map) - ff_thread_await_progress(&prev_frame->tf, 1, 0); - if (is_vp7) - vp7_decode_mv_mb_modes(avctx, curframe, prev_frame); + } else { + s->linesize = curframe->tf.f->linesize[0]; + s->uvlinesize = curframe->tf.f->linesize[1]; + + memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz)); + /* Zero macroblock structures for top/top-left prediction + * from outside the frame. */ + if (!s->mb_layout) + memset(s->macroblocks + s->mb_height * 2 - 1, 0, + (s->mb_width + 1) * sizeof(*s->macroblocks)); + if (!s->mb_layout && s->keyframe) + memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4); + + memset(s->ref_count, 0, sizeof(s->ref_count)); + + if (s->mb_layout == 1) { + // Make sure the previous frame has read its segmentation map, + // if we re-use the same map. 
+ if (prev_frame && s->segmentation.enabled && + !s->segmentation.update_map) + ff_thread_await_progress(&prev_frame->tf, 1, 0); + if (is_vp7) + vp7_decode_mv_mb_modes(avctx, curframe, prev_frame); + else + vp8_decode_mv_mb_modes(avctx, curframe, prev_frame); + } + + if (avctx->active_thread_type == FF_THREAD_FRAME) + num_jobs = 1; else - vp8_decode_mv_mb_modes(avctx, curframe, prev_frame); - } + num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count); + s->num_jobs = num_jobs; + s->curframe = curframe; + s->prev_frame = prev_frame; + s->mv_min.y = -MARGIN; + s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN; + for (i = 0; i < MAX_THREADS; i++) { + s->thread_data[i].thread_mb_pos = 0; + s->thread_data[i].wait_mb_pos = INT_MAX; + } - if (avctx->active_thread_type == FF_THREAD_FRAME) - num_jobs = 1; - else - num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count); - s->num_jobs = num_jobs; - s->curframe = curframe; - s->prev_frame = prev_frame; - s->mv_min.y = -MARGIN; - s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN; - for (i = 0; i < MAX_THREADS; i++) { - s->thread_data[i].thread_mb_pos = 0; - s->thread_data[i].wait_mb_pos = INT_MAX; + if (is_vp7) + avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL, + num_jobs); + else + avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, + num_jobs); } - if (is_vp7) - avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL, - num_jobs); - else - avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, - num_jobs); ff_thread_report_progress(&curframe->tf, INT_MAX, 0); memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4); @@ -2666,6 +2730,7 @@ int vp78_decode_init(AVCodecContext *avctx, int is_vp7) int ret; s->avctx = avctx; + s->pix_fmt = AV_PIX_FMT_NONE; avctx->pix_fmt = AV_PIX_FMT_YUV420P; avctx->internal->allocate_progress = 1; diff --git a/libavcodec/vp8.h b/libavcodec/vp8.h index 65948e1d6b..1870705ad2 100644 --- a/libavcodec/vp8.h 
+++ b/libavcodec/vp8.h @@ -130,12 +130,17 @@ typedef struct VP8ThreadData { typedef struct VP8Frame { ThreadFrame tf; AVBufferRef *seg_map; + + AVBufferRef *hwaccel_priv_buf; + void *hwaccel_picture_private; } VP8Frame; #define MAX_THREADS 8 typedef struct VP8Context { VP8ThreadData *thread_data; AVCodecContext *avctx; + enum AVPixelFormat pix_fmt; + VP8Frame *framep[4]; VP8Frame *next_framep[4]; VP8Frame *curframe; @@ -165,6 +170,7 @@ typedef struct VP8Context { uint8_t enabled; uint8_t absolute_vals; uint8_t update_map; + uint8_t update_feature_data; int8_t base_quant[4]; int8_t filter_level[4]; ///< base loop filter level } segmentation; @@ -192,8 +198,19 @@ typedef struct VP8Context { int16_t chroma_qmul[2]; } qmat[4]; + // Raw quantisation values, which may be needed by hwaccel decode. + struct { + int yac_qi; + int ydc_delta; + int y2dc_delta; + int y2ac_delta; + int uvdc_delta; + int uvac_delta; + } quant; + struct { uint8_t enabled; ///< whether each mb can have a different strength based on mode/ref + uint8_t update; /** * filter strength adjustment for the following macroblock modes: @@ -221,6 +238,20 @@ typedef struct VP8Context { VP56RangeCoder c; ///< header context, includes mb modes and motion vectors + /* This contains the entropy coder state at the end of the header + * block, in the form specified by the standard. For use by + * hwaccels, so that a hardware decoder has the information to + * start decoding at the macroblock layer. + */ + struct { + const uint8_t *input; + uint32_t range; + uint32_t value; + int bit_count; + } coder_state_at_header_end; + + int header_partition_size; + /** * These are all of the updatable probabilities for binary decisions. 
* They are only implicitly reset on keyframes, making it quite likely @@ -258,6 +289,7 @@ typedef struct VP8Context { */ int num_coeff_partitions; VP56RangeCoder coeff_partition[8]; + int coeff_partition_size[8]; VideoDSPContext vdsp; VP8DSPContext vp8dsp; H264PredContext hpc; From a9fb134730da1f9642eb5a2baa50943b8a4aa245 Mon Sep 17 00:00:00 2001 From: Mark Thompson Date: Sun, 4 Sep 2016 13:28:10 +0100 Subject: [PATCH 2/4] lavc/vaapi: Add VP8 decode hwaccel --- configure | 3 + libavcodec/Makefile | 1 + libavcodec/allcodecs.c | 1 + libavcodec/vaapi_vp8.c | 231 +++++++++++++++++++++++++++++++++++++++++ libavcodec/vp8.c | 3 + 5 files changed, 239 insertions(+) create mode 100644 libavcodec/vaapi_vp8.c diff --git a/configure b/configure index 7ad920f4be..520f07ccff 100755 --- a/configure +++ b/configure @@ -2185,6 +2185,8 @@ vc1_vaapi_hwaccel_deps="vaapi" vc1_vaapi_hwaccel_select="vc1_decoder" vc1_vdpau_hwaccel_deps="vdpau" vc1_vdpau_hwaccel_select="vc1_decoder" +vp8_vaapi_hwaccel_deps="vaapi VAPictureParameterBufferVP8" +vp8_vaapi_hwaccel_select="vp8_decoder" wmv3_d3d11va_hwaccel_select="vc1_d3d11va_hwaccel" wmv3_dxva2_hwaccel_select="vc1_dxva2_hwaccel" wmv3_vaapi_hwaccel_select="vc1_vaapi_hwaccel" @@ -4544,6 +4546,7 @@ check_type "windows.h dxva.h" "DXVA_PicParams_HEVC" -DWINAPI_FAMILY=WINAPI_FAMIL check_type "windows.h d3d11.h" "ID3D11VideoDecoder" check_type "d3d9.h dxva2api.h" DXVA2_ConfigPictureDecode -D_WIN32_WINNT=0x0602 +check_type "va/va.h va/va_dec_vp8.h" "VAPictureParameterBufferVP8" check_type "va/va.h va/va_vpp.h" "VAProcPipelineParameterBuffer" check_type "va/va.h va/va_enc_h264.h" "VAEncPictureParameterBufferH264" check_type "va/va.h va/va_enc_hevc.h" "VAEncPictureParameterBufferHEVC" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 974480f06d..bec461b80c 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -634,6 +634,7 @@ OBJS-$(CONFIG_VC1_D3D11VA_HWACCEL) += dxva2_vc1.o OBJS-$(CONFIG_VC1_DXVA2_HWACCEL) += dxva2_vc1.o 
OBJS-$(CONFIG_VC1_VAAPI_HWACCEL) += vaapi_vc1.o OBJS-$(CONFIG_VC1_VDPAU_HWACCEL) += vdpau_vc1.o +OBJS-$(CONFIG_VP8_VAAPI_HWACCEL) += vaapi_vp8.o # libavformat dependencies OBJS-$(CONFIG_ISO_MEDIA) += mpeg4audio.o mpegaudiodata.o diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c index e259de2510..41af38eb7c 100644 --- a/libavcodec/allcodecs.c +++ b/libavcodec/allcodecs.c @@ -94,6 +94,7 @@ void avcodec_register_all(void) REGISTER_HWACCEL(VC1_VAAPI, vc1_vaapi); REGISTER_HWACCEL(VC1_VDPAU, vc1_vdpau); REGISTER_HWACCEL(VC1_MMAL, vc1_mmal); + REGISTER_HWACCEL(VP8_VAAPI, vp8_vaapi); REGISTER_HWACCEL(WMV3_D3D11VA, wmv3_d3d11va); REGISTER_HWACCEL(WMV3_DXVA2, wmv3_dxva2); REGISTER_HWACCEL(WMV3_VAAPI, wmv3_vaapi); diff --git a/libavcodec/vaapi_vp8.c b/libavcodec/vaapi_vp8.c new file mode 100644 index 0000000000..a130c04e1d --- /dev/null +++ b/libavcodec/vaapi_vp8.c @@ -0,0 +1,231 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "vaapi_decode.h" +#include "vp8.h" + +static VASurfaceID vaapi_vp8_surface_id(VP8Frame *vf) +{ + if (vf) + return ff_vaapi_get_surface_id(vf->tf.f); + else + return VA_INVALID_SURFACE; +} + +static int vaapi_vp8_start_frame(AVCodecContext *avctx, + av_unused const uint8_t *buffer, + av_unused uint32_t size) +{ + const VP8Context *s = avctx->priv_data; + VAAPIDecodePicture *pic = s->framep[VP56_FRAME_CURRENT]->hwaccel_picture_private; + VAPictureParameterBufferVP8 pp; + VAProbabilityDataBufferVP8 prob; + VAIQMatrixBufferVP8 quant; + int err, i, j, k; + + pic->output_surface = vaapi_vp8_surface_id(s->framep[VP56_FRAME_CURRENT]); + + pp = (VAPictureParameterBufferVP8) { + .frame_width = avctx->width, + .frame_height = avctx->height, + + .last_ref_frame = vaapi_vp8_surface_id(s->framep[VP56_FRAME_PREVIOUS]), + .golden_ref_frame = vaapi_vp8_surface_id(s->framep[VP56_FRAME_GOLDEN]), + .alt_ref_frame = vaapi_vp8_surface_id(s->framep[VP56_FRAME_GOLDEN2]), + .out_of_loop_frame = VA_INVALID_SURFACE, + + .pic_fields.bits = { + .key_frame = !s->keyframe, + .version = s->profile, + + .segmentation_enabled = s->segmentation.enabled, + .update_mb_segmentation_map = s->segmentation.update_map, + .update_segment_feature_data = s->segmentation.update_feature_data, + + .filter_type = s->filter.simple, + .sharpness_level = s->filter.sharpness, + + .loop_filter_adj_enable = s->lf_delta.enabled, + .mode_ref_lf_delta_update = s->lf_delta.update, + + .sign_bias_golden = s->sign_bias[VP56_FRAME_GOLDEN], + .sign_bias_alternate = s->sign_bias[VP56_FRAME_GOLDEN2], + + .mb_no_coeff_skip = s->mbskip_enabled, + .loop_filter_disable = s->filter.level == 0, + }, + + .prob_skip_false = s->prob->mbskip, + .prob_intra = s->prob->intra, + .prob_last = s->prob->last, + 
.prob_gf = s->prob->golden, + }; + + for (i = 0; i < 3; i++) + pp.mb_segment_tree_probs[i] = s->prob->segmentid[i]; + + for (i = 0; i < 4; i++) { + if (s->segmentation.enabled) { + pp.loop_filter_level[i] = s->segmentation.filter_level[i]; + if (!s->segmentation.absolute_vals) + pp.loop_filter_level[i] += s->filter.level; + } else { + pp.loop_filter_level[i] = s->filter.level; + } + pp.loop_filter_level[i] = av_clip_uintp2(pp.loop_filter_level[i], 6); + } + + for (i = 0; i < 4; i++) { + pp.loop_filter_deltas_ref_frame[i] = s->lf_delta.ref[i]; + pp.loop_filter_deltas_mode[i] = s->lf_delta.mode[i + 4]; + } + + if (s->keyframe) { + static const uint8_t keyframe_y_mode_probs[4] = { + 145, 156, 163, 128 + }; + static const uint8_t keyframe_uv_mode_probs[3] = { + 142, 114, 183 + }; + memcpy(pp.y_mode_probs, keyframe_y_mode_probs, 4); + memcpy(pp.uv_mode_probs, keyframe_uv_mode_probs, 3); + } else { + for (i = 0; i < 4; i++) + pp.y_mode_probs[i] = s->prob->pred16x16[i]; + for (i = 0; i < 3; i++) + pp.uv_mode_probs[i] = s->prob->pred8x8c[i]; + } + for (i = 0; i < 2; i++) + for (j = 0; j < 19; j++) + pp.mv_probs[i][j] = s->prob->mvc[i][j]; + + pp.bool_coder_ctx.range = s->coder_state_at_header_end.range; + pp.bool_coder_ctx.value = s->coder_state_at_header_end.value; + pp.bool_coder_ctx.count = s->coder_state_at_header_end.bit_count; + + err = ff_vaapi_decode_make_param_buffer(avctx, pic, + VAPictureParameterBufferType, + &pp, sizeof(pp)); + if (err < 0) + goto fail; + + for (i = 0; i < 4; i++) { + for (j = 0; j < 8; j++) { + static const int coeff_bands_inverse[8] = { + 0, 1, 2, 3, 5, 6, 4, 15 + }; + int coeff_pos = coeff_bands_inverse[j]; + + for (k = 0; k < 3; k++) { + memcpy(prob.dct_coeff_probs[i][j][k], + s->prob->token[i][coeff_pos][k], 11); + } + } + } + + err = ff_vaapi_decode_make_param_buffer(avctx, pic, + VAProbabilityBufferType, + &prob, sizeof(prob)); + if (err < 0) + goto fail; + + for (i = 0; i < 4; i++) { + int base_qi = s->segmentation.base_quant[i]; + if 
(!s->segmentation.absolute_vals) + base_qi += s->quant.yac_qi; + + quant.quantization_index[i][0] = av_clip_uintp2(base_qi, 7); + quant.quantization_index[i][1] = av_clip_uintp2(base_qi + s->quant.ydc_delta, 7); + quant.quantization_index[i][2] = av_clip_uintp2(base_qi + s->quant.y2dc_delta, 7); + quant.quantization_index[i][3] = av_clip_uintp2(base_qi + s->quant.y2ac_delta, 7); + quant.quantization_index[i][4] = av_clip_uintp2(base_qi + s->quant.uvdc_delta, 7); + quant.quantization_index[i][5] = av_clip_uintp2(base_qi + s->quant.uvac_delta, 7); + } + + err = ff_vaapi_decode_make_param_buffer(avctx, pic, + VAIQMatrixBufferType, + &quant, sizeof(quant)); + if (err < 0) + goto fail; + + return 0; + +fail: + ff_vaapi_decode_cancel(avctx, pic); + return err; +} + +static int vaapi_vp8_end_frame(AVCodecContext *avctx) +{ + const VP8Context *s = avctx->priv_data; + VAAPIDecodePicture *pic = s->framep[VP56_FRAME_CURRENT]->hwaccel_picture_private; + + return ff_vaapi_decode_issue(avctx, pic); +} + +static int vaapi_vp8_decode_slice(AVCodecContext *avctx, + const uint8_t *buffer, + uint32_t size) +{ + const VP8Context *s = avctx->priv_data; + VAAPIDecodePicture *pic = s->framep[VP56_FRAME_CURRENT]->hwaccel_picture_private; + VASliceParameterBufferVP8 sp; + int err, i; + + unsigned int header_size = 3 + 7 * s->keyframe; + const uint8_t *data = buffer + header_size; + unsigned int data_size = size - header_size; + + sp = (VASliceParameterBufferVP8) { + .slice_data_size = data_size, + .slice_data_offset = 0, + .slice_data_flag = VA_SLICE_DATA_FLAG_ALL, + + .macroblock_offset = (8 * (s->coder_state_at_header_end.input - data) - + s->coder_state_at_header_end.bit_count - 8), + .num_of_partitions = s->num_coeff_partitions + 1, + }; + + sp.partition_size[0] = s->header_partition_size - ((sp.macroblock_offset + 7) / 8); + for (i = 0; i < 8; i++) + sp.partition_size[i+1] = s->coeff_partition_size[i]; + + err = ff_vaapi_decode_make_slice_buffer(avctx, pic, &sp, sizeof(sp), data, 
data_size); + if (err) + goto fail; + + return 0; + +fail: + ff_vaapi_decode_cancel(avctx, pic); + return err; +} + +AVHWAccel ff_vp8_vaapi_hwaccel = { + .name = "vp8_vaapi", + .type = AVMEDIA_TYPE_VIDEO, + .id = AV_CODEC_ID_VP8, + .pix_fmt = AV_PIX_FMT_VAAPI, + .start_frame = &vaapi_vp8_start_frame, + .end_frame = &vaapi_vp8_end_frame, + .decode_slice = &vaapi_vp8_decode_slice, + .frame_priv_data_size = sizeof(VAAPIDecodePicture), + .init = &ff_vaapi_decode_init, + .uninit = &ff_vaapi_decode_uninit, + .priv_data_size = sizeof(VAAPIDecodeContext), +}; diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c index ced49799bc..bf1b03e9f7 100644 --- a/libavcodec/vp8.c +++ b/libavcodec/vp8.c @@ -2516,6 +2516,9 @@ int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, if (!is_vp7 && s->pix_fmt == AV_PIX_FMT_NONE) { enum AVPixelFormat pix_fmts[] = { +#if CONFIG_VP8_VAAPI_HWACCEL + AV_PIX_FMT_VAAPI, +#endif AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE, }; From 11c191b52ce0768370e38a2726132f9223e701f6 Mon Sep 17 00:00:00 2001 From: Mark Thompson Date: Sun, 4 Sep 2016 13:33:15 +0100 Subject: [PATCH 3/4] vaapi_decode: Ignore the profile when not useful Enables VP8 decoding - the decoder places the bitstream version in the profile field, which we want to ignore.
--- libavcodec/vaapi_decode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libavcodec/vaapi_decode.c b/libavcodec/vaapi_decode.c index 51b6d47a66..ab8445afc0 100644 --- a/libavcodec/vaapi_decode.c +++ b/libavcodec/vaapi_decode.c @@ -320,7 +320,8 @@ static int vaapi_decode_make_config(AVCodecContext *avctx) int profile_match = 0; if (avctx->codec_id != vaapi_profile_map[i].codec_id) continue; - if (avctx->profile == vaapi_profile_map[i].codec_profile) + if (avctx->profile == vaapi_profile_map[i].codec_profile || + vaapi_profile_map[i].codec_profile == FF_PROFILE_UNKNOWN) profile_match = 1; profile = vaapi_profile_map[i].va_profile; for (j = 0; j < profile_count; j++) { From 75d642a944d5579e4ef20ff3701422a64692afcf Mon Sep 17 00:00:00 2001 From: Mark Thompson Date: Fri, 9 Sep 2016 15:59:13 +0100 Subject: [PATCH 4/4] vaapi_vp8: Explicitly include libva vp8 decode header With some old libva versions <va/va.h> does not automatically include the per-codec subsidiary headers, so we need to include the right one explicitly ourselves. --- libavcodec/vaapi_vp8.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libavcodec/vaapi_vp8.c b/libavcodec/vaapi_vp8.c index a130c04e1d..70e9cec3d4 100644 --- a/libavcodec/vaapi_vp8.c +++ b/libavcodec/vaapi_vp8.c @@ -16,6 +16,9 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include <va/va.h> +#include <va/va_dec_vp8.h> + #include "vaapi_decode.h" #include "vp8.h"