mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-29 22:00:58 +02:00
libavcodec/vaapi_encode: Add async_depth to vaapi_encoder to increase performance
Fix: #7706. After commit 5fdcf85bbffe7451c2, the vaapi encoder's performance decreased. The reason is that vaRenderPicture() and vaSyncBuffer() are called together (vaRenderPicture() is always immediately followed by a vaSyncBuffer()). They are now called asynchronously, which makes better use of the hardware. The async_depth option is added to increase the encoder's performance. Frames that have been sent to the hardware are stored in a FIFO, and the encoder syncs output once the async FIFO is full. Signed-off-by: Wenbin Chen <wenbin.chen@intel.com> Signed-off-by: Haihao Xiang <haihao.xiang@intel.com>
This commit is contained in:
parent
e0ff869930
commit
d165ce22a4
@ -3591,6 +3591,12 @@ will refer only to P- or I-frames. When set to greater values multiple layers
|
||||
of B-frames will be present, frames in each layer only referring to frames in
|
||||
higher layers.
|
||||
|
||||
@item async_depth
|
||||
Maximum processing parallelism. Increase this to improve single channel
|
||||
performance. This option doesn't work if the driver doesn't implement the vaSyncBuffer
|
||||
function. Please make sure there are enough hw_frames allocated if a large
|
||||
async_depth value is used.
|
||||
|
||||
@item rc_mode
|
||||
Set the rate control mode to use. A given driver may only support a subset of
|
||||
modes.
|
||||
|
@ -965,8 +965,10 @@ static int vaapi_encode_pick_next(AVCodecContext *avctx,
|
||||
if (!pic && ctx->end_of_stream) {
|
||||
--b_counter;
|
||||
pic = ctx->pic_end;
|
||||
if (pic->encode_issued)
|
||||
if (pic->encode_complete)
|
||||
return AVERROR_EOF;
|
||||
else if (pic->encode_issued)
|
||||
return AVERROR(EAGAIN);
|
||||
}
|
||||
|
||||
if (!pic) {
|
||||
@ -1137,7 +1139,8 @@ static int vaapi_encode_send_frame(AVCodecContext *avctx, AVFrame *frame)
|
||||
if (ctx->input_order == ctx->decode_delay)
|
||||
ctx->dts_pts_diff = pic->pts - ctx->first_pts;
|
||||
if (ctx->output_delay > 0)
|
||||
ctx->ts_ring[ctx->input_order % (3 * ctx->output_delay)] = pic->pts;
|
||||
ctx->ts_ring[ctx->input_order %
|
||||
(3 * ctx->output_delay + ctx->async_depth)] = pic->pts;
|
||||
|
||||
pic->display_order = ctx->input_order;
|
||||
++ctx->input_order;
|
||||
@ -1191,18 +1194,47 @@ int ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
|
||||
return AVERROR(EAGAIN);
|
||||
}
|
||||
|
||||
pic = NULL;
|
||||
err = vaapi_encode_pick_next(avctx, &pic);
|
||||
if (err < 0)
|
||||
return err;
|
||||
av_assert0(pic);
|
||||
if (ctx->has_sync_buffer_func) {
|
||||
pic = NULL;
|
||||
|
||||
pic->encode_order = ctx->encode_order++;
|
||||
if (av_fifo_can_write(ctx->encode_fifo)) {
|
||||
err = vaapi_encode_pick_next(avctx, &pic);
|
||||
if (!err) {
|
||||
av_assert0(pic);
|
||||
pic->encode_order = ctx->encode_order +
|
||||
av_fifo_can_read(ctx->encode_fifo);
|
||||
err = vaapi_encode_issue(avctx, pic);
|
||||
if (err < 0) {
|
||||
av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
|
||||
return err;
|
||||
}
|
||||
av_fifo_write(ctx->encode_fifo, &pic, 1);
|
||||
}
|
||||
}
|
||||
|
||||
err = vaapi_encode_issue(avctx, pic);
|
||||
if (err < 0) {
|
||||
av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
|
||||
return err;
|
||||
if (!av_fifo_can_read(ctx->encode_fifo))
|
||||
return err;
|
||||
|
||||
// More frames can be buffered
|
||||
if (av_fifo_can_write(ctx->encode_fifo) && !ctx->end_of_stream)
|
||||
return AVERROR(EAGAIN);
|
||||
|
||||
av_fifo_read(ctx->encode_fifo, &pic, 1);
|
||||
ctx->encode_order = pic->encode_order + 1;
|
||||
} else {
|
||||
pic = NULL;
|
||||
err = vaapi_encode_pick_next(avctx, &pic);
|
||||
if (err < 0)
|
||||
return err;
|
||||
av_assert0(pic);
|
||||
|
||||
pic->encode_order = ctx->encode_order++;
|
||||
|
||||
err = vaapi_encode_issue(avctx, pic);
|
||||
if (err < 0) {
|
||||
av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
err = vaapi_encode_output(avctx, pic, pkt);
|
||||
@ -1220,7 +1252,7 @@ int ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
|
||||
pkt->dts = ctx->ts_ring[pic->encode_order] - ctx->dts_pts_diff;
|
||||
} else {
|
||||
pkt->dts = ctx->ts_ring[(pic->encode_order - ctx->decode_delay) %
|
||||
(3 * ctx->output_delay)];
|
||||
(3 * ctx->output_delay + ctx->async_depth)];
|
||||
}
|
||||
av_log(avctx, AV_LOG_DEBUG, "Output packet: pts %"PRId64" dts %"PRId64".\n",
|
||||
pkt->pts, pkt->dts);
|
||||
@ -2541,6 +2573,11 @@ av_cold int ff_vaapi_encode_init(AVCodecContext *avctx)
|
||||
vas = vaSyncBuffer(ctx->hwctx->display, VA_INVALID_ID, 0);
|
||||
if (vas != VA_STATUS_ERROR_UNIMPLEMENTED) {
|
||||
ctx->has_sync_buffer_func = 1;
|
||||
ctx->encode_fifo = av_fifo_alloc2(ctx->async_depth,
|
||||
sizeof(VAAPIEncodePicture *),
|
||||
0);
|
||||
if (!ctx->encode_fifo)
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -2581,6 +2618,7 @@ av_cold int ff_vaapi_encode_close(AVCodecContext *avctx)
|
||||
|
||||
av_freep(&ctx->codec_sequence_params);
|
||||
av_freep(&ctx->codec_picture_params);
|
||||
av_fifo_freep2(&ctx->encode_fifo);
|
||||
|
||||
av_buffer_unref(&ctx->recon_frames_ref);
|
||||
av_buffer_unref(&ctx->input_frames_ref);
|
||||
|
@ -29,6 +29,7 @@
|
||||
|
||||
#include "libavutil/hwcontext.h"
|
||||
#include "libavutil/hwcontext_vaapi.h"
|
||||
#include "libavutil/fifo.h"
|
||||
|
||||
#include "avcodec.h"
|
||||
#include "hwconfig.h"
|
||||
@ -47,6 +48,7 @@ enum {
|
||||
MAX_TILE_ROWS = 22,
|
||||
// A.4.1: table A.6 allows at most 20 tile columns for any level.
|
||||
MAX_TILE_COLS = 20,
|
||||
MAX_ASYNC_DEPTH = 64,
|
||||
};
|
||||
|
||||
extern const AVCodecHWConfigInternal *const ff_vaapi_encode_hw_configs[];
|
||||
@ -297,7 +299,8 @@ typedef struct VAAPIEncodeContext {
|
||||
// Timestamp handling.
|
||||
int64_t first_pts;
|
||||
int64_t dts_pts_diff;
|
||||
int64_t ts_ring[MAX_REORDER_DELAY * 3];
|
||||
int64_t ts_ring[MAX_REORDER_DELAY * 3 +
|
||||
MAX_ASYNC_DEPTH];
|
||||
|
||||
// Slice structure.
|
||||
int slice_block_rows;
|
||||
@ -348,6 +351,10 @@ typedef struct VAAPIEncodeContext {
|
||||
|
||||
// Whether the driver supports vaSyncBuffer
|
||||
int has_sync_buffer_func;
|
||||
// FIFO of pictures that have been issued for encoding but not yet output
|
||||
AVFifo *encode_fifo;
|
||||
// Max number of frames buffered in the encoder.
|
||||
int async_depth;
|
||||
} VAAPIEncodeContext;
|
||||
|
||||
enum {
|
||||
@ -458,7 +465,12 @@ int ff_vaapi_encode_close(AVCodecContext *avctx);
|
||||
{ "b_depth", \
|
||||
"Maximum B-frame reference depth", \
|
||||
OFFSET(common.desired_b_depth), AV_OPT_TYPE_INT, \
|
||||
{ .i64 = 1 }, 1, INT_MAX, FLAGS }
|
||||
{ .i64 = 1 }, 1, INT_MAX, FLAGS }, \
|
||||
{ "async_depth", "Maximum processing parallelism. " \
|
||||
"Increase this to improve single channel performance. This option " \
|
||||
"doesn't work if driver doesn't implement vaSyncBuffer function.", \
|
||||
OFFSET(common.async_depth), AV_OPT_TYPE_INT, \
|
||||
{ .i64 = 2 }, 1, MAX_ASYNC_DEPTH, FLAGS }
|
||||
|
||||
#define VAAPI_ENCODE_RC_MODE(name, desc) \
|
||||
{ #name, desc, 0, AV_OPT_TYPE_CONST, { .i64 = RC_MODE_ ## name }, \
|
||||
|
Loading…
x
Reference in New Issue
Block a user