mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-13 21:28:01 +02:00
ffv1enc_vulkan: switch to receive_packet
This allows the encoder to fully saturate all queues the GPU has, giving a good 10% speedup in certain cases and resolutions. This also improves error resilience if an allocation fails, and makes the encoder properly clean up after itself when that happens.
This commit is contained in:
parent
4fefc6e80c
commit
d8f301cdf2
@ -19,6 +19,7 @@
|
||||
*/
|
||||
|
||||
#include "libavutil/crc.h"
|
||||
#include "libavutil/mem.h"
|
||||
#include "libavutil/vulkan.h"
|
||||
#include "libavutil/vulkan_spirv.h"
|
||||
|
||||
@ -36,13 +37,38 @@
|
||||
#define LG_ALIGN_W 32
|
||||
#define LG_ALIGN_H 32
|
||||
|
||||
/**
 * Per-submission state for one in-flight frame.
 *
 * One instance is attached to each execution context of the encode pool
 * through its opaque pointer (see the init function), filled in when a frame
 * is submitted, and consumed when the matching packet is produced.
 * Any references still held at close time are released there.
 */
typedef struct VulkanEncodeFFv1FrameData {
|
||||
/* Output data: buffer obtained from out_data_pool at submission time.
 * download_slices() uses it as the copy source for the packet buffer and
 * hands this reference over to the transfer execution context. */
|
||||
AVBufferRef *out_data_ref;
|
||||
|
||||
/* Results data: buffer obtained from results_data_pool at submission time.
 * get_packet() reads it as pairs of uint64_t per slice
 * ([0] = slice size, [1] = source offset) and then unreferences it. */
|
||||
AVBufferRef *results_data_ref;
|
||||
|
||||
/* Copied from the source frame before submission and written to the
 * output packet in get_packet(). The opaque fields are only filled in
 * when AV_CODEC_FLAG_COPY_OPAQUE is set. */
|
||||
int64_t pts;
|
||||
int64_t duration;
|
||||
void *frame_opaque;
|
||||
AVBufferRef *frame_opaque_ref;
|
||||
|
||||
/* 1 if the frame was submitted as a keyframe, 0 otherwise; turned into
 * AV_PKT_FLAG_KEY on the output packet. */
int key_frame;
|
||||
} VulkanEncodeFFv1FrameData;
|
||||
|
||||
typedef struct VulkanEncodeFFv1Context {
|
||||
FFV1Context ctx;
|
||||
AVFrame *frame;
|
||||
|
||||
FFVulkanContext s;
|
||||
FFVkQueueFamilyCtx qf;
|
||||
FFVkExecPool exec_pool;
|
||||
|
||||
FFVkQueueFamilyCtx transfer_qf;
|
||||
FFVkExecPool transfer_exec_pool;
|
||||
|
||||
VkBufferCopy *buf_regions;
|
||||
VulkanEncodeFFv1FrameData *exec_ctx_info;
|
||||
int in_flight;
|
||||
int async_depth;
|
||||
|
||||
FFVulkanShader setup;
|
||||
FFVulkanShader reset;
|
||||
FFVulkanShader rct;
|
||||
@ -59,6 +85,7 @@ typedef struct VulkanEncodeFFv1Context {
|
||||
|
||||
/* Output data buffer */
|
||||
AVBufferPool *out_data_pool;
|
||||
AVBufferPool *pkt_data_pool;
|
||||
|
||||
/* Temporary data buffer */
|
||||
AVBufferPool *tmp_data_pool;
|
||||
@ -271,15 +298,16 @@ fail:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int vulkan_encode_ffv1_frame(AVCodecContext *avctx, AVPacket *pkt,
|
||||
const AVFrame *pict, int *got_packet)
|
||||
static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
|
||||
FFVkExecContext *exec,
|
||||
const AVFrame *pict)
|
||||
{
|
||||
int err;
|
||||
VulkanEncodeFFv1Context *fv = avctx->priv_data;
|
||||
FFV1Context *f = &fv->ctx;
|
||||
FFVulkanFunctions *vk = &fv->s.vkfn;
|
||||
FFVkExecContext *exec;
|
||||
|
||||
VulkanEncodeFFv1FrameData *fd = exec->opaque;
|
||||
FFv1VkParameters pd;
|
||||
|
||||
AVFrame *intermediate_frame = NULL;
|
||||
@ -298,14 +326,10 @@ static int vulkan_encode_ffv1_frame(AVCodecContext *avctx, AVPacket *pkt,
|
||||
|
||||
/* Output data */
|
||||
size_t maxsize;
|
||||
AVBufferRef *out_data_ref;
|
||||
FFVkBuffer *out_data_buf;
|
||||
uint8_t *buf_p;
|
||||
|
||||
/* Results data */
|
||||
AVBufferRef *results_data_ref;
|
||||
FFVkBuffer *results_data_buf;
|
||||
uint64_t *sc;
|
||||
|
||||
int has_inter = avctx->gop_size > 1;
|
||||
uint32_t context_count = f->context_count[f->context_model];
|
||||
@ -316,44 +340,36 @@ static int vulkan_encode_ffv1_frame(AVCodecContext *avctx, AVPacket *pkt,
|
||||
AVFrame *enc_in = (AVFrame *)pict;
|
||||
VkImageView *enc_in_views = in_views;
|
||||
|
||||
VkMappedMemoryRange invalidate_data[2];
|
||||
int nb_invalidate_data = 0;
|
||||
|
||||
VkImageMemoryBarrier2 img_bar[37];
|
||||
int nb_img_bar = 0;
|
||||
VkBufferMemoryBarrier2 buf_bar[8];
|
||||
int nb_buf_bar = 0;
|
||||
|
||||
if (!pict)
|
||||
return 0;
|
||||
|
||||
exec = ff_vk_exec_get(&fv->s, &fv->exec_pool);
|
||||
/* Start recording */
|
||||
ff_vk_exec_start(&fv->s, exec);
|
||||
|
||||
/* Frame state */
|
||||
f->cur_enc_frame = pict;
|
||||
if (avctx->gop_size == 0 || f->picture_number % avctx->gop_size == 0) {
|
||||
av_buffer_unref(&fv->keyframe_slice_data_ref);
|
||||
f->key_frame = 1;
|
||||
f->key_frame = fd->key_frame = 1;
|
||||
f->gob_count++;
|
||||
} else {
|
||||
f->key_frame = 0;
|
||||
f->key_frame = fd->key_frame = 0;
|
||||
}
|
||||
|
||||
f->max_slice_count = f->num_h_slices * f->num_v_slices;
|
||||
f->slice_count = f->max_slice_count;
|
||||
|
||||
/* Allocate temporary data buffer */
|
||||
tmp_data_size = f->slice_count*CONTEXT_SIZE;
|
||||
err = ff_vk_get_pooled_buffer(&fv->s, &fv->tmp_data_pool,
|
||||
&tmp_data_ref,
|
||||
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
|
||||
NULL, tmp_data_size,
|
||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
|
||||
if (err < 0)
|
||||
return err;
|
||||
RET(ff_vk_get_pooled_buffer(&fv->s, &fv->tmp_data_pool,
|
||||
&tmp_data_ref,
|
||||
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
|
||||
NULL, tmp_data_size,
|
||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT));
|
||||
tmp_data_buf = (FFVkBuffer *)tmp_data_ref->data;
|
||||
ff_vk_exec_add_dep_buf(&fv->s, exec, &tmp_data_ref, 1, 0);
|
||||
|
||||
/* Allocate slice buffer data */
|
||||
if (f->ac == AC_GOLOMB_RICE)
|
||||
@ -368,35 +384,33 @@ static int vulkan_encode_ffv1_frame(AVCodecContext *avctx, AVPacket *pkt,
|
||||
slice_state_size += slice_data_size;
|
||||
slice_state_size = FFALIGN(slice_state_size, 8);
|
||||
|
||||
/* Allocate slice data buffer */
|
||||
slice_data_ref = fv->keyframe_slice_data_ref;
|
||||
if (!slice_data_ref) {
|
||||
/* Allocate slice data buffer */
|
||||
err = ff_vk_get_pooled_buffer(&fv->s, &fv->slice_data_pool,
|
||||
&slice_data_ref,
|
||||
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
|
||||
NULL, slice_state_size*f->slice_count,
|
||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
|
||||
if (err < 0)
|
||||
return err;
|
||||
RET(ff_vk_get_pooled_buffer(&fv->s, &fv->slice_data_pool,
|
||||
&slice_data_ref,
|
||||
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
|
||||
NULL, slice_state_size*f->slice_count,
|
||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT));
|
||||
|
||||
/* Only save it if we're going to use it again */
|
||||
if (has_inter)
|
||||
fv->keyframe_slice_data_ref = slice_data_ref;
|
||||
}
|
||||
slice_data_buf = (FFVkBuffer *)slice_data_ref->data;
|
||||
ff_vk_exec_add_dep_buf(&fv->s, exec, &slice_data_ref, 1, has_inter);
|
||||
|
||||
/* Allocate results buffer */
|
||||
err = ff_vk_get_pooled_buffer(&fv->s, &fv->results_data_pool,
|
||||
&results_data_ref,
|
||||
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
|
||||
NULL, 2*f->slice_count*sizeof(uint64_t),
|
||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
|
||||
if (err < 0)
|
||||
return err;
|
||||
results_data_buf = (FFVkBuffer *)results_data_ref->data;
|
||||
RET(ff_vk_get_pooled_buffer(&fv->s, &fv->results_data_pool,
|
||||
&fd->results_data_ref,
|
||||
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
|
||||
NULL, 2*f->slice_count*sizeof(uint64_t),
|
||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
|
||||
results_data_buf = (FFVkBuffer *)fd->results_data_ref->data;
|
||||
ff_vk_exec_add_dep_buf(&fv->s, exec, &fd->results_data_ref, 1, 1);
|
||||
|
||||
/* Output buffer size */
|
||||
maxsize = avctx->width*avctx->height*(1 + f->transparency);
|
||||
@ -414,26 +428,17 @@ static int vulkan_encode_ffv1_frame(AVCodecContext *avctx, AVPacket *pkt,
|
||||
maxsize += FF_INPUT_BUFFER_MIN_SIZE;
|
||||
|
||||
/* Allocate output buffer */
|
||||
err = ff_vk_get_pooled_buffer(&fv->s, &fv->out_data_pool,
|
||||
&out_data_ref,
|
||||
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
|
||||
NULL, maxsize,
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
|
||||
if (err < 0)
|
||||
return err;
|
||||
RET(ff_vk_get_pooled_buffer(&fv->s, &fv->out_data_pool,
|
||||
&fd->out_data_ref,
|
||||
VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
|
||||
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
|
||||
NULL, maxsize,
|
||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT));
|
||||
out_data_buf = (FFVkBuffer *)fd->out_data_ref->data;
|
||||
ff_vk_exec_add_dep_buf(&fv->s, exec, &fd->out_data_ref, 1, 1);
|
||||
|
||||
out_data_buf = (FFVkBuffer *)out_data_ref->data;
|
||||
pkt->data = out_data_buf->mapped_mem;
|
||||
pkt->size = out_data_buf->size;
|
||||
pkt->buf = out_data_ref;
|
||||
|
||||
/* Add dependencies */
|
||||
ff_vk_exec_add_dep_buf(&fv->s, exec, &tmp_data_ref, 1, 0);
|
||||
ff_vk_exec_add_dep_buf(&fv->s, exec, &results_data_ref, 1, 0);
|
||||
ff_vk_exec_add_dep_buf(&fv->s, exec, &slice_data_ref, 1, has_inter);
|
||||
ff_vk_exec_add_dep_buf(&fv->s, exec, &out_data_ref, 1, 1);
|
||||
/* Prepare input frame */
|
||||
RET(ff_vk_exec_add_dep_frame(&fv->s, exec, enc_in,
|
||||
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
||||
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
|
||||
@ -645,6 +650,76 @@ static int vulkan_encode_ffv1_frame(AVCodecContext *avctx, AVPacket *pkt,
|
||||
0, sizeof(pd), &pd);
|
||||
vk->CmdDispatch(exec->buf, fv->ctx.num_h_slices, fv->ctx.num_v_slices, 1);
|
||||
|
||||
/* Submit */
|
||||
err = ff_vk_exec_submit(&fv->s, exec);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
f->picture_number++;
|
||||
|
||||
/* This, if needed, was referenced by the execution context
|
||||
* as it was declared as a dependency. */
|
||||
av_frame_free(&intermediate_frame);
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
av_frame_free(&intermediate_frame);
|
||||
ff_vk_exec_discard_deps(&fv->s, exec);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int download_slices(AVCodecContext *avctx,
|
||||
VkBufferCopy *buf_regions, int nb_regions,
|
||||
VulkanEncodeFFv1FrameData *fd,
|
||||
AVBufferRef *pkt_data_ref)
|
||||
{
|
||||
int err;
|
||||
VulkanEncodeFFv1Context *fv = avctx->priv_data;
|
||||
FFVulkanFunctions *vk = &fv->s.vkfn;
|
||||
FFVkExecContext *exec;
|
||||
|
||||
FFVkBuffer *out_data_buf = (FFVkBuffer *)fd->out_data_ref->data;
|
||||
FFVkBuffer *pkt_data_buf = (FFVkBuffer *)pkt_data_ref->data;
|
||||
|
||||
VkBufferMemoryBarrier2 buf_bar[8];
|
||||
int nb_buf_bar = 0;
|
||||
|
||||
/* Transfer the slices */
|
||||
exec = ff_vk_exec_get(&fv->s, &fv->transfer_exec_pool);
|
||||
ff_vk_exec_start(&fv->s, exec);
|
||||
|
||||
ff_vk_exec_add_dep_buf(&fv->s, exec, &fd->out_data_ref, 1, 0);
|
||||
fd->out_data_ref = NULL; /* Ownership passed */
|
||||
|
||||
ff_vk_exec_add_dep_buf(&fv->s, exec, &pkt_data_ref, 1, 1);
|
||||
|
||||
/* Ensure the output buffer is finished */
|
||||
buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
|
||||
.srcStageMask = out_data_buf->stage,
|
||||
.dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
|
||||
.srcAccessMask = out_data_buf->access,
|
||||
.dstAccessMask = VK_ACCESS_2_TRANSFER_READ_BIT,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.buffer = out_data_buf->buf,
|
||||
.size = VK_WHOLE_SIZE,
|
||||
.offset = 0,
|
||||
};
|
||||
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
|
||||
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
|
||||
.pBufferMemoryBarriers = buf_bar,
|
||||
.bufferMemoryBarrierCount = nb_buf_bar,
|
||||
});
|
||||
out_data_buf->stage = buf_bar[0].dstStageMask;
|
||||
out_data_buf->access = buf_bar[0].dstAccessMask;
|
||||
nb_buf_bar = 0;
|
||||
|
||||
vk->CmdCopyBuffer(exec->buf,
|
||||
out_data_buf->buf, pkt_data_buf->buf,
|
||||
nb_regions, buf_regions);
|
||||
|
||||
/* Submit */
|
||||
err = ff_vk_exec_submit(&fv->s, exec);
|
||||
if (err < 0)
|
||||
@ -652,68 +727,152 @@ static int vulkan_encode_ffv1_frame(AVCodecContext *avctx, AVPacket *pkt,
|
||||
|
||||
/* We need the encoded data immediately */
|
||||
ff_vk_exec_wait(&fv->s, exec);
|
||||
av_frame_free(&intermediate_frame);
|
||||
|
||||
/* Invalidate slice/output data if needed */
|
||||
if (!(results_data_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
|
||||
invalidate_data[nb_invalidate_data++] = (VkMappedMemoryRange) {
|
||||
if (!(pkt_data_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
|
||||
VkMappedMemoryRange invalidate_data = {
|
||||
.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
|
||||
.memory = pkt_data_buf->mem,
|
||||
.offset = 0,
|
||||
.size = VK_WHOLE_SIZE,
|
||||
};
|
||||
vk->InvalidateMappedMemoryRanges(fv->s.hwctx->act_dev,
|
||||
1, &invalidate_data);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int get_packet(AVCodecContext *avctx, FFVkExecContext *exec,
|
||||
AVPacket *pkt)
|
||||
{
|
||||
int err;
|
||||
VulkanEncodeFFv1Context *fv = avctx->priv_data;
|
||||
FFV1Context *f = &fv->ctx;
|
||||
FFVulkanFunctions *vk = &fv->s.vkfn;
|
||||
|
||||
/* Packet data */
|
||||
AVBufferRef *pkt_data_ref;
|
||||
FFVkBuffer *pkt_data_buf;
|
||||
|
||||
VulkanEncodeFFv1FrameData *fd = exec->opaque;
|
||||
|
||||
FFVkBuffer *results_data_buf = (FFVkBuffer *)fd->results_data_ref->data;
|
||||
uint64_t *sc;
|
||||
|
||||
/* Make sure encoding's done */
|
||||
ff_vk_exec_wait(&fv->s, exec);
|
||||
|
||||
/* Invalidate slice/output data if needed */
|
||||
if (!(results_data_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
|
||||
VkMappedMemoryRange invalidate_data = {
|
||||
.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
|
||||
.memory = results_data_buf->mem,
|
||||
.offset = 0,
|
||||
.size = VK_WHOLE_SIZE,
|
||||
};
|
||||
if (!(out_data_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
|
||||
invalidate_data[nb_invalidate_data++] = (VkMappedMemoryRange) {
|
||||
.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
|
||||
.memory = out_data_buf->mem,
|
||||
.offset = 0,
|
||||
.size = VK_WHOLE_SIZE,
|
||||
};
|
||||
if (nb_invalidate_data)
|
||||
vk->InvalidateMappedMemoryRanges(fv->s.hwctx->act_dev,
|
||||
nb_invalidate_data, invalidate_data);
|
||||
|
||||
/* First slice is in-place */
|
||||
buf_p = pkt->data;
|
||||
sc = &((uint64_t *)results_data_buf->mapped_mem)[0];
|
||||
av_log(avctx, AV_LOG_DEBUG, "Slice size = %"PRIu64" (max %i), src offset = %"PRIu64"\n",
|
||||
sc[0], pkt->size / f->slice_count, sc[1]);
|
||||
av_assert0(sc[0] < pd.slice_size_max);
|
||||
av_assert0(sc[0] < (1 << 24));
|
||||
buf_p += sc[0];
|
||||
|
||||
/* We have to copy the rest */
|
||||
for (int i = 1; i < f->slice_count; i++) {
|
||||
uint64_t bytes;
|
||||
uint8_t *bs_start;
|
||||
|
||||
sc = &((uint64_t *)results_data_buf->mapped_mem)[i*2];
|
||||
bytes = sc[0];
|
||||
bs_start = pkt->data + sc[1];
|
||||
|
||||
av_log(avctx, AV_LOG_DEBUG, "Slice %i size = %"PRIu64" (max %"PRIu64"), "
|
||||
"src offset = %"PRIu64"\n",
|
||||
i, bytes, pd.slice_size_max, sc[1]);
|
||||
av_assert0(bytes < pd.slice_size_max);
|
||||
av_assert0(bytes < (1 << 24));
|
||||
|
||||
memmove(buf_p, bs_start, bytes);
|
||||
|
||||
buf_p += bytes;
|
||||
1, &invalidate_data);
|
||||
}
|
||||
|
||||
f->picture_number++;
|
||||
pkt->size = buf_p - pkt->data;
|
||||
pkt->flags |= AV_PKT_FLAG_KEY * f->key_frame;
|
||||
*got_packet = 1;
|
||||
/* Calculate final size */
|
||||
pkt->size = 0;
|
||||
for (int i = 0; i < f->slice_count; i++) {
|
||||
sc = &((uint64_t *)results_data_buf->mapped_mem)[i*2];
|
||||
av_log(avctx, AV_LOG_DEBUG, "Slice %i size = %"PRIu64", "
|
||||
"src offset = %"PRIu64"\n",
|
||||
i, sc[0], sc[1]);
|
||||
|
||||
av_log(avctx, AV_LOG_VERBOSE, "Total data = %i\n",
|
||||
pkt->size);
|
||||
fv->buf_regions[i] = (VkBufferCopy) {
|
||||
.srcOffset = sc[1],
|
||||
.dstOffset = pkt->size,
|
||||
.size = sc[0],
|
||||
};
|
||||
pkt->size += sc[0];
|
||||
}
|
||||
av_log(avctx, AV_LOG_VERBOSE, "Encoded data: %iMiB\n", pkt->size / (1024*1024));
|
||||
av_buffer_unref(&fd->results_data_ref); /* No need for this buffer anymore */
|
||||
|
||||
fail:
|
||||
/* Frames added as a dep are always referenced, so we only need to
|
||||
* clean this up. */
|
||||
av_frame_free(&intermediate_frame);
|
||||
/* Allocate packet buffer */
|
||||
err = ff_vk_get_pooled_buffer(&fv->s, &fv->pkt_data_pool,
|
||||
&pkt_data_ref,
|
||||
VK_BUFFER_USAGE_TRANSFER_DST_BIT,
|
||||
NULL, pkt->size,
|
||||
VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
|
||||
if (err < 0)
|
||||
return err;
|
||||
pkt_data_buf = (FFVkBuffer *)pkt_data_ref->data;
|
||||
|
||||
/* Setup packet data */
|
||||
pkt->data = pkt_data_buf->mapped_mem;
|
||||
pkt->buf = pkt_data_ref;
|
||||
|
||||
pkt->pts = fd->pts;
|
||||
pkt->dts = fd->pts;
|
||||
pkt->duration = fd->duration;
|
||||
pkt->flags |= AV_PKT_FLAG_KEY * fd->key_frame;
|
||||
|
||||
if (avctx->flags & AV_CODEC_FLAG_COPY_OPAQUE) {
|
||||
pkt->opaque = fd->frame_opaque;
|
||||
pkt->opaque_ref = fd->frame_opaque_ref;
|
||||
fd->frame_opaque_ref = NULL;
|
||||
}
|
||||
|
||||
return download_slices(avctx, fv->buf_regions, f->slice_count, fd,
|
||||
pkt_data_ref);
|
||||
}
|
||||
|
||||
/**
 * receive_packet callback: keeps up to async_depth frames in flight.
 *
 * Each iteration takes the next execution context from the encode pool.
 * If that context carries an earlier submission, its packet is output;
 * otherwise a new input frame is fetched and submitted on it.
 *
 * @param avctx codec context; priv_data is the VulkanEncodeFFv1Context
 * @param pkt   packet to fill in, passed on to get_packet()
 * @return the result of get_packet() when a packet is output,
 *         AVERROR(EAGAIN) after a submission while fewer than async_depth
 *         frames are in flight, AVERROR_EOF once the input has ended and
 *         nothing is in flight, or any other negative error from
 *         ff_encode_get_frame() / the frame submission
 */
static int vulkan_encode_ffv1_receive_packet(AVCodecContext *avctx,
|
||||
AVPacket *pkt)
|
||||
{
|
||||
int err;
|
||||
VulkanEncodeFFv1Context *fv = avctx->priv_data;
|
||||
VulkanEncodeFFv1FrameData *fd;
|
||||
FFVkExecContext *exec;
|
||||
AVFrame *frame;
|
||||
|
||||
while (1) {
|
||||
/* Roll an execution context */
|
||||
exec = ff_vk_exec_get(&fv->s, &fv->exec_pool);
|
||||
|
||||
/* If it had a frame, immediately output it.
 * NOTE(review): had_submission is presumably set when the context is
 * submitted - confirm in the exec pool implementation. */
|
||||
if (exec->had_submission) {
|
||||
exec->had_submission = 0;
|
||||
fv->in_flight--;
|
||||
return get_packet(avctx, exec, pkt);
|
||||
}
|
||||
|
||||
/* Get next frame to encode (fv->frame is the temporary frame
 * allocated at init) */
|
||||
frame = fv->frame;
|
||||
err = ff_encode_get_frame(avctx, frame);
|
||||
if (err < 0 && err != AVERROR_EOF) {
|
||||
return err;
|
||||
} else if (err == AVERROR_EOF) {
|
||||
/* Input has ended: finish only when nothing is in flight,
 * otherwise keep rolling contexts until one with a pending
 * submission comes up. */
if (!fv->in_flight)
|
||||
return err;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Encode frame: stash the per-frame metadata in the context's
 * VulkanEncodeFFv1FrameData so get_packet() can restore it later */
|
||||
fd = exec->opaque;
|
||||
fd->pts = frame->pts;
|
||||
fd->duration = frame->duration;
|
||||
if (avctx->flags & AV_CODEC_FLAG_COPY_OPAQUE) {
|
||||
fd->frame_opaque = frame->opaque;
|
||||
fd->frame_opaque_ref = frame->opaque_ref;
|
||||
frame->opaque_ref = NULL; /* reference moved to fd; keeps av_frame_unref() below from releasing it */
|
||||
}
|
||||
|
||||
err = vulkan_encode_ffv1_submit_frame(avctx, exec, frame);
|
||||
av_frame_unref(frame); /* released on both the success and the error path */
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
fv->in_flight++;
|
||||
/* Ask for more input until async_depth frames are in flight;
 * once that many are queued, loop again to look for a packet. */
if (fv->in_flight < fv->async_depth)
|
||||
return AVERROR(EAGAIN);
|
||||
}
|
||||
|
||||
/* Not reached: the loop above only exits through return statements */
return 0;
|
||||
}
|
||||
@ -1441,8 +1600,23 @@ static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx)
|
||||
return err;
|
||||
}
|
||||
|
||||
if (!fv->async_depth)
|
||||
fv->async_depth = fv->qf.nb_queues;
|
||||
|
||||
err = ff_vk_exec_pool_init(&fv->s, &fv->qf, &fv->exec_pool,
|
||||
1, /* Single-threaded for now */
|
||||
FFMIN(fv->qf.nb_queues, fv->async_depth),
|
||||
0, 0, 0, NULL);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
err = ff_vk_qf_init(&fv->s, &fv->transfer_qf, VK_QUEUE_TRANSFER_BIT);
|
||||
if (err < 0) {
|
||||
av_log(avctx, AV_LOG_ERROR, "Device has no transfer queues!\n");
|
||||
return err;
|
||||
}
|
||||
|
||||
err = ff_vk_exec_pool_init(&fv->s, &fv->transfer_qf, &fv->transfer_exec_pool,
|
||||
1,
|
||||
0, 0, 0, NULL);
|
||||
if (err < 0)
|
||||
return err;
|
||||
@ -1510,6 +1684,24 @@ static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx)
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
/* Temporary frame */
|
||||
fv->frame = av_frame_alloc();
|
||||
if (!fv->frame)
|
||||
return AVERROR(ENOMEM);
|
||||
|
||||
/* Async data pool */
|
||||
fv->async_depth = fv->exec_pool.pool_size;
|
||||
fv->exec_ctx_info = av_calloc(fv->async_depth, sizeof(*fv->exec_ctx_info));
|
||||
if (!fv->exec_ctx_info)
|
||||
return AVERROR(ENOMEM);
|
||||
for (int i = 0; i < fv->async_depth; i++)
|
||||
fv->exec_pool.contexts[i].opaque = &fv->exec_ctx_info[i];
|
||||
|
||||
f->max_slice_count = f->num_h_slices * f->num_v_slices;
|
||||
fv->buf_regions = av_malloc_array(f->max_slice_count, sizeof(*fv->buf_regions));
|
||||
if (!fv->buf_regions)
|
||||
return AVERROR(ENOMEM);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1518,17 +1710,29 @@ static av_cold int vulkan_encode_ffv1_close(AVCodecContext *avctx)
|
||||
VulkanEncodeFFv1Context *fv = avctx->priv_data;
|
||||
|
||||
ff_vk_exec_pool_free(&fv->s, &fv->exec_pool);
|
||||
ff_vk_exec_pool_free(&fv->s, &fv->transfer_exec_pool);
|
||||
|
||||
ff_vk_shader_free(&fv->s, &fv->enc);
|
||||
ff_vk_shader_free(&fv->s, &fv->rct);
|
||||
ff_vk_shader_free(&fv->s, &fv->reset);
|
||||
ff_vk_shader_free(&fv->s, &fv->setup);
|
||||
|
||||
if (fv->exec_ctx_info) {
|
||||
for (int i = 0; i < fv->async_depth; i++) {
|
||||
VulkanEncodeFFv1FrameData *fd = &fv->exec_ctx_info[i];
|
||||
av_buffer_unref(&fd->out_data_ref);
|
||||
av_buffer_unref(&fd->results_data_ref);
|
||||
av_buffer_unref(&fd->frame_opaque_ref);
|
||||
}
|
||||
}
|
||||
av_free(fv->exec_ctx_info);
|
||||
|
||||
av_buffer_unref(&fv->intermediate_frames_ref);
|
||||
|
||||
av_buffer_pool_uninit(&fv->results_data_pool);
|
||||
|
||||
av_buffer_pool_uninit(&fv->out_data_pool);
|
||||
av_buffer_pool_uninit(&fv->pkt_data_pool);
|
||||
av_buffer_pool_uninit(&fv->tmp_data_pool);
|
||||
|
||||
av_buffer_unref(&fv->keyframe_slice_data_ref);
|
||||
@ -1538,6 +1742,8 @@ static av_cold int vulkan_encode_ffv1_close(AVCodecContext *avctx)
|
||||
ff_vk_free_buf(&fv->s, &fv->rangecoder_static_buf);
|
||||
ff_vk_free_buf(&fv->s, &fv->crc_tab_buf);
|
||||
|
||||
av_free(fv->buf_regions);
|
||||
av_frame_free(&fv->frame);
|
||||
ff_vk_uninit(&fv->s);
|
||||
|
||||
return 0;
|
||||
@ -1567,6 +1773,9 @@ static const AVOption vulkan_encode_ffv1_options[] = {
|
||||
{ "force_pcm", "Code all slices with no prediction", OFFSET(force_pcm), AV_OPT_TYPE_BOOL,
|
||||
{ .i64 = 0 }, 0, 1, VE },
|
||||
|
||||
{ "async_depth", "Internal parallelization depth", OFFSET(async_depth), AV_OPT_TYPE_INT,
|
||||
{ .i64 = 0 }, 0, INT_MAX, VE },
|
||||
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
@ -1594,7 +1803,7 @@ const FFCodec ff_ffv1_vulkan_encoder = {
|
||||
.p.id = AV_CODEC_ID_FFV1,
|
||||
.priv_data_size = sizeof(VulkanEncodeFFv1Context),
|
||||
.init = &vulkan_encode_ffv1_init,
|
||||
FF_CODEC_ENCODE_CB(vulkan_encode_ffv1_frame),
|
||||
FF_CODEC_RECEIVE_PACKET_CB(&vulkan_encode_ffv1_receive_packet),
|
||||
.close = &vulkan_encode_ffv1_close,
|
||||
.p.priv_class = &vulkan_encode_ffv1_class,
|
||||
.p.capabilities = AV_CODEC_CAP_DELAY |
|
||||
|
@ -146,6 +146,7 @@ typedef uint64_t FFVulkanExtensions;
|
||||
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdPipelineBarrier) \
|
||||
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdCopyBufferToImage) \
|
||||
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdCopyImageToBuffer) \
|
||||
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdCopyBuffer) \
|
||||
\
|
||||
/* Buffer */ \
|
||||
MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetBufferMemoryRequirements2) \
|
||||
|
Loading…
Reference in New Issue
Block a user