1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-01-13 21:28:01 +02:00
FFmpeg/libavcodec/ffv1enc_vulkan.c
Lynne eb536d97a0
ffv1enc_vulkan: support buffers larger than 4GiB
Unlike the software FFv1 encoder, none of our buffers are allocated by
FFmpeg, which supports at most 4GiB large allocations.

For really large sizes, the maximum size of the buffer can exceed 4GiB,
which the software encoder optimistically tries to allocate as 4GiB
in the hopes that the encoder will compress to under that amount.

We can just let Vulkan allocate us a larger buffer, and switch to
64-bit offsets.
2024-11-20 05:23:05 +01:00

1600 lines
58 KiB
C

/*
* Copyright (c) 2024 Lynne <dev@lynne.ee>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/crc.h"
#include "libavutil/vulkan.h"
#include "libavutil/vulkan_spirv.h"
#include "avcodec.h"
#include "internal.h"
#include "hwconfig.h"
#include "encode.h"
#include "libavutil/opt.h"
#include "codec_internal.h"
#include "ffv1.h"
#include "ffv1enc.h"
/* Parallel Golomb alignment: width (W) and height (H) alignment constants.
 * NOTE(review): neither macro is referenced in the visible part of this
 * file - confirm where they are consumed before changing the values. */
#define LG_ALIGN_W 32
#define LG_ALIGN_H 32
/**
 * Private context of the Vulkan FFv1 encoder.
 *
 * The functions in this file obtain it from avctx->priv_data.
 */
typedef struct VulkanEncodeFFv1Context {
/* Generic FFv1 encoder state (slice counts, version, coder type, ...);
 * the frame encoder accesses it as &fv->ctx */
FFV1Context ctx;
/* Vulkan helper context handed to every ff_vk_* call in this file */
FFVulkanContext s;
FFVkQueueFamilyCtx qf;
/* Pool the per-frame execution context is taken from; every shader is
 * registered against it when it is built */
FFVkExecPool exec_pool;
/* Compute shaders. Per frame they are dispatched in this order:
 * setup, reset (keyframes or version > 3 only), rct (RGB only), enc */
FFVulkanShader setup;
FFVulkanShader reset;
FFVulkanShader rct;
FFVulkanShader enc;
/* Constant read-only buffers */
FFVkBuffer quant_buf;
FFVkBuffer rangecoder_static_buf;
FFVkBuffer crc_tab_buf;
/* Slice data buffer pool */
AVBufferPool *slice_data_pool;
/* Slice data buffer kept across frames when gop_size > 1; it is released
 * at the start of every keyframe */
AVBufferRef *keyframe_slice_data_ref;
/* Output data buffer */
AVBufferPool *out_data_pool;
/* Temporary data buffer */
AVBufferPool *tmp_data_pool;
/* Slice results buffer */
AVBufferPool *results_data_pool;
/* Intermediate frame pool, created by init_indirect(); frames from it hold
 * the output of the RCT shader */
AVBufferRef *intermediate_frames_ref;
/* Representation mode used when creating image views and when choosing
 * the shader-side image format */
enum FFVkShaderRepFormat rep_fmt;
/* NOTE(review): the visible dispatch code reads num_h_slices/num_v_slices
 * from ctx rather than from these two fields - confirm which copy is
 * authoritative */
int num_h_slices;
int num_v_slices;
/* Forwarded unchanged to the shaders via FFv1VkParameters.force_pcm */
int force_pcm;
/* Non-zero when the RCT pass has to run before the encode shader */
int is_rgb;
/* Forwarded unchanged to the shaders via FFv1VkParameters.ppi/.chunks */
int ppi;
int chunks;
} VulkanEncodeFFv1Context;
/* GLSL source strings, defined outside this file. They are appended to a
 * shader's source buffer with GLSLD() while the shaders are assembled.
 * NOTE(review): presumably generated from the .comp files at build time -
 * confirm against the build system. */
extern const char *ff_source_common_comp;
extern const char *ff_source_rangecoder_comp;
extern const char *ff_source_ffv1_vlc_comp;
extern const char *ff_source_ffv1_common_comp;
extern const char *ff_source_ffv1_reset_comp;
extern const char *ff_source_ffv1_enc_common_comp;
extern const char *ff_source_ffv1_enc_rct_comp;
extern const char *ff_source_ffv1_enc_vlc_comp;
extern const char *ff_source_ffv1_enc_ac_comp;
extern const char *ff_source_ffv1_enc_setup_comp;
extern const char *ff_source_ffv1_enc_comp;
extern const char *ff_source_ffv1_enc_rgb_comp;
/**
 * Push constants of the RCT shader.
 *
 * The field order and types must match the GLSL pushConstants block that
 * init_rct_shader() emits (scalar layout); sizeof() of this struct is what
 * gets registered as the push constant range.
 */
typedef struct FFv1VkRCTParameters {
/* Set to 1 << bits_per_raw_sample by run_rct() */
int offset;
/* Non-zero when the source format is reported by ff_vk_mt_is_np_rgb() and
 * the input frame consists of more than one image */
uint8_t planar_rgb;
/* Copied from FFV1Context.transparency */
uint8_t transparency;
/* Explicit padding, mirrored in the GLSL block */
uint8_t padding[2];
} FFv1VkRCTParameters;
/**
 * Push constants of the reset shader.
 *
 * The field order and types must match the GLSL pushConstants block that
 * init_reset_shader() emits (scalar layout).
 */
typedef struct FFv1VkResetParameters {
/* Device address of the slice state area, which starts right after the
 * per-slice SliceContext region of the slice data buffer */
VkDeviceAddress slice_state;
/* State bytes per plane: (8 for Golomb-Rice, CONTEXT_SIZE otherwise)
 * multiplied by context_count */
uint32_t plane_state_size;
uint32_t context_count;
/* Copied from FFV1Context.plane_count */
uint8_t codec_planes;
uint8_t key_frame;
/* Explicit padding, mirrored in the GLSL block */
uint8_t padding[3];
} FFv1VkResetParameters;
/**
 * Push constants shared by the setup and encode shaders.
 *
 * The field order and types must match the GLSL pushConstants block that
 * add_push_data() emits (scalar layout); sizeof() of this struct is what
 * gets registered as the push constant range.
 */
typedef struct FFv1VkParameters {
/* Device address of the slice state area, which starts right after the
 * per-slice SliceContext region of the slice data buffer */
VkDeviceAddress slice_state;
/* Device address of the temporary data buffer */
VkDeviceAddress scratch_data;
/* Device address of the output (bitstream) buffer */
VkDeviceAddress out_data;
/* Output buffer size divided by the slice count; 64-bit so that output
 * buffers larger than 4GiB can be described */
uint64_t slice_size_max;
/* Sample aspect ratio of the input frame: { num, den } */
int32_t sar[2];
/* { chroma_h_shift, chroma_v_shift } */
uint32_t chroma_shift[2];
/* State bytes per plane: (8 for Golomb-Rice, CONTEXT_SIZE otherwise)
 * multiplied by context_count */
uint32_t plane_state_size;
uint32_t context_count;
uint32_t crcref;
uint8_t bits_per_raw_sample;
uint8_t context_model;
uint8_t version;
uint8_t micro_version;
uint8_t force_pcm;
uint8_t key_frame;
/* Plane count of the software pixel format */
uint8_t planes;
/* Plane count as seen by the codec (FFV1Context.plane_count) */
uint8_t codec_planes;
uint8_t transparency;
uint8_t colorspace;
/* 3 = not interlaced, 2 = interlaced without the top-field-first flag,
 * 1 = interlaced with the top-field-first flag */
uint8_t pic_mode;
uint8_t ec;
uint8_t ppi;
uint8_t chunks;
/* Explicit padding, mirrored in the GLSL block */
uint8_t padding[2];
} FFv1VkParameters;
/**
 * Append the GLSL push constant block shared by the setup and encode shaders
 * to the shader source and register the matching push constant range.
 *
 * The GLSL members below mirror FFv1VkParameters member for member; any
 * change to one side has to be repeated on the other, since the range is
 * registered with sizeof(FFv1VkParameters).
 *
 * @param shd shader whose source buffer is extended (the GLSLC() macro
 *            refers to this variable by name)
 */
static void add_push_data(FFVulkanShader *shd)
{
GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
GLSLC(1, u8buf slice_state; );
GLSLC(1, u8buf scratch_data; );
GLSLC(1, u8buf out_data; );
GLSLC(1, uint64_t slice_size_max; );
GLSLC(0, );
GLSLC(1, ivec2 sar; );
GLSLC(1, uvec2 chroma_shift; );
GLSLC(0, );
GLSLC(1, uint plane_state_size; );
GLSLC(1, uint context_count; );
GLSLC(1, uint32_t crcref; );
GLSLC(0, );
GLSLC(1, uint8_t bits_per_raw_sample; );
GLSLC(1, uint8_t context_model; );
GLSLC(1, uint8_t version; );
GLSLC(1, uint8_t micro_version; );
GLSLC(1, uint8_t force_pcm; );
GLSLC(1, uint8_t key_frame; );
GLSLC(1, uint8_t planes; );
GLSLC(1, uint8_t codec_planes; );
GLSLC(1, uint8_t transparency; );
GLSLC(1, uint8_t colorspace; );
GLSLC(1, uint8_t pic_mode; );
GLSLC(1, uint8_t ec; );
GLSLC(1, uint8_t ppi; );
GLSLC(1, uint8_t chunks; );
GLSLC(1, uint8_t padding[2]; );
GLSLC(0, }; );
/* Push constants start at offset 0 and are visible to the compute stage */
ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkParameters),
VK_SHADER_STAGE_COMPUTE_BIT);
}
/**
 * Record the RCT pass: read the input frame and write the transformed
 * samples into a freshly allocated intermediate frame.
 *
 * @param avctx              codec context (priv_data is the encoder context)
 * @param exec               execution context being recorded
 * @param enc_in             input frame and its image views (enc_in_views)
 * @param intermediate_frame receives the newly allocated frame; it is set
 *                           even when a later step fails, the caller frees it
 * @param intermediate_views receives the image views of the new frame
 * @param img_bar,nb_img_bar pending image barriers; flushed before the
 *                           dispatch, one barrier is left pending on return
 * @param buf_bar,nb_buf_bar pending buffer barriers; flushed before the
 *                           dispatch
 * @param slice_data_buf     slice data buffer bound to the shader
 * @param slice_data_size    bytes of slice data per slice
 * @return >= 0 on success, a negative AVERROR code otherwise
 */
static int run_rct(AVCodecContext *avctx, FFVkExecContext *exec,
                   AVFrame *enc_in, VkImageView *enc_in_views,
                   AVFrame **intermediate_frame, VkImageView *intermediate_views,
                   VkImageMemoryBarrier2 *img_bar, int *nb_img_bar,
                   VkBufferMemoryBarrier2 *buf_bar, int *nb_buf_bar,
                   FFVkBuffer *slice_data_buf, uint32_t slice_data_size)
{
    VulkanEncodeFFv1Context *fv = avctx->priv_data;
    FFVulkanContext *vkctx = &fv->s;
    FFVulkanFunctions *vk = &vkctx->vkfn;
    FFV1Context *f = &fv->ctx;
    AVHWFramesContext *in_frames_ctx =
        (AVHWFramesContext *)enc_in->hw_frames_ctx->data;
    AVFrame *rct_out;
    int err;

    /* Allocate the temporary frame; the caller owns it from here on */
    rct_out = av_frame_alloc();
    *intermediate_frame = rct_out;
    if (!rct_out)
        return AVERROR(ENOMEM);

    RET(av_hwframe_get_buffer(fv->intermediate_frames_ref, rct_out, 0));
    RET(ff_vk_exec_add_dep_frame(vkctx, exec, rct_out,
                                 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
    RET(ff_vk_create_imageviews(vkctx, exec, intermediate_views,
                                rct_out, fv->rep_fmt));

    /* Descriptors: slice data, source images, destination images */
    ff_vk_shader_update_desc_buffer(vkctx, exec, &fv->rct,
                                    1, 0, 0,
                                    slice_data_buf,
                                    0, slice_data_size*f->slice_count,
                                    VK_FORMAT_UNDEFINED);
    ff_vk_shader_update_img_array(vkctx, exec, &fv->rct,
                                  enc_in, enc_in_views,
                                  1, 1,
                                  VK_IMAGE_LAYOUT_GENERAL,
                                  VK_NULL_HANDLE);
    ff_vk_shader_update_img_array(vkctx, exec, &fv->rct,
                                  rct_out, intermediate_views,
                                  1, 2,
                                  VK_IMAGE_LAYOUT_GENERAL,
                                  VK_NULL_HANDLE);

    /* The destination frame is about to be written by the shader */
    ff_vk_frame_barrier(vkctx, exec, rct_out, img_bar, nb_img_bar,
                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                        VK_ACCESS_SHADER_WRITE_BIT,
                        VK_IMAGE_LAYOUT_GENERAL,
                        VK_QUEUE_FAMILY_IGNORED);

    /* Flush every pending image and buffer barrier in one call */
    VkDependencyInfo pending = {
        .sType                    = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
        .pImageMemoryBarriers     = img_bar,
        .imageMemoryBarrierCount  = *nb_img_bar,
        .pBufferMemoryBarriers    = buf_bar,
        .bufferMemoryBarrierCount = *nb_buf_bar,
    };
    vk->CmdPipelineBarrier2(exec->buf, &pending);
    *nb_img_bar = 0;
    if (*nb_buf_bar != 0) {
        /* Remember the state the slice data buffer has been moved to */
        slice_data_buf->stage  = buf_bar[0].dstStageMask;
        slice_data_buf->access = buf_bar[0].dstAccessMask;
        *nb_buf_bar = 0;
    }

    /* Bind, upload the push constants and dispatch one group per slice */
    ff_vk_exec_bind_shader(vkctx, exec, &fv->rct);

    FFv1VkRCTParameters push = {
        .offset       = 1 << f->bits_per_raw_sample,
        .planar_rgb   = ff_vk_mt_is_np_rgb(in_frames_ctx->sw_format) &&
                        (ff_vk_count_images((AVVkFrame *)enc_in->data[0]) > 1),
        .transparency = f->transparency,
    };
    ff_vk_shader_update_push_const(vkctx, exec, &fv->rct,
                                   VK_SHADER_STAGE_COMPUTE_BIT,
                                   0, sizeof(push), &push);

    vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1);

    /* Queue a barrier so the encoder only reads the finished frame */
    ff_vk_frame_barrier(vkctx, exec, rct_out, img_bar, nb_img_bar,
                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                        VK_ACCESS_SHADER_READ_BIT,
                        VK_IMAGE_LAYOUT_GENERAL,
                        VK_QUEUE_FAMILY_IGNORED);

fail:
    return err;
}
/**
 * Encode one frame on the GPU and return the bitstream in pkt.
 *
 * The function records and submits, in order: the setup shader, the reset
 * shader (keyframes or version > 3), the RCT shader (RGB input only) and the
 * encode shader. It then waits for completion and compacts the per-slice
 * output, which the shaders write at fixed strides, into one contiguous
 * packet.
 *
 * @param avctx      codec context (priv_data is the encoder context)
 * @param pkt        output packet; its data points into the mapped output
 *                   buffer, which is attached as pkt->buf
 * @param pict       frame to encode; NULL produces no output
 * @param got_packet set to 1 when a packet has been produced
 * @return 0 on success, a negative AVERROR code on failure
 */
static int vulkan_encode_ffv1_frame(AVCodecContext *avctx, AVPacket *pkt,
                                    const AVFrame *pict, int *got_packet)
{
    int err;
    VulkanEncodeFFv1Context *fv = avctx->priv_data;
    FFV1Context *f = &fv->ctx;
    FFVulkanFunctions *vk = &fv->s.vkfn;
    FFVkExecContext *exec;

    FFv1VkParameters pd;

    AVFrame *intermediate_frame = NULL;

    /* Temporary data */
    size_t tmp_data_size;
    AVBufferRef *tmp_data_ref;
    FFVkBuffer *tmp_data_buf;

    /* Slice data */
    AVBufferRef *slice_data_ref;
    FFVkBuffer *slice_data_buf;
    uint32_t plane_state_size;
    uint32_t slice_state_size;
    uint32_t slice_data_size;
    /* Set when slice_data_ref was allocated here and not stored in fv */
    int own_slice_data = 0;

    /* Output data */
    size_t maxsize;
    AVBufferRef *out_data_ref;
    FFVkBuffer *out_data_buf;
    uint8_t *buf_p;

    /* Results data */
    AVBufferRef *results_data_ref;
    FFVkBuffer *results_data_buf;
    uint64_t *sc;

    int has_inter = avctx->gop_size > 1;
    uint32_t context_count = f->context_count[f->context_model];

    VkImageView in_views[AV_NUM_DATA_POINTERS];
    VkImageView intermediate_views[AV_NUM_DATA_POINTERS];

    AVFrame *enc_in = (AVFrame *)pict;
    VkImageView *enc_in_views = in_views;

    VkMappedMemoryRange invalidate_data[2];
    int nb_invalidate_data = 0;

    VkImageMemoryBarrier2 img_bar[37];
    int nb_img_bar = 0;
    VkBufferMemoryBarrier2 buf_bar[8];
    int nb_buf_bar = 0;

    if (!pict)
        return 0;

    exec = ff_vk_exec_get(&fv->s, &fv->exec_pool);
    ff_vk_exec_start(&fv->s, exec);

    /* Frame state */
    f->cur_enc_frame = pict;
    if (avctx->gop_size == 0 || f->picture_number % avctx->gop_size == 0) {
        av_buffer_unref(&fv->keyframe_slice_data_ref);
        f->key_frame = 1;
        f->gob_count++;
    } else {
        f->key_frame = 0;
    }

    f->max_slice_count = f->num_h_slices * f->num_v_slices;
    f->slice_count = f->max_slice_count;

    /* NOTE(review): exec has already been started here and is neither
     * submitted nor explicitly discarded on the error paths below - confirm
     * that the exec pool tolerates this. */

    /* Allocate temporary data buffer */
    tmp_data_size = f->slice_count*CONTEXT_SIZE;
    err = ff_vk_get_pooled_buffer(&fv->s, &fv->tmp_data_pool,
                                  &tmp_data_ref,
                                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
                                  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
                                  NULL, tmp_data_size,
                                  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
    if (err < 0)
        return err;
    tmp_data_buf = (FFVkBuffer *)tmp_data_ref->data;

    /* Allocate slice buffer data */
    if (f->ac == AC_GOLOMB_RICE)
        plane_state_size = 8;
    else
        plane_state_size = CONTEXT_SIZE;

    plane_state_size *= context_count;
    slice_state_size = plane_state_size*f->plane_count;

    slice_data_size = 256; /* Overestimation for the SliceContext struct */
    slice_state_size += slice_data_size;
    slice_state_size = FFALIGN(slice_state_size, 8);

    slice_data_ref = fv->keyframe_slice_data_ref;
    if (!slice_data_ref) {
        /* Allocate slice data buffer */
        err = ff_vk_get_pooled_buffer(&fv->s, &fv->slice_data_pool,
                                      &slice_data_ref,
                                      VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
                                      VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
                                      NULL, slice_state_size*f->slice_count,
                                      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
        if (err < 0)
            goto fail_tmp;

        /* Only save it if we're going to use it again */
        if (has_inter)
            fv->keyframe_slice_data_ref = slice_data_ref;
        else
            own_slice_data = 1;
    }
    slice_data_buf = (FFVkBuffer *)slice_data_ref->data;

    /* Allocate results buffer */
    err = ff_vk_get_pooled_buffer(&fv->s, &fv->results_data_pool,
                                  &results_data_ref,
                                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
                                  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
                                  NULL, 2*f->slice_count*sizeof(uint64_t),
                                  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                                  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
    if (err < 0)
        goto fail_slice;
    results_data_buf = (FFVkBuffer *)results_data_ref->data;

    /* Output buffer size. The products are evaluated in size_t so that very
     * large frames do not overflow int before the value is widened. */
    maxsize = (size_t)avctx->width*avctx->height*(1 + f->transparency);
    if (f->chroma_planes)
        maxsize += (size_t)AV_CEIL_RSHIFT(avctx->width, f->chroma_h_shift) *
                   AV_CEIL_RSHIFT(f->height, f->chroma_v_shift)*2;
    maxsize += f->slice_count * 800;
    if (f->version > 3) {
        maxsize *= f->bits_per_raw_sample + 1;
    } else {
        maxsize += f->slice_count * 2 * (avctx->width + avctx->height);
        maxsize *= 8*(2*f->bits_per_raw_sample + 5);
    }
    maxsize >>= 3;
    maxsize += FF_INPUT_BUFFER_MIN_SIZE;

    /* Allocate output buffer */
    err = ff_vk_get_pooled_buffer(&fv->s, &fv->out_data_pool,
                                  &out_data_ref,
                                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
                                  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
                                  NULL, maxsize,
                                  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                  VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
    if (err < 0)
        goto fail_results;

    out_data_buf = (FFVkBuffer *)out_data_ref->data;
    pkt->data = out_data_buf->mapped_mem;
    pkt->size = out_data_buf->size;
    pkt->buf  = out_data_ref;

    /* Add dependencies.
     * NOTE(review): the return values are not checked, and what happens to
     * a reference when registration fails is not visible from this file. */
    ff_vk_exec_add_dep_buf(&fv->s, exec, &tmp_data_ref, 1, 0);
    ff_vk_exec_add_dep_buf(&fv->s, exec, &results_data_ref, 1, 0);
    ff_vk_exec_add_dep_buf(&fv->s, exec, &slice_data_ref, 1, has_inter);
    ff_vk_exec_add_dep_buf(&fv->s, exec, &out_data_ref, 1, 1);
    RET(ff_vk_exec_add_dep_frame(&fv->s, exec, enc_in,
                                 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));

    RET(ff_vk_create_imageviews(&fv->s, exec, enc_in_views, enc_in,
                                fv->rep_fmt));
    ff_vk_frame_barrier(&fv->s, exec, enc_in, img_bar, &nb_img_bar,
                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                        VK_ACCESS_SHADER_READ_BIT,
                        VK_IMAGE_LAYOUT_GENERAL,
                        VK_QUEUE_FAMILY_IGNORED);

    /* Setup shader needs the original input */
    ff_vk_shader_update_desc_buffer(&fv->s, exec, &fv->setup,
                                    1, 0, 0,
                                    slice_data_buf,
                                    0, slice_data_size*f->slice_count,
                                    VK_FORMAT_UNDEFINED);
    ff_vk_shader_update_img_array(&fv->s, exec, &fv->setup,
                                  enc_in, enc_in_views,
                                  1, 1,
                                  VK_IMAGE_LAYOUT_GENERAL,
                                  VK_NULL_HANDLE);

    /* Add a buffer barrier between previous and current frame */
    if (!f->key_frame) {
        buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
            .srcStageMask = slice_data_buf->stage,
            .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
            .srcAccessMask = slice_data_buf->access,
            .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
                             VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .buffer = slice_data_buf->buf,
            .size = VK_WHOLE_SIZE,
            .offset = 0,
        };
    }

    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
        .pImageMemoryBarriers = img_bar,
        .imageMemoryBarrierCount = nb_img_bar,
        .pBufferMemoryBarriers = buf_bar,
        .bufferMemoryBarrierCount = nb_buf_bar,
    });
    nb_img_bar = 0;
    if (nb_buf_bar) {
        slice_data_buf->stage = buf_bar[0].dstStageMask;
        slice_data_buf->access = buf_bar[0].dstAccessMask;
        nb_buf_bar = 0;
    }

    /* Run setup shader */
    ff_vk_exec_bind_shader(&fv->s, exec, &fv->setup);
    pd = (FFv1VkParameters) {
        /* Slice state follows the per-slice SliceContext region */
        .slice_state = slice_data_buf->address + slice_data_size*f->slice_count,
        .scratch_data = tmp_data_buf->address,
        .out_data = out_data_buf->address,
        .slice_size_max = out_data_buf->size / f->slice_count,
        .bits_per_raw_sample = f->bits_per_raw_sample,
        .sar[0] = pict->sample_aspect_ratio.num,
        .sar[1] = pict->sample_aspect_ratio.den,
        .chroma_shift[0] = f->chroma_h_shift,
        .chroma_shift[1] = f->chroma_v_shift,
        .plane_state_size = plane_state_size,
        .context_count = context_count,
        .crcref = f->crcref,
        .context_model = fv->ctx.context_model,
        .version = f->version,
        .micro_version = f->micro_version,
        .force_pcm = fv->force_pcm,
        .key_frame = f->key_frame,
        .planes = av_pix_fmt_count_planes(avctx->sw_pix_fmt),
        .codec_planes = f->plane_count,
        .transparency = f->transparency,
        .colorspace = f->colorspace,
        .pic_mode = !(pict->flags & AV_FRAME_FLAG_INTERLACED) ? 3 :
                    !(pict->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST) ? 2 : 1,
        .ec = f->ec,
        .ppi = fv->ppi,
        .chunks = fv->chunks,
    };
    ff_vk_shader_update_push_const(&fv->s, exec, &fv->setup,
                                   VK_SHADER_STAGE_COMPUTE_BIT,
                                   0, sizeof(pd), &pd);
    vk->CmdDispatch(exec->buf, fv->ctx.num_h_slices, fv->ctx.num_v_slices, 1);

    /* Setup shader modified the slice data buffer */
    buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
        .srcStageMask = slice_data_buf->stage,
        .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
        .srcAccessMask = slice_data_buf->access,
        .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
                         VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .buffer = slice_data_buf->buf,
        .size = slice_data_size*f->slice_count,
        .offset = 0,
    };

    if (f->key_frame || f->version > 3) {
        FFv1VkResetParameters pd_reset;

        ff_vk_shader_update_desc_buffer(&fv->s, exec, &fv->reset,
                                        1, 0, 0,
                                        slice_data_buf,
                                        0, slice_data_size*f->slice_count,
                                        VK_FORMAT_UNDEFINED);

        /* Run reset shader */
        ff_vk_exec_bind_shader(&fv->s, exec, &fv->reset);
        pd_reset = (FFv1VkResetParameters) {
            .slice_state = slice_data_buf->address + slice_data_size*f->slice_count,
            .plane_state_size = plane_state_size,
            .context_count = context_count,
            .codec_planes = f->plane_count,
            .key_frame = f->key_frame,
        };
        ff_vk_shader_update_push_const(&fv->s, exec, &fv->reset,
                                       VK_SHADER_STAGE_COMPUTE_BIT,
                                       0, sizeof(pd_reset), &pd_reset);

        /* Sync between setup and reset shaders */
        vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
            .pBufferMemoryBarriers = buf_bar,
            .bufferMemoryBarrierCount = nb_buf_bar,
        });
        slice_data_buf->stage = buf_bar[0].dstStageMask;
        slice_data_buf->access = buf_bar[0].dstAccessMask;
        nb_buf_bar = 0;

        vk->CmdDispatch(exec->buf, fv->ctx.num_h_slices, fv->ctx.num_v_slices,
                        f->plane_count);
    }

    /* Run RCT shader */
    if (fv->is_rgb) {
        RET(run_rct(avctx, exec,
                    enc_in, enc_in_views,
                    &intermediate_frame, intermediate_views,
                    img_bar, &nb_img_bar, buf_bar, &nb_buf_bar,
                    slice_data_buf, slice_data_size));

        /* Use the new frame */
        enc_in = intermediate_frame;
        enc_in_views = intermediate_views;
    }

    /* If the reset shader ran, insert a barrier now. */
    if (f->key_frame || f->version > 3) {
        /* Reset shader modified the slice data buffer */
        buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
            .srcStageMask = slice_data_buf->stage,
            .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
            .srcAccessMask = slice_data_buf->access,
            .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
                             VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .buffer = slice_data_buf->buf,
            .size = slice_data_buf->size - slice_data_size*f->slice_count,
            .offset = slice_data_size*f->slice_count,
        };
    }

    /* Final barrier before encoding */
    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
        .pImageMemoryBarriers = img_bar,
        .imageMemoryBarrierCount = nb_img_bar,
        .pBufferMemoryBarriers = buf_bar,
        .bufferMemoryBarrierCount = nb_buf_bar,
    });
    nb_img_bar = 0;
    if (nb_buf_bar) {
        slice_data_buf->stage = buf_bar[0].dstStageMask;
        slice_data_buf->access = buf_bar[0].dstAccessMask;
        nb_buf_bar = 0;
    }

    /* Main encode shader */
    ff_vk_shader_update_desc_buffer(&fv->s, exec, &fv->enc,
                                    1, 0, 0,
                                    slice_data_buf,
                                    0, slice_data_size*f->slice_count,
                                    VK_FORMAT_UNDEFINED);
    ff_vk_shader_update_img_array(&fv->s, exec, &fv->enc,
                                  enc_in, enc_in_views,
                                  1, 1,
                                  VK_IMAGE_LAYOUT_GENERAL,
                                  VK_NULL_HANDLE);
    ff_vk_shader_update_desc_buffer(&fv->s, exec,
                                    &fv->enc, 1, 2, 0,
                                    results_data_buf,
                                    0, results_data_buf->size,
                                    VK_FORMAT_UNDEFINED);

    ff_vk_exec_bind_shader(&fv->s, exec, &fv->enc);
    ff_vk_shader_update_push_const(&fv->s, exec, &fv->enc,
                                   VK_SHADER_STAGE_COMPUTE_BIT,
                                   0, sizeof(pd), &pd);
    vk->CmdDispatch(exec->buf, fv->ctx.num_h_slices, fv->ctx.num_v_slices, 1);

    /* Submit */
    err = ff_vk_exec_submit(&fv->s, exec);
    if (err < 0)
        return err;

    /* We need the encoded data immediately */
    ff_vk_exec_wait(&fv->s, exec);
    av_frame_free(&intermediate_frame);

    /* Invalidate slice/output data if needed */
    if (!(results_data_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
        invalidate_data[nb_invalidate_data++] = (VkMappedMemoryRange) {
            .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
            .memory = results_data_buf->mem,
            .offset = 0,
            .size = VK_WHOLE_SIZE,
        };
    if (!(out_data_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
        invalidate_data[nb_invalidate_data++] = (VkMappedMemoryRange) {
            .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
            .memory = out_data_buf->mem,
            .offset = 0,
            .size = VK_WHOLE_SIZE,
        };
    if (nb_invalidate_data)
        vk->InvalidateMappedMemoryRanges(fv->s.hwctx->act_dev,
                                         nb_invalidate_data, invalidate_data);

    /* Each slice reports two 64-bit values: sc[0] is its size in bytes and
     * sc[1] the offset of its data within the output buffer. */

    /* First slice is in-place */
    buf_p = pkt->data;
    sc = &((uint64_t *)results_data_buf->mapped_mem)[0];
    av_log(avctx, AV_LOG_DEBUG, "Slice size = %"PRIu64" (max %i), src offset = %"PRIu64"\n",
           sc[0], pkt->size / f->slice_count, sc[1]);
    av_assert0(sc[0] < pd.slice_size_max);
    av_assert0(sc[0] < (1 << 24));
    buf_p += sc[0];

    /* We have to copy the rest */
    for (int i = 1; i < f->slice_count; i++) {
        uint64_t bytes;
        uint8_t *bs_start;

        sc = &((uint64_t *)results_data_buf->mapped_mem)[i*2];
        bytes = sc[0];
        bs_start = pkt->data + sc[1];

        av_log(avctx, AV_LOG_DEBUG, "Slice %i size = %"PRIu64" (max %"PRIu64"), "
               "src offset = %"PRIu64"\n",
               i, bytes, pd.slice_size_max, sc[1]);
        av_assert0(bytes < pd.slice_size_max);
        av_assert0(bytes < (1 << 24));

        /* Source and destination live in the same buffer and may overlap */
        memmove(buf_p, bs_start, bytes);

        buf_p += bytes;
    }

    f->picture_number++;
    pkt->size = buf_p - pkt->data;
    pkt->flags |= AV_PKT_FLAG_KEY * f->key_frame;
    *got_packet = 1;

    av_log(avctx, AV_LOG_VERBOSE, "Total data = %i\n",
           pkt->size);

    err = 0;

fail:
    /* Frames added as a dep are always referenced, so we only need to
     * clean this up. */
    av_frame_free(&intermediate_frame);

    /* Propagate the error that brought us here instead of reporting
     * success for a frame that was never encoded. */
    return err;

    /* Allocation failures: the references below are still exclusively ours
     * (not yet registered with exec or attached to pkt), so release them.
     * The reference whose allocation just failed is never touched. */
fail_results:
    av_buffer_unref(&results_data_ref);
fail_slice:
    if (own_slice_data)
        av_buffer_unref(&slice_data_ref);
fail_tmp:
    av_buffer_unref(&tmp_data_ref);
    return err;
}
/**
 * Create the frame pool that holds the intermediate (RCT output) frames.
 *
 * The pool uses the requested software format, optimal tiling, storage
 * usage and mutable-format images; its dimensions are the input frame
 * dimensions rounded up to a multiple of 32.
 *
 * @param avctx     codec context (priv_data is the encoder context)
 * @param sw_format software pixel format of the intermediate frames
 * @return 0 on success, a negative AVERROR code otherwise; on failure
 *         fv->intermediate_frames_ref is left NULL
 */
static int init_indirect(AVCodecContext *avctx, enum AVPixelFormat sw_format)
{
    VulkanEncodeFFv1Context *fv = avctx->priv_data;
    AVHWFramesContext *pool_ctx;
    AVVulkanFramesContext *pool_vk;
    int ret;

    fv->intermediate_frames_ref = av_hwframe_ctx_alloc(fv->s.device_ref);
    if (fv->intermediate_frames_ref == NULL)
        return AVERROR(ENOMEM);

    pool_ctx = (AVHWFramesContext *)fv->intermediate_frames_ref->data;
    pool_vk  = pool_ctx->hwctx;

    /* Generic frame pool parameters */
    pool_ctx->format    = AV_PIX_FMT_VULKAN;
    pool_ctx->sw_format = sw_format;
    pool_ctx->width     = FFALIGN(fv->s.frames->width, 32);
    pool_ctx->height    = FFALIGN(fv->s.frames->height, 32);

    /* Vulkan specific parameters */
    pool_vk->tiling    = VK_IMAGE_TILING_OPTIMAL;
    pool_vk->usage     = VK_IMAGE_USAGE_STORAGE_BIT;
    pool_vk->img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;

    ret = av_hwframe_ctx_init(fv->intermediate_frames_ref);
    if (ret >= 0)
        return 0;

    av_log(avctx, AV_LOG_ERROR, "Unable to initialize frame pool with format %s: %s\n",
           av_get_pix_fmt_name(sw_format), av_err2str(ret));
    av_buffer_unref(&fv->intermediate_frames_ref);
    return ret;
}
/**
 * Check whether a software pixel format is listed in the frame constraints.
 *
 * The valid_sw_formats list is terminated by AV_PIX_FMT_NONE (-1), not by 0:
 * 0 is a real pixel format, so testing the entry for truth would stop early
 * on it and would run past the terminator otherwise.
 *
 * @param constraints constraints to search; may be NULL
 * @param fmt         format to look for
 * @return 1 if fmt is listed, 0 if it is not or if no list is available
 */
static int check_support(AVHWFramesConstraints *constraints,
                         enum AVPixelFormat fmt)
{
    /* The list is optional; treat "unknown" as "not supported" */
    if (!constraints || !constraints->valid_sw_formats)
        return 0;

    for (int i = 0; constraints->valid_sw_formats[i] != AV_PIX_FMT_NONE; i++) {
        if (constraints->valid_sw_formats[i] == fmt)
            return 1;
    }

    return 0;
}
/**
 * Pick the pixel format of the intermediate RGB (RCT output) frames.
 *
 * The preferred format has just enough components and bits: 16 or 32 bits
 * per component depending on use32bit, with alpha only when transparency is
 * coded. If the device does not support it, wider alternatives are tried.
 *
 * @param avctx codec context (priv_data is the encoder context)
 * @return a supported format, or AV_PIX_FMT_NONE if the constraints are
 *         unavailable or none of the candidates is supported
 */
static enum AVPixelFormat get_supported_rgb_buffer_fmt(AVCodecContext *avctx)
{
    VulkanEncodeFFv1Context *fv = avctx->priv_data;

    enum AVPixelFormat fmt;
    AVHWFramesConstraints *constraints;

    constraints = av_hwdevice_get_hwframe_constraints(fv->s.device_ref,
                                                      NULL);
    /* The query may fail; without constraints nothing can be verified */
    if (!constraints)
        return AV_PIX_FMT_NONE;

    /* The format list itself is optional as well */
    if (!constraints->valid_sw_formats) {
        fmt = AV_PIX_FMT_NONE;
        goto end;
    }

    /* What we'd like to optimally have */
    fmt = fv->ctx.use32bit ?
          (fv->ctx.transparency ? AV_PIX_FMT_RGBA128 : AV_PIX_FMT_RGB96) :
          (fv->ctx.transparency ? AV_PIX_FMT_RGBA64 : AV_PIX_FMT_RGB48);
    if (check_support(constraints, fmt))
        goto end;

    /* Fallbacks: add an alpha channel and/or widen the components */
    if (fv->ctx.use32bit) {
        if (check_support(constraints, (fmt = AV_PIX_FMT_RGBA128)))
            goto end;
    } else {
        if (check_support(constraints, (fmt = AV_PIX_FMT_RGBA64)))
            goto end;

        if (!fv->ctx.transparency &&
            check_support(constraints, (fmt = AV_PIX_FMT_RGB96)))
            goto end;

        if (check_support(constraints, (fmt = AV_PIX_FMT_RGBA128)))
            goto end;
    }

    fmt = AV_PIX_FMT_NONE;

end:
    av_hwframe_constraints_free(&constraints);
    return fmt;
}
/**
 * Append the definitions and GLSL sources common to all shaders of this
 * encoder to a shader's source buffer.
 *
 * Emits the CONTEXT_SIZE and MAX_QUANT_TABLE_MASK defines, the Golomb
 * specific defines when the Golomb-Rice coder is selected, the sample type
 * defines (16 or 32 bit) and finally the shared GLSL sources.
 *
 * @param avctx codec context (priv_data is the encoder context)
 * @param shd   shader being assembled (the GLSL*() macros refer to this
 *              variable by name)
 */
static void define_shared_code(AVCodecContext *avctx, FFVulkanShader *shd)
{
    VulkanEncodeFFv1Context *fv = avctx->priv_data;
    const int use_golomb  = fv->ctx.ac == AC_GOLOMB_RICE;
    const int sample_bits = fv->ctx.use32bit ? 32 : 16;

    av_bprintf(&shd->src, "#define CONTEXT_SIZE %i\n", CONTEXT_SIZE);
    av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_MASK 0x%x\n", MAX_QUANT_TABLE_MASK);

    if (use_golomb) {
        av_bprintf(&shd->src, "#define PB_UNALIGNED\n");
        av_bprintf(&shd->src, "#define GOLOMB\n");
    }

    /* Scalar and vector sample types used by the shaders */
    GLSLF(0, #define TYPE int%i_t ,sample_bits);
    GLSLF(0, #define VTYPE2 i%ivec2 ,sample_bits);
    GLSLF(0, #define VTYPE3 i%ivec3 ,sample_bits);

    GLSLD(ff_source_common_comp);
    GLSLD(ff_source_rangecoder_comp);

    /* The VLC helpers are only needed by the Golomb-Rice coder */
    if (use_golomb)
        GLSLD(ff_source_ffv1_vlc_comp);

    GLSLD(ff_source_ffv1_common_comp);
}
/**
 * Build, compile, link and register the per-frame setup shader.
 *
 * @param avctx codec context (priv_data is the encoder context)
 * @param spv   SPIR-V compiler used to compile the assembled GLSL
 * @return >= 0 on success, a negative AVERROR code otherwise
 */
static int init_setup_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
{
    VulkanEncodeFFv1Context *fv = avctx->priv_data;
    FFVulkanShader *shd = &fv->setup;
    const char *glsl_exts[] = {
        "GL_EXT_buffer_reference",
        "GL_EXT_buffer_reference2",
    };
    void *spv_opaque = NULL;
    uint8_t *spirv;
    size_t spirv_size;
    int err;

    /* A single invocation per workgroup */
    RET(ff_vk_shader_init(&fv->s, shd, "ffv1_setup",
                          VK_SHADER_STAGE_COMPUTE_BIT,
                          glsl_exts, 2,
                          1, 1, 1,
                          0));

    av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
    av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
    av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);

    /* First descriptor set: constant tables */
    FFVulkanDescriptorSetBinding const_bindings[] = {
        {
            .name        = "rangecoder_static_buf",
            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .mem_layout  = "scalar",
            .buf_content = "uint8_t zero_one_state[512];",
        },
        { /* This descriptor is never used */
            .name        = "quant_buf",
            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .mem_layout  = "scalar",
            .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
                           "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
        },
    };
    RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, const_bindings, 2, 1, 0));

    define_shared_code(avctx, shd);

    /* Second descriptor set: slice contexts and the source images */
    FFVulkanDescriptorSetBinding frame_bindings[] = {
        {
            .name        = "slice_data_buf",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "SliceContext slice_ctx[1024];",
        },
        {
            .name       = "src",
            .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
            .dimensions = 2,
            .mem_layout = ff_vk_shader_rep_fmt(fv->s.frames->sw_format,
                                               fv->rep_fmt),
            .elems      = av_pix_fmt_count_planes(fv->s.frames->sw_format),
            .mem_quali  = "readonly",
            .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
        },
    };
    RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, frame_bindings, 2, 0, 0));

    add_push_data(shd);

    GLSLD(ff_source_ffv1_enc_setup_comp);

    RET(spv->compile_shader(&fv->s, spv, shd, &spirv, &spirv_size, "main",
                            &spv_opaque));
    RET(ff_vk_shader_link(&fv->s, shd, spirv, spirv_size, "main"));

    RET(ff_vk_shader_register_exec(&fv->s, &fv->exec_pool, shd));

fail:
    /* The compiler output is no longer needed, whatever the outcome */
    if (spv_opaque)
        spv->free_shader(spv, &spv_opaque);

    return err;
}
/**
 * Build, compile, link and register the state reset shader.
 *
 * The workgroup width is the device's maximum compute workgroup size along
 * X, capped at 1024.
 *
 * @param avctx codec context (priv_data is the encoder context)
 * @param spv   SPIR-V compiler used to compile the assembled GLSL
 * @return >= 0 on success, a negative AVERROR code otherwise
 */
static int init_reset_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
{
    VulkanEncodeFFv1Context *fv = avctx->priv_data;
    FFVulkanShader *shd = &fv->reset;
    const char *glsl_exts[] = {
        "GL_EXT_buffer_reference",
        "GL_EXT_buffer_reference2",
    };
    void *spv_opaque = NULL;
    uint8_t *spirv;
    size_t spirv_size;
    int err;
    int wg_dim = FFMIN(fv->s.props.properties.limits.maxComputeWorkGroupSize[0], 1024);

    RET(ff_vk_shader_init(&fv->s, shd, "ffv1_reset",
                          VK_SHADER_STAGE_COMPUTE_BIT,
                          glsl_exts, 2,
                          wg_dim, 1, 1,
                          0));

    av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
    av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
    av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);

    /* First descriptor set: constant tables */
    FFVulkanDescriptorSetBinding const_bindings[] = {
        {
            .name        = "rangecoder_static_buf",
            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .mem_layout  = "scalar",
            .buf_content = "uint8_t zero_one_state[512];",
        },
        {
            .name        = "quant_buf",
            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .mem_layout  = "scalar",
            .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
                           "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
        },
    };
    RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, const_bindings, 2, 1, 0));

    define_shared_code(avctx, shd);

    /* Second descriptor set: the slice contexts, declared read-only */
    FFVulkanDescriptorSetBinding slice_bindings[] = {
        {
            .name        = "slice_data_buf",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .mem_quali   = "readonly",
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "SliceContext slice_ctx[1024];",
        },
    };
    RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, slice_bindings, 1, 0, 0));

    /* Push constants; the members mirror FFv1VkResetParameters */
    GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
    GLSLC(1, u8buf slice_state; );
    GLSLC(1, uint plane_state_size; );
    GLSLC(1, uint context_count; );
    GLSLC(1, uint8_t codec_planes; );
    GLSLC(1, uint8_t key_frame; );
    GLSLC(1, uint8_t padding[3]; );
    GLSLC(0, }; );
    ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters),
                                VK_SHADER_STAGE_COMPUTE_BIT);

    GLSLD(ff_source_ffv1_reset_comp);

    RET(spv->compile_shader(&fv->s, spv, shd, &spirv, &spirv_size, "main",
                            &spv_opaque));
    RET(ff_vk_shader_link(&fv->s, shd, spirv, spirv_size, "main"));

    RET(ff_vk_shader_register_exec(&fv->s, &fv->exec_pool, shd));

fail:
    /* The compiler output is no longer needed, whatever the outcome */
    if (spv_opaque)
        spv->free_shader(spv, &spv_opaque);

    return err;
}
/**
 * Build, compile, link and register the RCT shader, and create the
 * intermediate frame pool it writes to.
 *
 * The workgroup is square; its side is the integer square root of the
 * device's maximum number of compute invocations per workgroup.
 *
 * @param avctx codec context (priv_data is the encoder context)
 * @param spv   SPIR-V compiler used to compile the assembled GLSL
 * @return >= 0 on success, AVERROR(ENOTSUP) when no usable intermediate
 *         pixel format exists, another negative AVERROR code otherwise
 */
static int init_rct_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
{
    VulkanEncodeFFv1Context *fv = avctx->priv_data;
    FFVulkanShader *shd = &fv->rct;
    const char *glsl_exts[] = {
        "GL_EXT_buffer_reference",
        "GL_EXT_buffer_reference2",
    };
    void *spv_opaque = NULL;
    uint8_t *spirv;
    size_t spirv_size;
    int err;
    int wg_count = sqrt(fv->s.props.properties.limits.maxComputeWorkGroupInvocations);
    enum AVPixelFormat intermediate_fmt = get_supported_rgb_buffer_fmt(avctx);

    if (intermediate_fmt == AV_PIX_FMT_NONE) {
        av_log(avctx, AV_LOG_ERROR, "Unable to find a supported compatible "
                                    "pixel format for RCT buffer!\n");
        return AVERROR(ENOTSUP);
    }

    /* Frame pool for the shader's output */
    RET(init_indirect(avctx, intermediate_fmt));

    RET(ff_vk_shader_init(&fv->s, shd, "ffv1_rct",
                          VK_SHADER_STAGE_COMPUTE_BIT,
                          glsl_exts, 2,
                          wg_count, wg_count, 1,
                          0));

    av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
    av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
    av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);

    /* First descriptor set: constant tables */
    FFVulkanDescriptorSetBinding const_bindings[] = {
        {
            .name        = "rangecoder_static_buf",
            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .mem_layout  = "scalar",
            .buf_content = "uint8_t zero_one_state[512];",
        },
        {
            .name        = "quant_buf",
            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .mem_layout  = "scalar",
            .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
                           "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
        },
    };
    RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, const_bindings, 2, 1, 0));

    define_shared_code(avctx, shd);

    /* Second descriptor set: slice contexts, source and destination images */
    FFVulkanDescriptorSetBinding frame_bindings[] = {
        {
            .name        = "slice_data_buf",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .mem_quali   = "readonly",
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "SliceContext slice_ctx[1024];",
        },
        {
            .name       = "src",
            .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
            .dimensions = 2,
            .mem_layout = ff_vk_shader_rep_fmt(fv->s.frames->sw_format,
                                               fv->rep_fmt),
            .elems      = av_pix_fmt_count_planes(fv->s.frames->sw_format),
            .mem_quali  = "readonly",
            .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
        },
        {
            .name       = "dst",
            .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
            .dimensions = 2,
            .mem_layout = ff_vk_shader_rep_fmt(intermediate_fmt,
                                               fv->rep_fmt),
            .elems      = av_pix_fmt_count_planes(intermediate_fmt),
            .mem_quali  = "writeonly",
            .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
        },
    };
    RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, frame_bindings, 3, 0, 0));

    /* Push constants; the members mirror FFv1VkRCTParameters */
    GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
    GLSLC(1, int offset; );
    GLSLC(1, uint8_t planar_rgb; );
    GLSLC(1, uint8_t transparency; );
    GLSLC(1, uint8_t padding[2]; );
    GLSLC(0, }; );
    ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkRCTParameters),
                                VK_SHADER_STAGE_COMPUTE_BIT);

    GLSLD(ff_source_ffv1_enc_rct_comp);

    RET(spv->compile_shader(&fv->s, spv, shd, &spirv, &spirv_size, "main",
                            &spv_opaque));
    RET(ff_vk_shader_link(&fv->s, shd, spirv, spirv_size, "main"));

    RET(ff_vk_shader_register_exec(&fv->s, &fv->exec_pool, shd));

fail:
    /* The compiler output is no longer needed, whatever the outcome */
    if (spv_opaque)
        spv->free_shader(spv, &spv_opaque);

    return err;
}
/**
 * Assemble, compile, link and register the main FFv1 encode compute shader,
 * which is stored in the private context's `enc` member.
 *
 * Two descriptor sets are declared, in this order:
 *  - three uniform buffers: the range coder state table, the quantization
 *    tables and the IEEE CRC table;
 *  - the slice context storage buffer, the read-only source image planes and
 *    the write-only per-slice results storage buffer.
 *
 * The source image format comes from the intermediate frames context when
 * one is set, and from the Vulkan context's frames otherwise. The GLSL body
 * is selected by the coder type (Golomb-Rice or range coder) and by whether
 * the RGB coding mode is active.
 *
 * NOTE: RET() and the GLSL*() helper macros are defined outside this
 * function; they are expected to rely on the local names `err` and `shd` and
 * on the `fail` label, which is why those names are kept as they are.
 *
 * @param avctx codec context whose priv_data is a VulkanEncodeFFv1Context
 * @param spv   SPIR-V compiler used to build the shader
 * @return a negative error code if any step fails, otherwise the result of
 *         the last step performed
 */
static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
{
    int err;
    VulkanEncodeFFv1Context *vk = avctx->priv_data;
    FFV1Context *ffv1 = &vk->ctx;
    FFVulkanShader *shd = &vk->enc;
    FFVulkanDescriptorSetBinding *bindings;
    AVHWFramesContext *in_frames;
    const char *glsl_exts[] = {
        "GL_EXT_buffer_reference",
        "GL_EXT_buffer_reference2",
    };
    uint8_t *spirv;
    size_t spirv_size;
    void *compiler_priv = NULL;

    /* Prefer the intermediate frames as the shader input when they exist */
    if (vk->intermediate_frames_ref)
        in_frames = (AVHWFramesContext *)vk->intermediate_frames_ref->data;
    else
        in_frames = vk->s.frames;

    RET(ff_vk_shader_init(&vk->s, shd, "ffv1_enc",
                          VK_SHADER_STAGE_COMPUTE_BIT,
                          glsl_exts, 2,
                          1, 1, 1,
                          0));

    /* Table dimensions used by the buffer declarations below */
    av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
    av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
    av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);

    /* First set: constant lookup tables */
    bindings = (FFVulkanDescriptorSetBinding []) {
        {
            .name        = "rangecoder_static_buf",
            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
            .mem_layout  = "scalar",
            .buf_content = "uint8_t zero_one_state[512];",
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
        },
        {
            .name        = "quant_buf",
            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
            .mem_layout  = "scalar",
            .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
                           "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
        },
        {
            .name        = "crc_ieee_buf",
            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
            .mem_layout  = "scalar",
            .buf_content = "uint32_t crc_ieee[256];",
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
        },
    };
    RET(ff_vk_shader_add_descriptor_set(&vk->s, shd, bindings, 3, 1, 0));

    define_shared_code(avctx, shd);

    /* Second set: slice state, input image and results */
    bindings = (FFVulkanDescriptorSetBinding []) {
        {
            .name        = "slice_data_buf",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .buf_content = "SliceContext slice_ctx[1024];",
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
        },
        {
            .name       = "src",
            .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
            .mem_quali  = "readonly",
            .mem_layout = ff_vk_shader_rep_fmt(in_frames->sw_format,
                                               vk->rep_fmt),
            .dimensions = 2,
            .elems      = av_pix_fmt_count_planes(in_frames->sw_format),
            .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
        },
        {
            .name        = "results_data_buf",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .mem_quali   = "writeonly",
            .buf_content = "uint64_t slice_results[2048];",
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
        },
    };
    RET(ff_vk_shader_add_descriptor_set(&vk->s, shd, bindings, 3, 0, 0));

    add_push_data(shd);

    /* Assemble the shader body: common code, then coder, then pixel path */
    GLSLD(ff_source_ffv1_enc_common_comp);

    if (ffv1->ac != AC_GOLOMB_RICE) {
        GLSLD(ff_source_ffv1_enc_ac_comp);
    } else {
        GLSLD(ff_source_ffv1_enc_vlc_comp);
    }

    if (!vk->is_rgb) {
        GLSLD(ff_source_ffv1_enc_comp);
    } else {
        GLSLD(ff_source_ffv1_enc_rgb_comp);
    }

    RET(spv->compile_shader(&vk->s, spv, shd, &spirv, &spirv_size, "main",
                            &compiler_priv));
    RET(ff_vk_shader_link(&vk->s, shd, spirv, spirv_size, "main"));
    RET(ff_vk_shader_register_exec(&vk->s, &vk->exec_pool, shd));

fail:
    /* The compiler's per-shader state is released on success and failure */
    if (compiler_priv)
        spv->free_shader(spv, &compiler_priv);

    return err;
}
/**
 * Create the 512-byte range coder state table buffer, fill it from the
 * encoder's state_transition table and bind it to the setup and encode
 * shaders (descriptor set 0, binding 0).
 *
 * The upper half of the table (offset 256) receives state_transition[i] at
 * index i; the lower half receives 256 - state_transition[i] at index
 * 256 - i. Only i = 1..255 is processed, so table entries 0 and 256 are not
 * written by this function.
 *
 * @param avctx codec context whose priv_data is a VulkanEncodeFFv1Context
 * @return a negative error code if any step fails, otherwise the result of
 *         the last step performed
 */
static int init_state_transition_data(AVCodecContext *avctx)
{
    int err;
    VulkanEncodeFFv1Context *vk = avctx->priv_data;
    const size_t table_bytes = 512*sizeof(uint8_t);
    uint8_t *table;
    uint8_t *zero_state;
    uint8_t *one_state;

    RET(ff_vk_create_buf(&vk->s, &vk->rangecoder_static_buf,
                         table_bytes,
                         NULL, NULL,
                         VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
                         VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
                         VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));

    RET(ff_vk_map_buffer(&vk->s, &vk->rangecoder_static_buf,
                         &table, 0));

    /* The two halves of the table share one allocation */
    zero_state = table;
    one_state  = table + 256;
    for (int i = 1; i < 256; i++) {
        const int next = vk->ctx.state_transition[i];

        one_state[i]        = next;
        zero_state[256 - i] = 256 - next;
    }

    RET(ff_vk_unmap_buffer(&vk->s, &vk->rangecoder_static_buf, 1));

    /* Bind the table to the setup shader... */
    RET(ff_vk_shader_update_desc_buffer(&vk->s, &vk->exec_pool.contexts[0],
                                        &vk->setup, 0, 0, 0,
                                        &vk->rangecoder_static_buf,
                                        0, vk->rangecoder_static_buf.size,
                                        VK_FORMAT_UNDEFINED));

    /* ...and to the encode shader */
    RET(ff_vk_shader_update_desc_buffer(&vk->s, &vk->exec_pool.contexts[0],
                                        &vk->enc, 0, 0, 0,
                                        &vk->rangecoder_static_buf,
                                        0, vk->rangecoder_static_buf.size,
                                        VK_FORMAT_UNDEFINED));

fail:
    return err;
}
/**
 * Create the quantization table buffer, copy the encoder's quant_tables
 * array into it and bind it to the encode shader (descriptor set 0,
 * binding 1).
 *
 * The buffer is sized for MAX_QUANT_TABLES x MAX_CONTEXT_INPUTS x
 * MAX_QUANT_TABLE_SIZE int16_t entries; the copy transfers
 * sizeof(quant_tables) bytes.
 *
 * @param avctx codec context whose priv_data is a VulkanEncodeFFv1Context
 * @return a negative error code if any step fails, otherwise the result of
 *         the last step performed
 */
static int init_quant_table_data(AVCodecContext *avctx)
{
    int err;
    VulkanEncodeFFv1Context *vk = avctx->priv_data;
    const size_t table_bytes = sizeof(int16_t)*
                               MAX_QUANT_TABLES*
                               MAX_CONTEXT_INPUTS*
                               MAX_QUANT_TABLE_SIZE;
    int16_t *quant_dst;

    RET(ff_vk_create_buf(&vk->s, &vk->quant_buf,
                         table_bytes,
                         NULL, NULL,
                         VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
                         VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
                         VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));

    /* Upload the tables through a temporary host mapping */
    RET(ff_vk_map_buffer(&vk->s, &vk->quant_buf, (void *)&quant_dst, 0));
    memcpy(quant_dst, vk->ctx.quant_tables, sizeof(vk->ctx.quant_tables));
    RET(ff_vk_unmap_buffer(&vk->s, &vk->quant_buf, 1));

    RET(ff_vk_shader_update_desc_buffer(&vk->s, &vk->exec_pool.contexts[0],
                                        &vk->enc, 0, 1, 0,
                                        &vk->quant_buf,
                                        0, vk->quant_buf.size,
                                        VK_FORMAT_UNDEFINED));

fail:
    return err;
}
/**
 * Create the CRC table buffer, fill it with the 256-entry table returned by
 * av_crc_get_table(AV_CRC_32_IEEE) and bind it to the encode shader
 * (descriptor set 0, binding 2).
 *
 * @param avctx codec context whose priv_data is a VulkanEncodeFFv1Context
 * @return a negative error code if any step fails, otherwise the result of
 *         the last step performed
 */
static int init_crc_table_data(AVCodecContext *avctx)
{
    int err;
    VulkanEncodeFFv1Context *vk = avctx->priv_data;
    const size_t table_bytes = sizeof(int32_t)*256;
    const void *crc_src;
    uint32_t *crc_dst;

    RET(ff_vk_create_buf(&vk->s, &vk->crc_tab_buf,
                         table_bytes,
                         NULL, NULL,
                         VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
                         VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
                         VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));

    /* Upload the table through a temporary host mapping */
    RET(ff_vk_map_buffer(&vk->s, &vk->crc_tab_buf, (void *)&crc_dst, 0));
    crc_src = av_crc_get_table(AV_CRC_32_IEEE);
    memcpy(crc_dst, crc_src, table_bytes);
    RET(ff_vk_unmap_buffer(&vk->s, &vk->crc_tab_buf, 1));

    RET(ff_vk_shader_update_desc_buffer(&vk->s, &vk->exec_pool.contexts[0],
                                        &vk->enc, 0, 2, 0,
                                        &vk->crc_tab_buf,
                                        0, vk->crc_tab_buf.size,
                                        VK_FORMAT_UNDEFINED));

fail:
    return err;
}
/**
 * Encoder init callback.
 *
 * Performs, in order:
 *  - common FFv1 and encoder setup, targeting version 3 by default;
 *  - validation of coder, version, GOP size, slice count, subsampling
 *    alignment and PCM settings;
 *  - extradata generation;
 *  - Vulkan context, compute queue family and execution pool setup;
 *  - compilation of the setup, reset, (RGB only) RCT and encode shaders;
 *  - upload of the range coder, quantization and CRC tables.
 *
 * Nothing is released here on failure; the function simply returns the
 * error (the SPIR-V compiler is the only resource torn down locally).
 *
 * @param avctx codec context whose priv_data is a VulkanEncodeFFv1Context
 * @return 0 on success, a negative error code otherwise
 */
static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx)
{
    VulkanEncodeFFv1Context *vk = avctx->priv_data;
    FFV1Context *ffv1 = &vk->ctx;
    FFVkSPIRVCompiler *spv;
    int err;

    err = ff_ffv1_common_init(avctx);
    if (err < 0)
        return err;

    /* A coder value of 1 is remapped to the custom-table range coder */
    if (ffv1->ac == 1)
        ffv1->ac = AC_RANGE_CUSTOM_TAB;

    if ((err = ff_ffv1_encode_setup_plane_info(avctx, avctx->sw_pix_fmt)) < 0)
        return err;

    /* Target version 3 by default */
    ffv1->version = 3;

    if ((err = ff_ffv1_encode_init(avctx)) < 0)
        return err;

    /* Rice coding did not support high bit depths */
    if (ffv1->ac == AC_GOLOMB_RICE &&
        ffv1->bits_per_raw_sample > (ffv1->version > 3 ? 16 : 8)) {
        av_log(avctx, AV_LOG_WARNING, "bits_per_raw_sample > 8, "
               "forcing range coder\n");
        ffv1->ac = AC_RANGE_CUSTOM_TAB;
    }

    if (avctx->gop_size > 1 && ffv1->version < 4) {
        av_log(avctx, AV_LOG_ERROR, "Using inter frames requires version 4 (-level 4)\n");
        return AVERROR_INVALIDDATA;
    }

    if (ffv1->version == 4 &&
        avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
        av_log(avctx, AV_LOG_ERROR, "Version 4 is experimental and requires -strict -2\n");
        return AVERROR_INVALIDDATA;
    }

    /* The automatic slice layout below is currently disabled; it was keyed
     * on (fv->ctx.ac == AC_GOLOMB_RICE). */
    if (0) {
        const int aligned_w = FFALIGN(avctx->width, LG_ALIGN_W);
        const int aligned_h = FFALIGN(avctx->height, LG_ALIGN_H);
        int slices_x, slices_y;

        /* Pixels per line an invocation handles */
        int ppi = 0;

        /* Chunk size */
        int chunks = 0;

        do {
            if (ppi < 2)
                ppi++;
            chunks++;
            slices_x = aligned_w / (LG_ALIGN_W*ppi);
            slices_y = aligned_h / (LG_ALIGN_H*chunks);
        } while (slices_x > MAX_SLICES / slices_y);

        av_log(avctx, AV_LOG_VERBOSE, "Slice config: %ix%i, %i total\n",
               LG_ALIGN_W*ppi, LG_ALIGN_H*chunks, slices_x*slices_y);
        av_log(avctx, AV_LOG_VERBOSE, "Horizontal slices: %i (%i pixels per invoc)\n",
               slices_x, ppi);
        av_log(avctx, AV_LOG_VERBOSE, "Vertical slices: %i (%i chunks)\n",
               slices_y, chunks);

        ffv1->num_h_slices = slices_x;
        ffv1->num_v_slices = slices_y;

        vk->ppi    = ppi;
        vk->chunks = chunks;
    } else {
        /* Non-positive requests fall back to 32 slices per direction,
         * then the count is capped by the frame dimensions. */
        const int want_h = vk->num_h_slices > 0 ? vk->num_h_slices : 32;
        const int want_v = vk->num_v_slices > 0 ? vk->num_v_slices : 32;

        ffv1->num_h_slices = FFMIN(want_h, avctx->width);
        ffv1->num_v_slices = FFMIN(want_v, avctx->height);
    }

    if ((err = ff_ffv1_write_extradata(avctx)) < 0)
        return err;

    if (ffv1->version < 4 &&
        (((ffv1->chroma_h_shift > 0) && (avctx->width % (64 << ffv1->chroma_h_shift))) ||
         ((ffv1->chroma_v_shift > 0) && (avctx->height % (64 << ffv1->chroma_v_shift))))) {
        av_log(avctx, AV_LOG_ERROR, "Encoding frames with subsampling and unaligned "
               "dimensions is only supported in version 4 (-level 4)\n");
        return AVERROR_PATCHWELCOME;
    }

    if (vk->force_pcm && ffv1->version < 4) {
        av_log(avctx, AV_LOG_ERROR, "PCM coding only supported by version 4 (-level 4)\n");
        return AVERROR_INVALIDDATA;
    }
    if (vk->force_pcm && ffv1->ac != AC_RANGE_CUSTOM_TAB) {
        av_log(avctx, AV_LOG_ERROR, "PCM coding requires range coding\n");
        return AVERROR_INVALIDDATA;
    }

    /* Init Vulkan */
    if ((err = ff_vk_init(&vk->s, avctx, NULL, avctx->hw_frames_ctx)) < 0)
        return err;

    if ((err = ff_vk_qf_init(&vk->s, &vk->qf, VK_QUEUE_COMPUTE_BIT)) < 0) {
        av_log(avctx, AV_LOG_ERROR, "Device has no compute queues!\n");
        return err;
    }

    if ((err = ff_vk_exec_pool_init(&vk->s, &vk->qf, &vk->exec_pool,
                                    1, /* Single-threaded for now */
                                    0, 0, 0, NULL)) < 0)
        return err;

    spv = ff_vk_spirv_init();
    if (!spv) {
        av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
        return AVERROR_EXTERNAL;
    }

    /* Detect the special RGB coding mode: any non-zero colorspace, except
     * when the software format is YA8 */
    vk->is_rgb = ffv1->colorspace != 0 && avctx->sw_pix_fmt != AV_PIX_FMT_YA8;

    /* bits_per_raw_sample use regular unsigned representation,
     * but in higher bit depths, the data is casted to int16_t */
    vk->rep_fmt = (!vk->is_rgb && ffv1->bits_per_raw_sample > 8) ?
                  FF_VK_REP_INT : FF_VK_REP_UINT;

    /* Build the shaders in order, stopping at the first failure. The
     * compiler is released exactly once regardless of the outcome. */
    err = init_setup_shader(avctx, spv);
    if (err >= 0)
        err = init_reset_shader(avctx, spv);
    if (err >= 0 && vk->is_rgb)
        err = init_rct_shader(avctx, spv);
    if (err >= 0)
        err = init_encode_shader(avctx, spv);

    spv->uninit(&spv);
    if (err < 0)
        return err;

    /* Range coder data */
    if ((err = init_state_transition_data(avctx)) < 0)
        return err;

    /* Quantization table data */
    if ((err = init_quant_table_data(avctx)) < 0)
        return err;

    /* CRC table buffer */
    if ((err = init_crc_table_data(avctx)) < 0)
        return err;

    return 0;
}
/**
 * Encoder close callback: releases everything owned by the private context.
 *
 * Teardown order: execution pool, the four shaders, frame/buffer references
 * and pools, the three constant table buffers, and finally the Vulkan
 * context itself.
 *
 * NOTE(review): the codec descriptor sets FF_CODEC_CAP_INIT_CLEANUP, so this
 * is presumably also invoked after a failed init -- confirm that every call
 * below tolerates members that were never initialized.
 *
 * @param avctx codec context whose priv_data is a VulkanEncodeFFv1Context
 * @return always 0
 */
static av_cold int vulkan_encode_ffv1_close(AVCodecContext *avctx)
{
    VulkanEncodeFFv1Context *priv = avctx->priv_data;

    /* Execution pool first, then the shaders */
    ff_vk_exec_pool_free(&priv->s, &priv->exec_pool);

    ff_vk_shader_free(&priv->s, &priv->enc);
    ff_vk_shader_free(&priv->s, &priv->rct);
    ff_vk_shader_free(&priv->s, &priv->reset);
    ff_vk_shader_free(&priv->s, &priv->setup);

    /* Intermediate frames and per-frame buffer pools */
    av_buffer_unref(&priv->intermediate_frames_ref);

    av_buffer_pool_uninit(&priv->results_data_pool);
    av_buffer_pool_uninit(&priv->out_data_pool);
    av_buffer_pool_uninit(&priv->tmp_data_pool);

    av_buffer_unref(&priv->keyframe_slice_data_ref);
    av_buffer_pool_uninit(&priv->slice_data_pool);

    /* Constant read-only table buffers */
    ff_vk_free_buf(&priv->s, &priv->quant_buf);
    ff_vk_free_buf(&priv->s, &priv->rangecoder_static_buf);
    ff_vk_free_buf(&priv->s, &priv->crc_tab_buf);

    /* The Vulkan context goes last */
    ff_vk_uninit(&priv->s);

    return 0;
}
/* Byte offset of a private-context member, for use in the AVOption table */
#define OFFSET(x) offsetof(VulkanEncodeFFv1Context, x)
/* Flags shared by every option: video + encoding parameter. The replacement
 * list is parenthesized so that the macro behaves as a single operand in any
 * expression (e.g. `VE & flag`), not only in plain initializers. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
/*
 * Private options of the Vulkan FFv1 encoder.
 *
 * Each entry lists: name, help text, offset of the backing member in
 * VulkanEncodeFFv1Context, option type, default value, minimum, maximum and
 * flags. The "rice" and "range_tab" entries are named constants belonging to
 * the "coder" unit. The table is terminated by a NULL entry.
 */
static const AVOption vulkan_encode_ffv1_options[] = {
/* Stored in ctx.ec; defaults to -1 */
{ "slicecrc", "Protect slices with CRCs", OFFSET(ctx.ec), AV_OPT_TYPE_BOOL,
{ .i64 = -1 }, -1, 1, VE },
{ "context", "Context model", OFFSET(ctx.context_model), AV_OPT_TYPE_INT,
{ .i64 = 0 }, 0, 1, VE },
/* Stored in ctx.ac; defaults to the custom-table range coder */
{ "coder", "Coder type", OFFSET(ctx.ac), AV_OPT_TYPE_INT,
{ .i64 = AC_RANGE_CUSTOM_TAB }, -2, 2, VE, .unit = "coder" },
{ "rice", "Golomb rice", 0, AV_OPT_TYPE_CONST,
{ .i64 = AC_GOLOMB_RICE }, INT_MIN, INT_MAX, VE, .unit = "coder" },
{ "range_tab", "Range with custom table", 0, AV_OPT_TYPE_CONST,
{ .i64 = AC_RANGE_CUSTOM_TAB }, INT_MIN, INT_MAX, VE, .unit = "coder" },
{ "qtable", "Quantization table", OFFSET(ctx.qtable), AV_OPT_TYPE_INT,
{ .i64 = -1 }, -1, 2, VE },
/* Slice counts default to -1; vulkan_encode_ffv1_init() replaces any
 * non-positive value with 32 and caps the result by the frame size */
{ "slices_h", "Number of horizontal slices", OFFSET(num_h_slices), AV_OPT_TYPE_INT,
{ .i64 = -1 }, -1, 32, VE },
{ "slices_v", "Number of vertical slices", OFFSET(num_v_slices), AV_OPT_TYPE_INT,
{ .i64 = -1 }, -1, 32, VE },
/* vulkan_encode_ffv1_init() rejects this unless version 4 and range coding
 * are in use */
{ "force_pcm", "Code all slices with no prediction", OFFSET(force_pcm), AV_OPT_TYPE_BOOL,
{ .i64 = 0 }, 0, 1, VE },
{ NULL }
};
/*
 * Codec-specific overrides of generic option defaults; NULL-terminated.
 * Sets the default of the generic "g" option to 1 (presumably the GOP size,
 * i.e. intra-only by default -- confirm against the generic option table).
 */
static const FFCodecDefault vulkan_encode_ffv1_defaults[] = {
{ "g", "1" },
{ NULL },
};
/*
 * AVClass of the encoder's private context: names the class "ffv1_vulkan"
 * and attaches the private option table defined in this file.
 */
static const AVClass vulkan_encode_ffv1_class = {
.class_name = "ffv1_vulkan",
.item_name = av_default_item_name,
.option = vulkan_encode_ffv1_options,
.version = LIBAVUTIL_VERSION_INT,
};
/*
 * NULL-terminated list of hardware configurations advertised by the encoder.
 * It holds a single entry built with HW_CONFIG_ENCODER_FRAMES(VULKAN, VULKAN).
 *
 * NOTE(review): this table has external linkage but is only referenced from
 * the codec descriptor in the part of the file visible here -- consider
 * making it static if no other translation unit uses it.
 */
const AVCodecHWConfigInternal *const vulkan_encode_ffv1_hw_configs[] = {
HW_CONFIG_ENCODER_FRAMES(VULKAN, VULKAN),
NULL,
};
/*
 * Codec descriptor for the "ffv1_vulkan" video encoder (codec ID
 * AV_CODEC_ID_FFV1). It wires up the init, encode and close callbacks, the
 * private class, the option default overrides and the hardware
 * configuration list defined in this file.
 */
const FFCodec ff_ffv1_vulkan_encoder = {
.p.name = "ffv1_vulkan",
CODEC_LONG_NAME("FFmpeg video codec #1 (Vulkan)"),
.p.type = AVMEDIA_TYPE_VIDEO,
.p.id = AV_CODEC_ID_FFV1,
.priv_data_size = sizeof(VulkanEncodeFFv1Context),
.init = &vulkan_encode_ffv1_init,
FF_CODEC_ENCODE_CB(vulkan_encode_ffv1_frame),
.close = &vulkan_encode_ffv1_close,
.p.priv_class = &vulkan_encode_ffv1_class,
.p.capabilities = AV_CODEC_CAP_DELAY |
AV_CODEC_CAP_HARDWARE |
AV_CODEC_CAP_DR1 |
AV_CODEC_CAP_ENCODER_FLUSH |
AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE,
/* INIT_CLEANUP: presumably makes the close callback run after a failed
 * init as well -- confirm against the FF_CODEC_CAP_* documentation */
.caps_internal = FF_CODEC_CAP_INIT_CLEANUP | FF_CODEC_CAP_EOF_FLUSH,
.defaults = vulkan_encode_ffv1_defaults,
/* Only Vulkan hardware frames are accepted as input */
.p.pix_fmts = (const enum AVPixelFormat[]) {
AV_PIX_FMT_VULKAN,
AV_PIX_FMT_NONE,
},
.hw_configs = vulkan_encode_ffv1_hw_configs,
.p.wrapper_name = "vulkan",
};