You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-08-04 22:03:09 +02:00
ffv1enc_vulkan: switch to 2-line cache, unify prediction code
This commit is contained in:
@ -37,6 +37,9 @@
|
|||||||
#define LG_ALIGN_W 32
|
#define LG_ALIGN_W 32
|
||||||
#define LG_ALIGN_H 32
|
#define LG_ALIGN_H 32
|
||||||
|
|
||||||
|
/* Unlike the decoder, we need 4 lines (but really only 3) */
|
||||||
|
#define RGB_LINECACHE 4
|
||||||
|
|
||||||
typedef struct VulkanEncodeFFv1FrameData {
|
typedef struct VulkanEncodeFFv1FrameData {
|
||||||
/* Output data */
|
/* Output data */
|
||||||
AVBufferRef *out_data_ref;
|
AVBufferRef *out_data_ref;
|
||||||
@ -72,7 +75,6 @@ typedef struct VulkanEncodeFFv1Context {
|
|||||||
|
|
||||||
FFVulkanShader setup;
|
FFVulkanShader setup;
|
||||||
FFVulkanShader reset;
|
FFVulkanShader reset;
|
||||||
FFVulkanShader rct;
|
|
||||||
FFVulkanShader enc;
|
FFVulkanShader enc;
|
||||||
|
|
||||||
/* Constant read-only buffers */
|
/* Constant read-only buffers */
|
||||||
@ -111,7 +113,6 @@ extern const char *ff_source_rangecoder_comp;
|
|||||||
extern const char *ff_source_ffv1_vlc_comp;
|
extern const char *ff_source_ffv1_vlc_comp;
|
||||||
extern const char *ff_source_ffv1_common_comp;
|
extern const char *ff_source_ffv1_common_comp;
|
||||||
extern const char *ff_source_ffv1_reset_comp;
|
extern const char *ff_source_ffv1_reset_comp;
|
||||||
extern const char *ff_source_ffv1_enc_rct_comp;
|
|
||||||
extern const char *ff_source_ffv1_enc_setup_comp;
|
extern const char *ff_source_ffv1_enc_setup_comp;
|
||||||
extern const char *ff_source_ffv1_enc_comp;
|
extern const char *ff_source_ffv1_enc_comp;
|
||||||
|
|
||||||
@ -120,6 +121,7 @@ typedef struct FFv1VkParameters {
|
|||||||
VkDeviceAddress scratch_data;
|
VkDeviceAddress scratch_data;
|
||||||
VkDeviceAddress out_data;
|
VkDeviceAddress out_data;
|
||||||
|
|
||||||
|
int32_t fmt_lut[4];
|
||||||
int32_t sar[2];
|
int32_t sar[2];
|
||||||
uint32_t chroma_shift[2];
|
uint32_t chroma_shift[2];
|
||||||
|
|
||||||
@ -127,7 +129,9 @@ typedef struct FFv1VkParameters {
|
|||||||
uint32_t context_count;
|
uint32_t context_count;
|
||||||
uint32_t crcref;
|
uint32_t crcref;
|
||||||
uint32_t slice_size_max;
|
uint32_t slice_size_max;
|
||||||
|
int rct_offset;
|
||||||
|
|
||||||
|
uint8_t extend_lookup[8];
|
||||||
uint8_t bits_per_raw_sample;
|
uint8_t bits_per_raw_sample;
|
||||||
uint8_t context_model;
|
uint8_t context_model;
|
||||||
uint8_t version;
|
uint8_t version;
|
||||||
@ -137,13 +141,14 @@ typedef struct FFv1VkParameters {
|
|||||||
uint8_t components;
|
uint8_t components;
|
||||||
uint8_t planes;
|
uint8_t planes;
|
||||||
uint8_t codec_planes;
|
uint8_t codec_planes;
|
||||||
|
uint8_t planar_rgb;
|
||||||
uint8_t transparency;
|
uint8_t transparency;
|
||||||
uint8_t colorspace;
|
uint8_t colorspace;
|
||||||
uint8_t pic_mode;
|
uint8_t pic_mode;
|
||||||
uint8_t ec;
|
uint8_t ec;
|
||||||
uint8_t ppi;
|
uint8_t ppi;
|
||||||
uint8_t chunks;
|
uint8_t chunks;
|
||||||
uint8_t padding[1];
|
uint8_t padding[4];
|
||||||
} FFv1VkParameters;
|
} FFv1VkParameters;
|
||||||
|
|
||||||
static void add_push_data(FFVulkanShader *shd)
|
static void add_push_data(FFVulkanShader *shd)
|
||||||
@ -153,6 +158,7 @@ static void add_push_data(FFVulkanShader *shd)
|
|||||||
GLSLC(1, u8buf scratch_data; );
|
GLSLC(1, u8buf scratch_data; );
|
||||||
GLSLC(1, u8buf out_data; );
|
GLSLC(1, u8buf out_data; );
|
||||||
GLSLC(0, );
|
GLSLC(0, );
|
||||||
|
GLSLC(1, ivec4 fmt_lut; );
|
||||||
GLSLC(1, ivec2 sar; );
|
GLSLC(1, ivec2 sar; );
|
||||||
GLSLC(1, uvec2 chroma_shift; );
|
GLSLC(1, uvec2 chroma_shift; );
|
||||||
GLSLC(0, );
|
GLSLC(0, );
|
||||||
@ -160,7 +166,9 @@ static void add_push_data(FFVulkanShader *shd)
|
|||||||
GLSLC(1, uint context_count; );
|
GLSLC(1, uint context_count; );
|
||||||
GLSLC(1, uint32_t crcref; );
|
GLSLC(1, uint32_t crcref; );
|
||||||
GLSLC(1, uint32_t slice_size_max; );
|
GLSLC(1, uint32_t slice_size_max; );
|
||||||
|
GLSLC(1, int rct_offset; );
|
||||||
GLSLC(0, );
|
GLSLC(0, );
|
||||||
|
GLSLC(1, uint8_t extend_lookup[8]; );
|
||||||
GLSLC(1, uint8_t bits_per_raw_sample; );
|
GLSLC(1, uint8_t bits_per_raw_sample; );
|
||||||
GLSLC(1, uint8_t context_model; );
|
GLSLC(1, uint8_t context_model; );
|
||||||
GLSLC(1, uint8_t version; );
|
GLSLC(1, uint8_t version; );
|
||||||
@ -170,122 +178,19 @@ static void add_push_data(FFVulkanShader *shd)
|
|||||||
GLSLC(1, uint8_t components; );
|
GLSLC(1, uint8_t components; );
|
||||||
GLSLC(1, uint8_t planes; );
|
GLSLC(1, uint8_t planes; );
|
||||||
GLSLC(1, uint8_t codec_planes; );
|
GLSLC(1, uint8_t codec_planes; );
|
||||||
|
GLSLC(1, uint8_t planar_rgb; );
|
||||||
GLSLC(1, uint8_t transparency; );
|
GLSLC(1, uint8_t transparency; );
|
||||||
GLSLC(1, uint8_t colorspace; );
|
GLSLC(1, uint8_t colorspace; );
|
||||||
GLSLC(1, uint8_t pic_mode; );
|
GLSLC(1, uint8_t pic_mode; );
|
||||||
GLSLC(1, uint8_t ec; );
|
GLSLC(1, uint8_t ec; );
|
||||||
GLSLC(1, uint8_t ppi; );
|
GLSLC(1, uint8_t ppi; );
|
||||||
GLSLC(1, uint8_t chunks; );
|
GLSLC(1, uint8_t chunks; );
|
||||||
GLSLC(1, uint8_t padding[1]; );
|
GLSLC(1, uint8_t padding[4]; );
|
||||||
GLSLC(0, }; );
|
GLSLC(0, }; );
|
||||||
ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkParameters),
|
ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkParameters),
|
||||||
VK_SHADER_STAGE_COMPUTE_BIT);
|
VK_SHADER_STAGE_COMPUTE_BIT);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int run_rct(AVCodecContext *avctx, FFVkExecContext *exec,
|
|
||||||
AVFrame *enc_in, VkImageView *enc_in_views,
|
|
||||||
AVFrame **intermediate_frame, VkImageView *intermediate_views,
|
|
||||||
VkImageMemoryBarrier2 *img_bar, int *nb_img_bar,
|
|
||||||
VkBufferMemoryBarrier2 *buf_bar, int *nb_buf_bar,
|
|
||||||
FFVkBuffer *slice_data_buf, uint32_t slice_data_size)
|
|
||||||
{
|
|
||||||
int err;
|
|
||||||
VulkanEncodeFFv1Context *fv = avctx->priv_data;
|
|
||||||
FFV1Context *f = &fv->ctx;
|
|
||||||
FFVulkanFunctions *vk = &fv->s.vkfn;
|
|
||||||
AVHWFramesContext *src_hwfc = (AVHWFramesContext *)enc_in->hw_frames_ctx->data;
|
|
||||||
FFv1VkRCTParameters pd;
|
|
||||||
|
|
||||||
/* Create a temporary frame */
|
|
||||||
*intermediate_frame = av_frame_alloc();
|
|
||||||
if (!(*intermediate_frame))
|
|
||||||
return AVERROR(ENOMEM);
|
|
||||||
|
|
||||||
RET(av_hwframe_get_buffer(fv->intermediate_frames_ref,
|
|
||||||
*intermediate_frame, 0));
|
|
||||||
|
|
||||||
RET(ff_vk_exec_add_dep_frame(&fv->s, exec, *intermediate_frame,
|
|
||||||
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
|
||||||
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
|
|
||||||
RET(ff_vk_create_imageviews(&fv->s, exec, intermediate_views,
|
|
||||||
*intermediate_frame,
|
|
||||||
fv->rep_fmt));
|
|
||||||
|
|
||||||
/* Update descriptors */
|
|
||||||
ff_vk_shader_update_desc_buffer(&fv->s, exec, &fv->rct,
|
|
||||||
1, 0, 0,
|
|
||||||
slice_data_buf,
|
|
||||||
0, slice_data_size*f->slice_count,
|
|
||||||
VK_FORMAT_UNDEFINED);
|
|
||||||
ff_vk_shader_update_img_array(&fv->s, exec, &fv->rct,
|
|
||||||
enc_in, enc_in_views,
|
|
||||||
1, 1,
|
|
||||||
VK_IMAGE_LAYOUT_GENERAL,
|
|
||||||
VK_NULL_HANDLE);
|
|
||||||
ff_vk_shader_update_img_array(&fv->s, exec, &fv->rct,
|
|
||||||
*intermediate_frame, intermediate_views,
|
|
||||||
1, 2,
|
|
||||||
VK_IMAGE_LAYOUT_GENERAL,
|
|
||||||
VK_NULL_HANDLE);
|
|
||||||
|
|
||||||
ff_vk_frame_barrier(&fv->s, exec, *intermediate_frame, img_bar, nb_img_bar,
|
|
||||||
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
|
||||||
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
|
|
||||||
VK_ACCESS_SHADER_WRITE_BIT,
|
|
||||||
VK_IMAGE_LAYOUT_GENERAL,
|
|
||||||
VK_QUEUE_FAMILY_IGNORED);
|
|
||||||
|
|
||||||
/* Prep the input/output images */
|
|
||||||
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
|
|
||||||
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
|
|
||||||
.pImageMemoryBarriers = img_bar,
|
|
||||||
.imageMemoryBarrierCount = *nb_img_bar,
|
|
||||||
.pBufferMemoryBarriers = buf_bar,
|
|
||||||
.bufferMemoryBarrierCount = *nb_buf_bar,
|
|
||||||
});
|
|
||||||
*nb_img_bar = 0;
|
|
||||||
if (*nb_buf_bar) {
|
|
||||||
slice_data_buf->stage = buf_bar[0].dstStageMask;
|
|
||||||
slice_data_buf->access = buf_bar[0].dstAccessMask;
|
|
||||||
*nb_buf_bar = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Run the shader */
|
|
||||||
ff_vk_exec_bind_shader(&fv->s, exec, &fv->rct);
|
|
||||||
pd = (FFv1VkRCTParameters) {
|
|
||||||
.offset = 1 << f->bits_per_raw_sample,
|
|
||||||
.bits = f->bits_per_raw_sample,
|
|
||||||
.planar_rgb = ff_vk_mt_is_np_rgb(src_hwfc->sw_format) &&
|
|
||||||
(ff_vk_count_images((AVVkFrame *)enc_in->data[0]) > 1),
|
|
||||||
.transparency = f->transparency,
|
|
||||||
};
|
|
||||||
|
|
||||||
/* For some reason the C FFv1 encoder/decoder treats these differently */
|
|
||||||
if (src_hwfc->sw_format == AV_PIX_FMT_GBRP10 ||
|
|
||||||
src_hwfc->sw_format == AV_PIX_FMT_GBRP12 ||
|
|
||||||
src_hwfc->sw_format == AV_PIX_FMT_GBRP14)
|
|
||||||
memcpy(pd.fmt_lut, (int [4]) { 2, 1, 0, 3 }, 4*sizeof(int));
|
|
||||||
else
|
|
||||||
ff_vk_set_perm(src_hwfc->sw_format, pd.fmt_lut, 1);
|
|
||||||
|
|
||||||
ff_vk_shader_update_push_const(&fv->s, exec, &fv->rct,
|
|
||||||
VK_SHADER_STAGE_COMPUTE_BIT,
|
|
||||||
0, sizeof(pd), &pd);
|
|
||||||
|
|
||||||
vk->CmdDispatch(exec->buf, fv->ctx.num_h_slices, fv->ctx.num_v_slices, 1);
|
|
||||||
|
|
||||||
/* Add a post-dispatch barrier before encoding */
|
|
||||||
ff_vk_frame_barrier(&fv->s, exec, *intermediate_frame, img_bar, nb_img_bar,
|
|
||||||
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
|
||||||
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
|
|
||||||
VK_ACCESS_SHADER_READ_BIT,
|
|
||||||
VK_IMAGE_LAYOUT_GENERAL,
|
|
||||||
VK_QUEUE_FAMILY_IGNORED);
|
|
||||||
|
|
||||||
fail:
|
|
||||||
return err;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
|
static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
|
||||||
FFVkExecContext *exec,
|
FFVkExecContext *exec,
|
||||||
const AVFrame *pict)
|
const AVFrame *pict)
|
||||||
@ -298,8 +203,6 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
|
|||||||
VulkanEncodeFFv1FrameData *fd = exec->opaque;
|
VulkanEncodeFFv1FrameData *fd = exec->opaque;
|
||||||
FFv1VkParameters pd;
|
FFv1VkParameters pd;
|
||||||
|
|
||||||
AVFrame *intermediate_frame = NULL;
|
|
||||||
|
|
||||||
/* Slice data */
|
/* Slice data */
|
||||||
AVBufferRef *slice_data_ref;
|
AVBufferRef *slice_data_ref;
|
||||||
FFVkBuffer *slice_data_buf;
|
FFVkBuffer *slice_data_buf;
|
||||||
@ -318,11 +221,11 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
|
|||||||
uint32_t context_count = f->context_count[f->context_model];
|
uint32_t context_count = f->context_count[f->context_model];
|
||||||
const AVPixFmtDescriptor *fmt_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
|
const AVPixFmtDescriptor *fmt_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
|
||||||
|
|
||||||
VkImageView in_views[AV_NUM_DATA_POINTERS];
|
AVFrame *src = (AVFrame *)pict;
|
||||||
VkImageView intermediate_views[AV_NUM_DATA_POINTERS];
|
VkImageView src_views[AV_NUM_DATA_POINTERS];
|
||||||
|
|
||||||
AVFrame *enc_in = (AVFrame *)pict;
|
AVFrame *tmp = NULL;
|
||||||
VkImageView *enc_in_views = in_views;
|
VkImageView tmp_views[AV_NUM_DATA_POINTERS];
|
||||||
|
|
||||||
VkImageMemoryBarrier2 img_bar[37];
|
VkImageMemoryBarrier2 img_bar[37];
|
||||||
int nb_img_bar = 0;
|
int nb_img_bar = 0;
|
||||||
@ -402,27 +305,44 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
|
|||||||
ff_vk_exec_add_dep_buf(&fv->s, exec, &fd->out_data_ref, 1, 1);
|
ff_vk_exec_add_dep_buf(&fv->s, exec, &fd->out_data_ref, 1, 1);
|
||||||
|
|
||||||
/* Prepare input frame */
|
/* Prepare input frame */
|
||||||
RET(ff_vk_exec_add_dep_frame(&fv->s, exec, enc_in,
|
RET(ff_vk_exec_add_dep_frame(&fv->s, exec, src,
|
||||||
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
||||||
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
|
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
|
||||||
|
|
||||||
RET(ff_vk_create_imageviews(&fv->s, exec, enc_in_views, enc_in,
|
RET(ff_vk_create_imageviews(&fv->s, exec, src_views, src,
|
||||||
fv->rep_fmt));
|
fv->rep_fmt));
|
||||||
ff_vk_frame_barrier(&fv->s, exec, enc_in, img_bar, &nb_img_bar,
|
ff_vk_frame_barrier(&fv->s, exec, src, img_bar, &nb_img_bar,
|
||||||
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
||||||
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
|
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
|
||||||
VK_ACCESS_SHADER_READ_BIT,
|
VK_ACCESS_SHADER_READ_BIT,
|
||||||
VK_IMAGE_LAYOUT_GENERAL,
|
VK_IMAGE_LAYOUT_GENERAL,
|
||||||
VK_QUEUE_FAMILY_IGNORED);
|
VK_QUEUE_FAMILY_IGNORED);
|
||||||
|
|
||||||
/* Setup shader needs the original input */
|
if (fv->is_rgb) {
|
||||||
|
/* Create a temporary frame */
|
||||||
|
tmp = av_frame_alloc();
|
||||||
|
if (!(tmp))
|
||||||
|
return AVERROR(ENOMEM);
|
||||||
|
|
||||||
|
RET(av_hwframe_get_buffer(fv->intermediate_frames_ref,
|
||||||
|
tmp, 0));
|
||||||
|
|
||||||
|
RET(ff_vk_exec_add_dep_frame(&fv->s, exec, tmp,
|
||||||
|
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
||||||
|
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
|
||||||
|
RET(ff_vk_create_imageviews(&fv->s, exec, tmp_views,
|
||||||
|
tmp,
|
||||||
|
fv->rep_fmt));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Setup shader */
|
||||||
ff_vk_shader_update_desc_buffer(&fv->s, exec, &fv->setup,
|
ff_vk_shader_update_desc_buffer(&fv->s, exec, &fv->setup,
|
||||||
1, 0, 0,
|
1, 0, 0,
|
||||||
slice_data_buf,
|
slice_data_buf,
|
||||||
0, slice_data_size*f->slice_count,
|
0, slice_data_size*f->slice_count,
|
||||||
VK_FORMAT_UNDEFINED);
|
VK_FORMAT_UNDEFINED);
|
||||||
ff_vk_shader_update_img_array(&fv->s, exec, &fv->setup,
|
ff_vk_shader_update_img_array(&fv->s, exec, &fv->setup,
|
||||||
enc_in, enc_in_views,
|
src, src_views,
|
||||||
1, 1,
|
1, 1,
|
||||||
VK_IMAGE_LAYOUT_GENERAL,
|
VK_IMAGE_LAYOUT_GENERAL,
|
||||||
VK_NULL_HANDLE);
|
VK_NULL_HANDLE);
|
||||||
@ -471,6 +391,7 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
|
|||||||
.plane_state_size = plane_state_size,
|
.plane_state_size = plane_state_size,
|
||||||
.context_count = context_count,
|
.context_count = context_count,
|
||||||
.crcref = f->crcref,
|
.crcref = f->crcref,
|
||||||
|
.rct_offset = 1 << f->bits_per_raw_sample,
|
||||||
.slice_size_max = out_data_buf->size / f->slice_count,
|
.slice_size_max = out_data_buf->size / f->slice_count,
|
||||||
.context_model = fv->ctx.context_model,
|
.context_model = fv->ctx.context_model,
|
||||||
.version = f->version,
|
.version = f->version,
|
||||||
@ -480,6 +401,8 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
|
|||||||
.components = fmt_desc->nb_components,
|
.components = fmt_desc->nb_components,
|
||||||
.planes = av_pix_fmt_count_planes(avctx->sw_pix_fmt),
|
.planes = av_pix_fmt_count_planes(avctx->sw_pix_fmt),
|
||||||
.codec_planes = f->plane_count,
|
.codec_planes = f->plane_count,
|
||||||
|
.planar_rgb = ff_vk_mt_is_np_rgb(avctx->sw_pix_fmt) &&
|
||||||
|
(ff_vk_count_images((AVVkFrame *)src->data[0]) > 1),
|
||||||
.transparency = f->transparency,
|
.transparency = f->transparency,
|
||||||
.colorspace = f->colorspace,
|
.colorspace = f->colorspace,
|
||||||
.pic_mode = !(pict->flags & AV_FRAME_FLAG_INTERLACED) ? 3 :
|
.pic_mode = !(pict->flags & AV_FRAME_FLAG_INTERLACED) ? 3 :
|
||||||
@ -488,11 +411,35 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
|
|||||||
.ppi = fv->ppi,
|
.ppi = fv->ppi,
|
||||||
.chunks = fv->chunks,
|
.chunks = fv->chunks,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* For some reason the C FFv1 encoder/decoder treats these differently */
|
||||||
|
if (avctx->sw_pix_fmt == AV_PIX_FMT_GBRP10 ||
|
||||||
|
avctx->sw_pix_fmt == AV_PIX_FMT_GBRP12 ||
|
||||||
|
avctx->sw_pix_fmt == AV_PIX_FMT_GBRP14)
|
||||||
|
memcpy(pd.fmt_lut, (int [4]) { 2, 1, 0, 3 }, 4*sizeof(int));
|
||||||
|
else
|
||||||
|
ff_vk_set_perm(avctx->sw_pix_fmt, pd.fmt_lut, 1);
|
||||||
|
|
||||||
|
for (int i = 0; i < f->quant_table_count; i++)
|
||||||
|
pd.extend_lookup[i] = (f->quant_tables[i][3][127] != 0) ||
|
||||||
|
(f->quant_tables[i][4][127] != 0);
|
||||||
ff_vk_shader_update_push_const(&fv->s, exec, &fv->setup,
|
ff_vk_shader_update_push_const(&fv->s, exec, &fv->setup,
|
||||||
VK_SHADER_STAGE_COMPUTE_BIT,
|
VK_SHADER_STAGE_COMPUTE_BIT,
|
||||||
0, sizeof(pd), &pd);
|
0, sizeof(pd), &pd);
|
||||||
vk->CmdDispatch(exec->buf, fv->ctx.num_h_slices, fv->ctx.num_v_slices, 1);
|
vk->CmdDispatch(exec->buf, fv->ctx.num_h_slices, fv->ctx.num_v_slices, 1);
|
||||||
|
|
||||||
|
/* Clean up temporary image */
|
||||||
|
if (fv->is_rgb) {
|
||||||
|
AVVkFrame *vkf = (AVVkFrame *)tmp->data[0];
|
||||||
|
vk->CmdClearColorImage(exec->buf, vkf->img[0], VK_IMAGE_LAYOUT_GENERAL,
|
||||||
|
&((VkClearColorValue) { 0 }),
|
||||||
|
1, &((VkImageSubresourceRange) {
|
||||||
|
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||||
|
.levelCount = 1,
|
||||||
|
.layerCount = 1,
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
/* Setup shader modified the slice data buffer */
|
/* Setup shader modified the slice data buffer */
|
||||||
buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
|
buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
|
||||||
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
|
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
|
||||||
@ -546,19 +493,6 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
|
|||||||
f->plane_count);
|
f->plane_count);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Run RCT shader */
|
|
||||||
if (fv->is_rgb) {
|
|
||||||
RET(run_rct(avctx, exec,
|
|
||||||
enc_in, enc_in_views,
|
|
||||||
&intermediate_frame, intermediate_views,
|
|
||||||
img_bar, &nb_img_bar, buf_bar, &nb_buf_bar,
|
|
||||||
slice_data_buf, slice_data_size));
|
|
||||||
|
|
||||||
/* Use the new frame */
|
|
||||||
enc_in = intermediate_frame;
|
|
||||||
enc_in_views = intermediate_views;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* If the reset shader ran, insert a barrier now. */
|
/* If the reset shader ran, insert a barrier now. */
|
||||||
if (f->key_frame || f->version > 3) {
|
if (f->key_frame || f->version > 3) {
|
||||||
/* Reset shader modified the slice data buffer */
|
/* Reset shader modified the slice data buffer */
|
||||||
@ -577,6 +511,15 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (fv->is_rgb) {
|
||||||
|
ff_vk_frame_barrier(&fv->s, exec, tmp, img_bar, &nb_img_bar,
|
||||||
|
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
||||||
|
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
|
||||||
|
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
|
||||||
|
VK_IMAGE_LAYOUT_GENERAL,
|
||||||
|
VK_QUEUE_FAMILY_IGNORED);
|
||||||
|
}
|
||||||
|
|
||||||
/* Final barrier before encoding */
|
/* Final barrier before encoding */
|
||||||
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
|
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
|
||||||
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
|
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
|
||||||
@ -599,7 +542,7 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
|
|||||||
0, slice_data_size*f->slice_count,
|
0, slice_data_size*f->slice_count,
|
||||||
VK_FORMAT_UNDEFINED);
|
VK_FORMAT_UNDEFINED);
|
||||||
ff_vk_shader_update_img_array(&fv->s, exec, &fv->enc,
|
ff_vk_shader_update_img_array(&fv->s, exec, &fv->enc,
|
||||||
enc_in, enc_in_views,
|
src, src_views,
|
||||||
1, 1,
|
1, 1,
|
||||||
VK_IMAGE_LAYOUT_GENERAL,
|
VK_IMAGE_LAYOUT_GENERAL,
|
||||||
VK_NULL_HANDLE);
|
VK_NULL_HANDLE);
|
||||||
@ -608,6 +551,12 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
|
|||||||
results_data_buf,
|
results_data_buf,
|
||||||
0, results_data_buf->size,
|
0, results_data_buf->size,
|
||||||
VK_FORMAT_UNDEFINED);
|
VK_FORMAT_UNDEFINED);
|
||||||
|
if (fv->is_rgb)
|
||||||
|
ff_vk_shader_update_img_array(&fv->s, exec, &fv->enc,
|
||||||
|
tmp, tmp_views,
|
||||||
|
1, 3,
|
||||||
|
VK_IMAGE_LAYOUT_GENERAL,
|
||||||
|
VK_NULL_HANDLE);
|
||||||
|
|
||||||
ff_vk_exec_bind_shader(&fv->s, exec, &fv->enc);
|
ff_vk_exec_bind_shader(&fv->s, exec, &fv->enc);
|
||||||
ff_vk_shader_update_push_const(&fv->s, exec, &fv->enc,
|
ff_vk_shader_update_push_const(&fv->s, exec, &fv->enc,
|
||||||
@ -624,11 +573,11 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
|
|||||||
|
|
||||||
/* This, if needed, was referenced by the execution context
|
/* This, if needed, was referenced by the execution context
|
||||||
* as it was declared as a dependency. */
|
* as it was declared as a dependency. */
|
||||||
av_frame_free(&intermediate_frame);
|
av_frame_free(&tmp);
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
fail:
|
fail:
|
||||||
av_frame_free(&intermediate_frame);
|
av_frame_free(&tmp);
|
||||||
ff_vk_exec_discard_deps(&fv->s, exec);
|
ff_vk_exec_discard_deps(&fv->s, exec);
|
||||||
|
|
||||||
return err;
|
return err;
|
||||||
@ -846,6 +795,7 @@ static int init_indirect(AVCodecContext *avctx, enum AVPixelFormat sw_format)
|
|||||||
{
|
{
|
||||||
int err;
|
int err;
|
||||||
VulkanEncodeFFv1Context *fv = avctx->priv_data;
|
VulkanEncodeFFv1Context *fv = avctx->priv_data;
|
||||||
|
FFV1Context *f = &fv->ctx;
|
||||||
AVHWFramesContext *frames_ctx;
|
AVHWFramesContext *frames_ctx;
|
||||||
AVVulkanFramesContext *vk_frames;
|
AVVulkanFramesContext *vk_frames;
|
||||||
|
|
||||||
@ -856,12 +806,13 @@ static int init_indirect(AVCodecContext *avctx, enum AVPixelFormat sw_format)
|
|||||||
frames_ctx = (AVHWFramesContext *)fv->intermediate_frames_ref->data;
|
frames_ctx = (AVHWFramesContext *)fv->intermediate_frames_ref->data;
|
||||||
frames_ctx->format = AV_PIX_FMT_VULKAN;
|
frames_ctx->format = AV_PIX_FMT_VULKAN;
|
||||||
frames_ctx->sw_format = sw_format;
|
frames_ctx->sw_format = sw_format;
|
||||||
frames_ctx->width = FFALIGN(fv->s.frames->width, 32);
|
frames_ctx->width = fv->s.frames->width;
|
||||||
frames_ctx->height = FFALIGN(fv->s.frames->height, 32);
|
frames_ctx->height = f->num_v_slices*RGB_LINECACHE;
|
||||||
|
|
||||||
vk_frames = frames_ctx->hwctx;
|
vk_frames = frames_ctx->hwctx;
|
||||||
vk_frames->tiling = VK_IMAGE_TILING_OPTIMAL;
|
vk_frames->tiling = VK_IMAGE_TILING_OPTIMAL;
|
||||||
vk_frames->usage = VK_IMAGE_USAGE_STORAGE_BIT;
|
vk_frames->usage = VK_IMAGE_USAGE_STORAGE_BIT |
|
||||||
|
VK_IMAGE_USAGE_TRANSFER_DST_BIT;
|
||||||
vk_frames->img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
|
vk_frames->img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
|
||||||
|
|
||||||
err = av_hwframe_ctx_init(fv->intermediate_frames_ref);
|
err = av_hwframe_ctx_init(fv->intermediate_frames_ref);
|
||||||
@ -929,6 +880,7 @@ static void define_shared_code(AVCodecContext *avctx, FFVulkanShader *shd)
|
|||||||
FFV1Context *f = &fv->ctx;
|
FFV1Context *f = &fv->ctx;
|
||||||
int smp_bits = fv->ctx.use32bit ? 32 : 16;
|
int smp_bits = fv->ctx.use32bit ? 32 : 16;
|
||||||
|
|
||||||
|
av_bprintf(&shd->src, "#define RGB_LINECACHE %i\n" ,RGB_LINECACHE);
|
||||||
av_bprintf(&shd->src, "#define CONTEXT_SIZE %i\n" ,CONTEXT_SIZE);
|
av_bprintf(&shd->src, "#define CONTEXT_SIZE %i\n" ,CONTEXT_SIZE);
|
||||||
av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_MASK 0x%x\n" ,MAX_QUANT_TABLE_MASK);
|
av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_MASK 0x%x\n" ,MAX_QUANT_TABLE_MASK);
|
||||||
|
|
||||||
@ -1120,122 +1072,6 @@ fail:
|
|||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int init_rct_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
|
|
||||||
{
|
|
||||||
int err;
|
|
||||||
VulkanEncodeFFv1Context *fv = avctx->priv_data;
|
|
||||||
FFVulkanShader *shd = &fv->rct;
|
|
||||||
FFVulkanDescriptorSetBinding *desc_set;
|
|
||||||
|
|
||||||
uint8_t *spv_data;
|
|
||||||
size_t spv_len;
|
|
||||||
void *spv_opaque = NULL;
|
|
||||||
int wg_count = sqrt(fv->s.props.properties.limits.maxComputeWorkGroupInvocations);
|
|
||||||
|
|
||||||
enum AVPixelFormat intermediate_fmt = get_supported_rgb_buffer_fmt(avctx);
|
|
||||||
if (intermediate_fmt == AV_PIX_FMT_NONE) {
|
|
||||||
av_log(avctx, AV_LOG_ERROR, "Unable to find a supported compatible "
|
|
||||||
"pixel format for RCT buffer!\n");
|
|
||||||
return AVERROR(ENOTSUP);
|
|
||||||
}
|
|
||||||
|
|
||||||
RET(init_indirect(avctx, intermediate_fmt));
|
|
||||||
|
|
||||||
RET(ff_vk_shader_init(&fv->s, shd, "ffv1_rct",
|
|
||||||
VK_SHADER_STAGE_COMPUTE_BIT,
|
|
||||||
(const char *[]) { "GL_EXT_buffer_reference",
|
|
||||||
"GL_EXT_buffer_reference2" }, 2,
|
|
||||||
wg_count, wg_count, 1,
|
|
||||||
0));
|
|
||||||
|
|
||||||
/* Common codec header */
|
|
||||||
GLSLD(ff_source_common_comp);
|
|
||||||
|
|
||||||
GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
|
|
||||||
GLSLC(1, ivec4 fmt_lut; );
|
|
||||||
GLSLC(1, int offset; );
|
|
||||||
GLSLC(1, uint8_t bits; );
|
|
||||||
GLSLC(1, uint8_t planar_rgb; );
|
|
||||||
GLSLC(1, uint8_t color_planes; );
|
|
||||||
GLSLC(1, uint8_t transparency; );
|
|
||||||
GLSLC(1, uint8_t version; );
|
|
||||||
GLSLC(1, uint8_t micro_version; );
|
|
||||||
GLSLC(1, uint8_t padding[2]; );
|
|
||||||
GLSLC(0, }; );
|
|
||||||
ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkRCTParameters),
|
|
||||||
VK_SHADER_STAGE_COMPUTE_BIT);
|
|
||||||
|
|
||||||
av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
|
|
||||||
av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
|
|
||||||
av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
|
|
||||||
|
|
||||||
desc_set = (FFVulkanDescriptorSetBinding []) {
|
|
||||||
{
|
|
||||||
.name = "rangecoder_static_buf",
|
|
||||||
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
|
||||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
||||||
.mem_layout = "scalar",
|
|
||||||
.buf_content = "uint8_t zero_one_state[512];",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
.name = "quant_buf",
|
|
||||||
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
|
||||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
||||||
.mem_layout = "scalar",
|
|
||||||
.buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
|
|
||||||
"[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
|
|
||||||
},
|
|
||||||
};
|
|
||||||
RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 1, 0));
|
|
||||||
|
|
||||||
define_shared_code(avctx, shd);
|
|
||||||
|
|
||||||
desc_set = (FFVulkanDescriptorSetBinding []) {
|
|
||||||
{
|
|
||||||
.name = "slice_data_buf",
|
|
||||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
|
||||||
.mem_quali = "readonly",
|
|
||||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
||||||
.buf_content = "SliceContext slice_ctx[1024];",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
.name = "src",
|
|
||||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
|
||||||
.dimensions = 2,
|
|
||||||
.mem_layout = ff_vk_shader_rep_fmt(fv->s.frames->sw_format,
|
|
||||||
fv->rep_fmt),
|
|
||||||
.elems = av_pix_fmt_count_planes(fv->s.frames->sw_format),
|
|
||||||
.mem_quali = "readonly",
|
|
||||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
.name = "dst",
|
|
||||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
|
||||||
.dimensions = 2,
|
|
||||||
.mem_layout = ff_vk_shader_rep_fmt(intermediate_fmt,
|
|
||||||
fv->rep_fmt),
|
|
||||||
.elems = av_pix_fmt_count_planes(intermediate_fmt),
|
|
||||||
.mem_quali = "writeonly",
|
|
||||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
||||||
},
|
|
||||||
};
|
|
||||||
RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3, 0, 0));
|
|
||||||
|
|
||||||
GLSLD(ff_source_ffv1_enc_rct_comp);
|
|
||||||
|
|
||||||
RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main",
|
|
||||||
&spv_opaque));
|
|
||||||
RET(ff_vk_shader_link(&fv->s, shd, spv_data, spv_len, "main"));
|
|
||||||
|
|
||||||
RET(ff_vk_shader_register_exec(&fv->s, &fv->exec_pool, shd));
|
|
||||||
|
|
||||||
fail:
|
|
||||||
if (spv_opaque)
|
|
||||||
spv->free_shader(spv, &spv_opaque);
|
|
||||||
|
|
||||||
return err;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
|
static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
|
||||||
{
|
{
|
||||||
int err;
|
int err;
|
||||||
@ -1243,10 +1079,6 @@ static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
|
|||||||
FFVulkanShader *shd = &fv->enc;
|
FFVulkanShader *shd = &fv->enc;
|
||||||
FFVulkanDescriptorSetBinding *desc_set;
|
FFVulkanDescriptorSetBinding *desc_set;
|
||||||
|
|
||||||
AVHWFramesContext *frames_ctx = fv->intermediate_frames_ref ?
|
|
||||||
(AVHWFramesContext *)fv->intermediate_frames_ref->data :
|
|
||||||
fv->s.frames;
|
|
||||||
|
|
||||||
uint8_t *spv_data;
|
uint8_t *spv_data;
|
||||||
size_t spv_len;
|
size_t spv_len;
|
||||||
void *spv_opaque = NULL;
|
void *spv_opaque = NULL;
|
||||||
@ -1307,9 +1139,9 @@ static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
|
|||||||
.name = "src",
|
.name = "src",
|
||||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||||
.dimensions = 2,
|
.dimensions = 2,
|
||||||
.mem_layout = ff_vk_shader_rep_fmt(frames_ctx->sw_format,
|
.mem_layout = ff_vk_shader_rep_fmt(fv->s.frames->sw_format,
|
||||||
fv->rep_fmt),
|
fv->rep_fmt),
|
||||||
.elems = av_pix_fmt_count_planes(frames_ctx->sw_format),
|
.elems = av_pix_fmt_count_planes(fv->s.frames->sw_format),
|
||||||
.mem_quali = "readonly",
|
.mem_quali = "readonly",
|
||||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||||
},
|
},
|
||||||
@ -1321,7 +1153,19 @@ static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
|
|||||||
.buf_content = "uint64_t slice_results[2048];",
|
.buf_content = "uint64_t slice_results[2048];",
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3, 0, 0));
|
if (fv->is_rgb) {
|
||||||
|
AVHWFramesContext *intermediate_frames_ctx;
|
||||||
|
intermediate_frames_ctx = (AVHWFramesContext *)fv->intermediate_frames_ref->data;
|
||||||
|
desc_set[3] = (FFVulkanDescriptorSetBinding) {
|
||||||
|
.name = "tmp",
|
||||||
|
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||||
|
.dimensions = 2,
|
||||||
|
.mem_layout = ff_vk_shader_rep_fmt(intermediate_frames_ctx->sw_format,
|
||||||
|
FF_VK_REP_NATIVE),
|
||||||
|
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3 + fv->is_rgb, 0, 0));
|
||||||
|
|
||||||
GLSLD(ff_source_ffv1_enc_comp);
|
GLSLD(ff_source_ffv1_enc_comp);
|
||||||
|
|
||||||
@ -1566,13 +1410,15 @@ static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx)
|
|||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Init RCT shader */
|
|
||||||
if (fv->is_rgb) {
|
if (fv->is_rgb) {
|
||||||
err = init_rct_shader(avctx, spv);
|
enum AVPixelFormat intermediate_fmt = get_supported_rgb_buffer_fmt(avctx);
|
||||||
if (err < 0) {
|
if (intermediate_fmt == AV_PIX_FMT_NONE) {
|
||||||
spv->uninit(&spv);
|
av_log(avctx, AV_LOG_ERROR, "Unable to find a supported compatible "
|
||||||
return err;
|
"pixel format for RCT buffer!\n");
|
||||||
|
return AVERROR(ENOTSUP);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
RET(init_indirect(avctx, intermediate_fmt));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Encode shader */
|
/* Encode shader */
|
||||||
@ -1659,7 +1505,6 @@ static av_cold int vulkan_encode_ffv1_close(AVCodecContext *avctx)
|
|||||||
ff_vk_exec_pool_free(&fv->s, &fv->transfer_exec_pool);
|
ff_vk_exec_pool_free(&fv->s, &fv->transfer_exec_pool);
|
||||||
|
|
||||||
ff_vk_shader_free(&fv->s, &fv->enc);
|
ff_vk_shader_free(&fv->s, &fv->enc);
|
||||||
ff_vk_shader_free(&fv->s, &fv->rct);
|
|
||||||
ff_vk_shader_free(&fv->s, &fv->reset);
|
ff_vk_shader_free(&fv->s, &fv->reset);
|
||||||
ff_vk_shader_free(&fv->s, &fv->setup);
|
ff_vk_shader_free(&fv->s, &fv->setup);
|
||||||
|
|
||||||
|
@ -92,3 +92,90 @@ uint slice_coord(uint width, uint sx, uint num_h_slices, uint chroma_shift)
|
|||||||
|
|
||||||
return sx;
|
return sx;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef RGB
|
||||||
|
#define RGB_LBUF (RGB_LINECACHE - 1)
|
||||||
|
#define LADDR(p) (ivec2((p).x, ((p).y & RGB_LBUF)))
|
||||||
|
|
||||||
|
/*
 * Compute the context index and the sample prediction for one component of
 * one sample, with all neighbours fetched from the RGB line cache.
 *
 * pred             image holding the line cache
 * sp               base position of this slice's cache rows inside pred
 * off              (x, y) offset of the current sample within the slice;
 *                  the row is wrapped into the cache by LADDR()
 * comp             component to read from each fetched texel
 * sw               slice width, used to clamp the top-right neighbour
 * quant_table_idx  selects which set of quant_table entries is used
 * extend_lookup    when true, the two extra context terms (quant tables
 *                  3 and 4) are added
 *
 * Returns ivec2(context, prediction).
 */
ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off,
               int comp, int sw, uint8_t quant_table_idx, bool extend_lookup)
{
    /* In the first column, the "left" neighbours are replaced by the sample
     * one column to the right and one row up. */
    const ivec2 yoff_border1 = expectEXT(off.x == 0, false) ? ivec2(1, -1) : ivec2(0, 0);

    /* Thanks to the same coincidence as below, we can skip the special cases
     * for the first rows/columns that the non-cached variant has to check */
    VTYPE3 top = VTYPE3(TYPE(imageLoad(pred, sp + LADDR(off + ivec2(-1, -1) + yoff_border1))[comp]),
                        TYPE(imageLoad(pred, sp + LADDR(off + ivec2(0, -1)))[comp]),
                        TYPE(imageLoad(pred, sp + LADDR(off + ivec2(min(1, sw - off.x - 1), -1)))[comp]));

    /* Normally, we'd need to check if off != ivec2(0, 0) here, since otherwise, we must
     * return zero. However, ivec2(-1, 0) + ivec2(1, -1) == ivec2(0, -1), i.e. previous
     * row, 0 offset, same slice, which is zero since we zero out the buffer for RGB */
    TYPE cur = TYPE(imageLoad(pred, sp + LADDR(off + ivec2(-1, 0) + yoff_border1))[comp]);

    int base = quant_table[quant_table_idx][0][(cur - top[0]) & MAX_QUANT_TABLE_MASK] +
               quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] +
               quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK];

    if (expectEXT(extend_lookup, false)) {
        TYPE cur2 = TYPE(0);
        if (expectEXT(off.x > 0, true)) {
            /* Second column: two to the left falls off the row, so use the
             * sample one column left and one row up instead. */
            const ivec2 yoff_border2 = expectEXT(off.x == 1, false) ? ivec2(-1, -1) : ivec2(-2, 0);
            cur2 = TYPE(imageLoad(pred, sp + LADDR(off + yoff_border2))[comp]);
        }
        base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK];

        /* Sample two rows up, addressed explicitly. With a 2-line cache
         * LADDR() maps it onto the current row, so behaviour is unchanged
         * there. With a larger cache, or when the current row was already
         * written before prediction, LADDR(off) would return the current
         * sample instead.
         * NOTE(review): for off.y < 2 this relies on the wrapped rows being
         * zero, like the taps above - confirm against the caller. */
        TYPE top2 = TYPE(imageLoad(pred, sp + LADDR(off + ivec2(0, -2)))[comp]);
        base += quant_table[quant_table_idx][4][(top2 - top[1]) & MAX_QUANT_TABLE_MASK];
    }

    /* context, prediction */
    return ivec2(base, predict(cur, VTYPE2(top)));
}
|
||||||
|
|
||||||
|
#else /* RGB */
|
||||||
|
|
||||||
|
#define LADDR(p) (p)
|
||||||
|
|
||||||
|
/*
 * Compute the context index and the sample prediction for one component of
 * one sample, reading neighbours directly from the plane image.
 *
 * pred             image to read neighbouring samples from
 * sp               position of the slice inside pred
 * off              (x, y) offset of the current sample within the slice
 * comp             component to read from each fetched texel
 * sw               slice width, used to clamp the top-right neighbour
 * quant_table_idx  selects which set of quant_table entries is used
 * extend_lookup    when true, the two extra context terms (quant tables
 *                  3 and 4) are added
 *
 * Neighbours that would fall outside the slice are taken as zero.
 * Returns ivec2(context, prediction).
 */
ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off,
               int comp, int sw, uint8_t quant_table_idx, bool extend_lookup)
{
    /* In the first column, the "left" neighbours are replaced by the sample
     * one column to the right and one row up. */
    const ivec2 wrap = (off.x == 0) ? ivec2(1, -1) : ivec2(0, 0);
    const ivec2 pos = sp + off;

    VTYPE3 top = VTYPE3(TYPE(0), TYPE(0), TYPE(0));
    if (off.y > 0) {
        /* Top-left does not exist for the first sample of the second row */
        if (off != ivec2(0, 1))
            top[0] = TYPE(imageLoad(pred, pos + ivec2(-1, -1) + wrap)[comp]);
        top[1] = TYPE(imageLoad(pred, pos + ivec2(0, -1))[comp]);
        top[2] = TYPE(imageLoad(pred, pos + ivec2(min(1, sw - off.x - 1), -1))[comp]);
    }

    /* Left neighbour; nothing precedes the very first sample of the slice */
    TYPE cur = TYPE(0);
    if (off != ivec2(0, 0))
        cur = TYPE(imageLoad(pred, pos + ivec2(-1, 0) + wrap)[comp]);

    int base = quant_table[quant_table_idx][0][(cur - top[0]) & MAX_QUANT_TABLE_MASK];
    base += quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK];
    base += quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK];

    if (expectEXT(extend_lookup, false)) {
        /* Two to the left; in the second column this becomes the sample one
         * column left and one row up, which is absent on the first row. */
        TYPE cur2 = TYPE(0);
        if (off.x > 1)
            cur2 = TYPE(imageLoad(pred, pos + ivec2(-2, 0))[comp]);
        else if (off.x == 1 && off != ivec2(1, 0))
            cur2 = TYPE(imageLoad(pred, pos + ivec2(-1, -1))[comp]);
        base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK];

        /* Two rows up */
        TYPE top2 = TYPE(0);
        if (off.y > 1)
            top2 = TYPE(imageLoad(pred, pos + ivec2(0, -2))[comp]);
        base += quant_table[quant_table_idx][4][(top2 - top[1]) & MAX_QUANT_TABLE_MASK];
    }

    /* context, prediction */
    return ivec2(base, predict(cur, VTYPE2(top)));
}
|
||||||
|
#endif
|
||||||
|
@ -20,93 +20,6 @@
|
|||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef RGB
|
|
||||||
#define LADDR(p) (p)
|
|
||||||
#else
|
|
||||||
#define RGB_LINECACHE 2
|
|
||||||
#define RGB_LBUF (RGB_LINECACHE - 1)
|
|
||||||
#define LADDR(p) (ivec2((p).x, ((p).y & RGB_LBUF)))
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef RGB
|
|
||||||
ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off, int comp, int sw, uint8_t quant_table_idx)
|
|
||||||
{
|
|
||||||
const ivec2 yoff_border1 = expectEXT(off.x == 0, false) ? ivec2(1, -1) : ivec2(0, 0);
|
|
||||||
|
|
||||||
/* Thanks to the same coincidence as below, we can skip checking if off == 0, 1 */
|
|
||||||
VTYPE3 top = VTYPE3(TYPE(imageLoad(pred, sp + LADDR(off + ivec2(-1, -1) + yoff_border1))[comp]),
|
|
||||||
TYPE(imageLoad(pred, sp + LADDR(off + ivec2(0, -1)))[comp]),
|
|
||||||
TYPE(imageLoad(pred, sp + LADDR(off + ivec2(min(1, sw - off.x - 1), -1)))[comp]));
|
|
||||||
|
|
||||||
/* Normally, we'd need to check if off != ivec2(0, 0) here, since otherwise, we must
|
|
||||||
* return zero. However, ivec2(-1, 0) + ivec2(1, -1) == ivec2(0, -1), e.g. previous
|
|
||||||
* row, 0 offset, same slice, which is zero since we zero out the buffer for RGB */
|
|
||||||
TYPE cur = TYPE(imageLoad(pred, sp + LADDR(off + ivec2(-1, 0) + yoff_border1))[comp]);
|
|
||||||
|
|
||||||
int base = quant_table[quant_table_idx][0][(cur - top[0]) & MAX_QUANT_TABLE_MASK] +
|
|
||||||
quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] +
|
|
||||||
quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK];
|
|
||||||
|
|
||||||
if (expectEXT(extend_lookup[quant_table_idx] > 0, false)) {
|
|
||||||
TYPE cur2 = TYPE(0);
|
|
||||||
if (expectEXT(off.x > 0, true)) {
|
|
||||||
const ivec2 yoff_border2 = expectEXT(off.x == 1, false) ? ivec2(-1, -1) : ivec2(-2, 0);
|
|
||||||
cur2 = TYPE(imageLoad(pred, sp + LADDR(off + yoff_border2))[comp]);
|
|
||||||
}
|
|
||||||
base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK];
|
|
||||||
|
|
||||||
/* top-2 became current upon swap */
|
|
||||||
TYPE top2 = TYPE(imageLoad(pred, sp + LADDR(off))[comp]);
|
|
||||||
base += quant_table[quant_table_idx][4][(top2 - top[1]) & MAX_QUANT_TABLE_MASK];
|
|
||||||
}
|
|
||||||
|
|
||||||
/* context, prediction */
|
|
||||||
return ivec2(base, predict(cur, VTYPE2(top)));
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off, int comp, int sw, uint8_t quant_table_idx)
|
|
||||||
{
|
|
||||||
const ivec2 yoff_border1 = off.x == 0 ? ivec2(1, -1) : ivec2(0, 0);
|
|
||||||
sp += off;
|
|
||||||
|
|
||||||
VTYPE3 top = VTYPE3(TYPE(0),
|
|
||||||
TYPE(0),
|
|
||||||
TYPE(0));
|
|
||||||
if (off.y > 0 && off != ivec2(0, 1))
|
|
||||||
top[0] = TYPE(imageLoad(pred, sp + ivec2(-1, -1) + yoff_border1)[comp]);
|
|
||||||
if (off.y > 0) {
|
|
||||||
top[1] = TYPE(imageLoad(pred, sp + ivec2(0, -1))[comp]);
|
|
||||||
top[2] = TYPE(imageLoad(pred, sp + ivec2(min(1, sw - off.x - 1), -1))[comp]);
|
|
||||||
}
|
|
||||||
|
|
||||||
TYPE cur = TYPE(0);
|
|
||||||
if (off != ivec2(0, 0))
|
|
||||||
cur = TYPE(imageLoad(pred, sp + ivec2(-1, 0) + yoff_border1)[comp]);
|
|
||||||
|
|
||||||
int base = quant_table[quant_table_idx][0][(cur - top[0]) & MAX_QUANT_TABLE_MASK] +
|
|
||||||
quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] +
|
|
||||||
quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK];
|
|
||||||
|
|
||||||
if ((quant_table[quant_table_idx][3][127] != 0) ||
|
|
||||||
(quant_table[quant_table_idx][4][127] != 0)) {
|
|
||||||
TYPE cur2 = TYPE(0);
|
|
||||||
if (off.x > 0 && off != ivec2(1, 0)) {
|
|
||||||
const ivec2 yoff_border2 = off.x == 1 ? ivec2(1, -1) : ivec2(0, 0);
|
|
||||||
cur2 = TYPE(imageLoad(pred, sp + ivec2(-2, 0) + yoff_border2)[comp]);
|
|
||||||
}
|
|
||||||
base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK];
|
|
||||||
|
|
||||||
TYPE top2 = TYPE(0);
|
|
||||||
if (off.y > 1)
|
|
||||||
top2 = TYPE(imageLoad(pred, sp + ivec2(0, -2))[comp]);
|
|
||||||
base += quant_table[quant_table_idx][4][(top2 - top[1]) & MAX_QUANT_TABLE_MASK];
|
|
||||||
}
|
|
||||||
|
|
||||||
/* context, prediction */
|
|
||||||
return ivec2(base, predict(cur, VTYPE2(top)));
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef GOLOMB
|
#ifndef GOLOMB
|
||||||
#ifdef CACHED_SYMBOL_READER
|
#ifdef CACHED_SYMBOL_READER
|
||||||
shared uint8_t state[CONTEXT_SIZE];
|
shared uint8_t state[CONTEXT_SIZE];
|
||||||
@ -172,7 +85,7 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w,
|
|||||||
|
|
||||||
for (int x = 0; x < w; x++) {
|
for (int x = 0; x < w; x++) {
|
||||||
ivec2 pr = get_pred(dec[p], sp, ivec2(x, y), 0, w,
|
ivec2 pr = get_pred(dec[p], sp, ivec2(x, y), 0, w,
|
||||||
quant_table_idx);
|
quant_table_idx, extend_lookup[quant_table_idx] > 0);
|
||||||
|
|
||||||
uint context_off = state_off + CONTEXT_SIZE*abs(pr[0]);
|
uint context_off = state_off + CONTEXT_SIZE*abs(pr[0]);
|
||||||
#ifdef CACHED_SYMBOL_READER
|
#ifdef CACHED_SYMBOL_READER
|
||||||
@ -217,7 +130,7 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w,
|
|||||||
ivec2 pos = sp + ivec2(x, y);
|
ivec2 pos = sp + ivec2(x, y);
|
||||||
int diff;
|
int diff;
|
||||||
ivec2 pr = get_pred(dec[p], sp, ivec2(x, y), 0, w,
|
ivec2 pr = get_pred(dec[p], sp, ivec2(x, y), 0, w,
|
||||||
quant_table_idx);
|
quant_table_idx, extend_lookup[quant_table_idx] > 0);
|
||||||
|
|
||||||
VlcState sb = VlcState(uint64_t(slice_state) + state_off + VLC_STATE_SIZE*abs(pr[0]));
|
VlcState sb = VlcState(uint64_t(slice_state) + state_off + VLC_STATE_SIZE*abs(pr[0]));
|
||||||
|
|
||||||
|
@ -20,48 +20,6 @@
|
|||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
*/
|
*/
|
||||||
|
|
||||||
ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off, int comp, int sw, uint8_t quant_table_idx)
|
|
||||||
{
|
|
||||||
const ivec2 yoff_border1 = off.x == 0 ? ivec2(1, -1) : ivec2(0, 0);
|
|
||||||
sp += off;
|
|
||||||
|
|
||||||
VTYPE3 top = VTYPE3(TYPE(0),
|
|
||||||
TYPE(0),
|
|
||||||
TYPE(0));
|
|
||||||
if (off.y > 0 && off != ivec2(0, 1))
|
|
||||||
top[0] = TYPE(imageLoad(pred, sp + ivec2(-1, -1) + yoff_border1)[comp]);
|
|
||||||
if (off.y > 0) {
|
|
||||||
top[1] = TYPE(imageLoad(pred, sp + ivec2(0, -1))[comp]);
|
|
||||||
top[2] = TYPE(imageLoad(pred, sp + ivec2(min(1, sw - off.x - 1), -1))[comp]);
|
|
||||||
}
|
|
||||||
|
|
||||||
TYPE cur = TYPE(0);
|
|
||||||
if (off != ivec2(0, 0))
|
|
||||||
cur = TYPE(imageLoad(pred, sp + ivec2(-1, 0) + yoff_border1)[comp]);
|
|
||||||
|
|
||||||
int base = quant_table[quant_table_idx][0][(cur - top[0]) & MAX_QUANT_TABLE_MASK] +
|
|
||||||
quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] +
|
|
||||||
quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK];
|
|
||||||
|
|
||||||
if ((quant_table[quant_table_idx][3][127] != 0) ||
|
|
||||||
(quant_table[quant_table_idx][4][127] != 0)) {
|
|
||||||
TYPE cur2 = TYPE(0);
|
|
||||||
if (off.x > 0 && off != ivec2(1, 0)) {
|
|
||||||
const ivec2 yoff_border2 = off.x == 1 ? ivec2(1, -1) : ivec2(0, 0);
|
|
||||||
cur2 = TYPE(imageLoad(pred, sp + ivec2(-2, 0) + yoff_border2)[comp]);
|
|
||||||
}
|
|
||||||
base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK];
|
|
||||||
|
|
||||||
TYPE top2 = TYPE(0);
|
|
||||||
if (off.y > 1)
|
|
||||||
top2 = TYPE(imageLoad(pred, sp + ivec2(0, -2))[comp]);
|
|
||||||
base += quant_table[quant_table_idx][4][(top2 - top[1]) & MAX_QUANT_TABLE_MASK];
|
|
||||||
}
|
|
||||||
|
|
||||||
/* context, prediction */
|
|
||||||
return ivec2(base, predict(cur, VTYPE2(top)));
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifndef GOLOMB
|
#ifndef GOLOMB
|
||||||
/* Note - only handles signed values */
|
/* Note - only handles signed values */
|
||||||
void put_symbol(inout RangeCoder c, uint64_t state, int v)
|
void put_symbol(inout RangeCoder c, uint64_t state, int v)
|
||||||
@ -86,38 +44,42 @@ void put_symbol(inout RangeCoder c, uint64_t state, int v)
|
|||||||
put_rac(c, state - 11 + min(e, 10), v < 0);
|
put_rac(c, state - 11 + min(e, 10), v < 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void encode_line_pcm(inout SliceContext sc, int y, int p, int comp,
|
void encode_line_pcm(inout SliceContext sc, readonly uimage2D img,
|
||||||
int bits)
|
ivec2 sp, int y, int p, int comp, int bits)
|
||||||
{
|
{
|
||||||
ivec2 sp = sc.slice_pos;
|
|
||||||
int w = sc.slice_dim.x;
|
int w = sc.slice_dim.x;
|
||||||
|
|
||||||
|
#ifndef RGB
|
||||||
if (p > 0 && p < 3) {
|
if (p > 0 && p < 3) {
|
||||||
w >>= chroma_shift.x;
|
w >>= chroma_shift.x;
|
||||||
sp >>= chroma_shift;
|
sp >>= chroma_shift;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
for (int x = 0; x < w; x++) {
|
for (int x = 0; x < w; x++) {
|
||||||
uint v = imageLoad(src[p], (sp + ivec2(x, y)))[comp];
|
uint v = imageLoad(img, sp + LADDR(ivec2(x, y)))[comp];
|
||||||
for (int i = (bits - 1); i >= 0; i--)
|
for (int i = (bits - 1); i >= 0; i--)
|
||||||
put_rac_equi(sc.c, bool(bitfieldExtract(v, i, 1)));
|
put_rac_equi(sc.c, bool(bitfieldExtract(v, i, 1)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void encode_line(inout SliceContext sc, uint64_t state,
|
void encode_line(inout SliceContext sc, readonly uimage2D img, uint64_t state,
|
||||||
int y, int p, int comp, int bits,
|
ivec2 sp, int y, int p, int comp, int bits,
|
||||||
uint8_t quant_table_idx, const int run_index)
|
uint8_t quant_table_idx, const int run_index)
|
||||||
{
|
{
|
||||||
ivec2 sp = sc.slice_pos;
|
|
||||||
|
|
||||||
int w = sc.slice_dim.x;
|
int w = sc.slice_dim.x;
|
||||||
|
|
||||||
|
#ifndef RGB
|
||||||
if (p > 0 && p < 3) {
|
if (p > 0 && p < 3) {
|
||||||
w >>= chroma_shift.x;
|
w >>= chroma_shift.x;
|
||||||
sp >>= chroma_shift;
|
sp >>= chroma_shift;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
for (int x = 0; x < w; x++) {
|
for (int x = 0; x < w; x++) {
|
||||||
ivec2 d = get_pred(src[p], sp, ivec2(x, y), comp, w, quant_table_idx);
|
ivec2 d = get_pred(img, sp, ivec2(x, y), comp, w,
|
||||||
d[1] = int(imageLoad(src[p], sp + ivec2(x, y))[comp]) - d[1];
|
quant_table_idx, extend_lookup[quant_table_idx] > 0);
|
||||||
|
d[1] = int(imageLoad(img, sp + LADDR(ivec2(x, y)))[comp]) - d[1];
|
||||||
|
|
||||||
if (d[0] < 0)
|
if (d[0] < 0)
|
||||||
d = -d;
|
d = -d;
|
||||||
@ -130,24 +92,26 @@ void encode_line(inout SliceContext sc, uint64_t state,
|
|||||||
|
|
||||||
#else /* GOLOMB */
|
#else /* GOLOMB */
|
||||||
|
|
||||||
void encode_line(inout SliceContext sc, uint64_t state,
|
void encode_line(inout SliceContext sc, readonly uimage2D img, uint64_t state,
|
||||||
int y, int p, int comp, int bits,
|
ivec2 sp, int y, int p, int comp, int bits,
|
||||||
uint8_t quant_table_idx, inout int run_index)
|
uint8_t quant_table_idx, inout int run_index)
|
||||||
{
|
{
|
||||||
ivec2 sp = sc.slice_pos;
|
|
||||||
|
|
||||||
int w = sc.slice_dim.x;
|
int w = sc.slice_dim.x;
|
||||||
|
|
||||||
|
#ifndef RGB
|
||||||
if (p > 0 && p < 3) {
|
if (p > 0 && p < 3) {
|
||||||
w >>= chroma_shift.x;
|
w >>= chroma_shift.x;
|
||||||
sp >>= chroma_shift;
|
sp >>= chroma_shift;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
int run_count = 0;
|
int run_count = 0;
|
||||||
bool run_mode = false;
|
bool run_mode = false;
|
||||||
|
|
||||||
for (int x = 0; x < w; x++) {
|
for (int x = 0; x < w; x++) {
|
||||||
ivec2 d = get_pred(src[p], sp, ivec2(x, y), comp, w, quant_table_idx);
|
ivec2 d = get_pred(img, sp, ivec2(x, y), comp, w,
|
||||||
d[1] = int(imageLoad(src[p], sp + ivec2(x, y))[comp]) - d[1];
|
quant_table_idx, extend_lookup[quant_table_idx] > 0);
|
||||||
|
d[1] = int(imageLoad(img, sp + LADDR(ivec2(x, y)))[comp]) - d[1];
|
||||||
|
|
||||||
if (d[0] < 0)
|
if (d[0] < 0)
|
||||||
d = -d;
|
d = -d;
|
||||||
@ -198,14 +162,56 @@ void encode_line(inout SliceContext sc, uint64_t state,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef RGB
|
||||||
|
/*
 * Fetch the source texel at pos and return its components reordered
 * through fmt_lut.
 *
 * The first image is always read. When planar_rgb is nonzero, components
 * 1 .. (3 + transparency - 1) are each replaced by the first channel of
 * the matching src[] image.
 */
ivec4 load_components(ivec2 pos)
{
    ivec4 texel = ivec4(imageLoad(src[0], pos));

    if (planar_rgb != 0) {
        int plane = 1;
        while (plane < (3 + transparency)) {
            texel[plane] = int(imageLoad(src[plane], pos)[0]);
            plane++;
        }
    }

    /* Output component i is input component fmt_lut[i] */
    ivec4 ordered;
    for (int i = 0; i < 4; i++)
        ordered[i] = texel[fmt_lut[i]];

    return ordered;
}
|
||||||
|
|
||||||
|
/*
 * Apply the colour transform to one pixel in place.
 *
 * b and r become their difference to g plus rct_offset; g gets a weighted
 * sum of the two differences (before the offset is added), shifted right
 * by two, added to it. The fourth component is left untouched.
 *
 * pix       pixel to transform
 * rct_coef  weights applied to the r (x) and b (y) differences
 */
void transform_sample(inout ivec4 pix, ivec2 rct_coef)
{
    const int diff_b = pix.b - pix.g;
    const int diff_r = pix.r - pix.g;

    pix.g += (diff_r*rct_coef.x + diff_b*rct_coef.y) >> 2;
    pix.r = diff_r + rct_offset;
    pix.b = diff_b + rct_offset;
}
|
||||||
|
|
||||||
|
/*
 * Load row y of the slice from the source image(s) into tmp, optionally
 * applying the colour transform.
 *
 * Each invocation starts at column gl_LocalInvocationID.x and advances by
 * gl_WorkGroupSize.x until it reaches w.
 *
 * sc         slice context; provides slice_pos and slice_rct_coef
 * sp         base position of the slice's rows inside tmp
 * w          number of columns to load
 * y          row within the slice; wrapped by LADDR() for the store
 * apply_rct  when true, transform_sample() is run on every pixel
 */
void preload_rgb(in SliceContext sc, ivec2 sp, int w, int y, bool apply_rct)
{
    uint x = gl_LocalInvocationID.x;

    while (x < w) {
        const ivec2 in_slice = ivec2(x, y);

        ivec4 pix = load_components(sc.slice_pos + in_slice);
        if (expectEXT(apply_rct, true))
            transform_sample(pix, sc.slice_rct_coef);

        imageStore(tmp, sp + LADDR(in_slice), pix);

        x += gl_WorkGroupSize.x;
    }
}
|
||||||
|
#endif
|
||||||
|
|
||||||
void encode_slice(inout SliceContext sc, const uint slice_idx)
|
void encode_slice(inout SliceContext sc, const uint slice_idx)
|
||||||
{
|
{
|
||||||
|
ivec2 sp = sc.slice_pos;
|
||||||
|
|
||||||
#ifndef RGB
|
#ifndef RGB
|
||||||
int bits = bits_per_raw_sample;
|
int bits = bits_per_raw_sample;
|
||||||
#else
|
#else
|
||||||
int bits = 9;
|
int bits = 9;
|
||||||
if (bits != 8 || sc.slice_coding_mode != 0)
|
if (bits != 8 || sc.slice_coding_mode != 0)
|
||||||
bits = bits_per_raw_sample + int(sc.slice_coding_mode != 1);
|
bits = bits_per_raw_sample + int(sc.slice_coding_mode != 1);
|
||||||
|
|
||||||
|
sp.y = int(gl_WorkGroupID.y)*RGB_LINECACHE;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef GOLOMB
|
#ifndef GOLOMB
|
||||||
@ -222,15 +228,17 @@ void encode_slice(inout SliceContext sc, const uint slice_idx)
|
|||||||
int comp = c - p;
|
int comp = c - p;
|
||||||
|
|
||||||
for (int y = 0; y < h; y++)
|
for (int y = 0; y < h; y++)
|
||||||
encode_line_pcm(sc, y, p, comp, bits);
|
encode_line_pcm(sc, src[p], sp, y, p, comp, bits);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
for (int y = 0; y < sc.slice_dim.y; y++) {
|
for (int y = 0; y < sc.slice_dim.y; y++) {
|
||||||
encode_line_pcm(sc, y, 0, 1, bits);
|
preload_rgb(sc, sp, sc.slice_dim.x, y, false);
|
||||||
encode_line_pcm(sc, y, 0, 2, bits);
|
|
||||||
encode_line_pcm(sc, y, 0, 0, bits);
|
encode_line_pcm(sc, tmp, sp, y, 0, 1, bits);
|
||||||
|
encode_line_pcm(sc, tmp, sp, y, 0, 2, bits);
|
||||||
|
encode_line_pcm(sc, tmp, sp, y, 0, 0, bits);
|
||||||
if (transparency == 1)
|
if (transparency == 1)
|
||||||
encode_line_pcm(sc, y, 0, 3, bits);
|
encode_line_pcm(sc, tmp, sp, y, 0, 3, bits);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
} else
|
} else
|
||||||
@ -252,7 +260,8 @@ void encode_slice(inout SliceContext sc, const uint slice_idx)
|
|||||||
int comp = c - p;
|
int comp = c - p;
|
||||||
|
|
||||||
for (int y = 0; y < h; y++)
|
for (int y = 0; y < h; y++)
|
||||||
encode_line(sc, slice_state_off, y, p, comp, bits, quant_table_idx[c], run_index);
|
encode_line(sc, src[p], slice_state_off, sp, y, p,
|
||||||
|
comp, bits, quant_table_idx[c], run_index);
|
||||||
|
|
||||||
/* For the second chroma plane, reuse the first plane's state */
|
/* For the second chroma plane, reuse the first plane's state */
|
||||||
if (c != 1)
|
if (c != 1)
|
||||||
@ -261,15 +270,17 @@ void encode_slice(inout SliceContext sc, const uint slice_idx)
|
|||||||
#else
|
#else
|
||||||
int run_index = 0;
|
int run_index = 0;
|
||||||
for (int y = 0; y < sc.slice_dim.y; y++) {
|
for (int y = 0; y < sc.slice_dim.y; y++) {
|
||||||
encode_line(sc, slice_state_off + plane_state_size*0,
|
preload_rgb(sc, sp, sc.slice_dim.x, y, true);
|
||||||
y, 0, 1, bits, quant_table_idx[0], run_index);
|
|
||||||
encode_line(sc, slice_state_off + plane_state_size*1,
|
encode_line(sc, tmp, slice_state_off + plane_state_size*0,
|
||||||
y, 0, 2, bits, quant_table_idx[1], run_index);
|
sp, y, 0, 1, bits, quant_table_idx[0], run_index);
|
||||||
encode_line(sc, slice_state_off + plane_state_size*1,
|
encode_line(sc, tmp, slice_state_off + plane_state_size*1,
|
||||||
y, 0, 0, bits, quant_table_idx[2], run_index);
|
sp, y, 0, 2, bits, quant_table_idx[1], run_index);
|
||||||
|
encode_line(sc, tmp, slice_state_off + plane_state_size*1,
|
||||||
|
sp, y, 0, 0, bits, quant_table_idx[2], run_index);
|
||||||
if (transparency == 1)
|
if (transparency == 1)
|
||||||
encode_line(sc, slice_state_off + plane_state_size*2,
|
encode_line(sc, tmp, slice_state_off + plane_state_size*2,
|
||||||
y, 0, 3, bits, quant_table_idx[3], run_index);
|
sp, y, 0, 3, bits, quant_table_idx[3], run_index);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -26,6 +26,8 @@
|
|||||||
#include "libavutil/vulkan_spirv.h"
|
#include "libavutil/vulkan_spirv.h"
|
||||||
#include "libavutil/mem.h"
|
#include "libavutil/mem.h"
|
||||||
|
|
||||||
|
#define RGB_LINECACHE 2
|
||||||
|
|
||||||
extern const char *ff_source_common_comp;
|
extern const char *ff_source_common_comp;
|
||||||
extern const char *ff_source_rangecoder_comp;
|
extern const char *ff_source_rangecoder_comp;
|
||||||
extern const char *ff_source_ffv1_vlc_comp;
|
extern const char *ff_source_ffv1_vlc_comp;
|
||||||
@ -610,6 +612,7 @@ static void define_shared_code(FFVulkanShader *shd, int use32bit)
|
|||||||
|
|
||||||
GLSLC(0, #define DECODE );
|
GLSLC(0, #define DECODE );
|
||||||
|
|
||||||
|
av_bprintf(&shd->src, "#define RGB_LINECACHE %i\n" ,RGB_LINECACHE);
|
||||||
av_bprintf(&shd->src, "#define CONTEXT_SIZE %i\n" ,CONTEXT_SIZE);
|
av_bprintf(&shd->src, "#define CONTEXT_SIZE %i\n" ,CONTEXT_SIZE);
|
||||||
av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_MASK 0x%x\n" ,MAX_QUANT_TABLE_MASK);
|
av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_MASK 0x%x\n" ,MAX_QUANT_TABLE_MASK);
|
||||||
|
|
||||||
@ -936,7 +939,7 @@ static int init_indirect(AVCodecContext *avctx, FFVulkanContext *s,
|
|||||||
frames_ctx->format = AV_PIX_FMT_VULKAN;
|
frames_ctx->format = AV_PIX_FMT_VULKAN;
|
||||||
frames_ctx->sw_format = sw_format;
|
frames_ctx->sw_format = sw_format;
|
||||||
frames_ctx->width = s->frames->width;
|
frames_ctx->width = s->frames->width;
|
||||||
frames_ctx->height = f->num_v_slices*2;
|
frames_ctx->height = f->num_v_slices*RGB_LINECACHE;
|
||||||
|
|
||||||
vk_frames = frames_ctx->hwctx;
|
vk_frames = frames_ctx->hwctx;
|
||||||
vk_frames->tiling = VK_IMAGE_TILING_OPTIMAL;
|
vk_frames->tiling = VK_IMAGE_TILING_OPTIMAL;
|
||||||
|
Reference in New Issue
Block a user