mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-13 21:28:01 +02:00
ffv1enc: add a Vulkan encoder
This commit implements a standard, compliant, version 3 and version 4 FFv1 encoder, entirely in Vulkan. The encoder is written in standard GLSL and requires a Vulkan 1.3 supporting GPU with the BDA extension. The encoder can use any number of slices, but nominally, should use 32x32 slices (1024 in total) to maximize parallelism. All features are supported, as well as all pixel formats. This includes:
- Rice
- Range coding with a custom quantization table
- PCM encoding
CRC calculation is also massively parallelized on the GPU. Encoding of unaligned dimensions on subsampled data requires version 4, or requires oversizing the image to 64-pixel alignment and cropping out the padding via container flags. Performance-wise, this makes 1080p real-time screen capture possible at 60fps on even modest GPUs.
This commit is contained in:
parent
a6c58353ac
commit
ed2391d341
1
configure
vendored
1
configure
vendored
@ -2951,6 +2951,7 @@ exr_decoder_deps="zlib"
|
||||
exr_encoder_deps="zlib"
|
||||
ffv1_decoder_select="rangecoder"
|
||||
ffv1_encoder_select="rangecoder"
|
||||
ffv1_vulkan_encoder_select="vulkan spirv_compiler"
|
||||
ffvhuff_decoder_select="huffyuv_decoder"
|
||||
ffvhuff_encoder_select="huffyuv_encoder"
|
||||
fic_decoder_select="golomb"
|
||||
|
@ -370,6 +370,7 @@ OBJS-$(CONFIG_EXR_ENCODER) += exrenc.o float2half.o
|
||||
OBJS-$(CONFIG_FASTAUDIO_DECODER) += fastaudio.o
|
||||
OBJS-$(CONFIG_FFV1_DECODER) += ffv1dec.o ffv1.o
|
||||
OBJS-$(CONFIG_FFV1_ENCODER) += ffv1enc.o ffv1.o
|
||||
OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += ffv1enc.o ffv1.o ffv1enc_vulkan.o
|
||||
OBJS-$(CONFIG_FFWAVESYNTH_DECODER) += ffwavesynth.o
|
||||
OBJS-$(CONFIG_FIC_DECODER) += fic.o
|
||||
OBJS-$(CONFIG_FITS_DECODER) += fitsdec.o fits.o
|
||||
|
@ -116,6 +116,7 @@ extern const FFCodec ff_escape130_decoder;
|
||||
extern const FFCodec ff_exr_encoder;
|
||||
extern const FFCodec ff_exr_decoder;
|
||||
extern const FFCodec ff_ffv1_encoder;
|
||||
extern const FFCodec ff_ffv1_vulkan_encoder;
|
||||
extern const FFCodec ff_ffv1_decoder;
|
||||
extern const FFCodec ff_ffvhuff_encoder;
|
||||
extern const FFCodec ff_ffvhuff_decoder;
|
||||
|
1604
libavcodec/ffv1enc_vulkan.c
Normal file
1604
libavcodec/ffv1enc_vulkan.c
Normal file
File diff suppressed because it is too large
Load Diff
@ -3,6 +3,14 @@ GEN_CLEANSUFFIXES = *.o *.c *.d
|
||||
clean::
|
||||
$(RM) $(GEN_CLEANSUFFIXES:%=libavcodec/vulkan/%)
|
||||
|
||||
OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += vulkan/common.o \
|
||||
vulkan/rangecoder.o vulkan/ffv1_vlc.o \
|
||||
vulkan/ffv1_common.o vulkan/ffv1_reset.o \
|
||||
vulkan/ffv1_enc_common.o \
|
||||
vulkan/ffv1_enc_rct.o vulkan/ffv1_enc_setup.o \
|
||||
vulkan/ffv1_enc_vlc.o vulkan/ffv1_enc_ac.o \
|
||||
vulkan/ffv1_enc.o vulkan/ffv1_enc_rgb.o
|
||||
|
||||
VULKAN = $(subst $(SRC_PATH)/,,$(wildcard $(SRC_PATH)/libavcodec/vulkan/*.comp))
|
||||
.SECONDARY: $(VULKAN:.comp=.c)
|
||||
libavcodec/vulkan/%.c: TAG = VULKAN
|
||||
|
170
libavcodec/vulkan/common.comp
Normal file
170
libavcodec/vulkan/common.comp
Normal file
@ -0,0 +1,170 @@
|
||||
/*
|
||||
* Copyright (c) 2024 Lynne <dev@lynne.ee>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/* Buffer-reference views over raw memory. Each block exposes a single
 * unsigned member `v` of the named width; the declared reference alignment
 * equals the size of that member. */

/* 8-bit view, byte alignment */
layout(buffer_reference, buffer_reference_align = 1) buffer u8buf {
    uint8_t v;
};

/* 16-bit view, 2-byte alignment */
layout(buffer_reference, buffer_reference_align = 2) buffer u16buf {
    uint16_t v;
};

/* 32-bit view, 4-byte alignment */
layout(buffer_reference, buffer_reference_align = 4) buffer u32buf {
    uint32_t v;
};

/* 64-bit view, 8-byte alignment */
layout(buffer_reference, buffer_reference_align = 8) buffer u64buf {
    uint64_t v;
};
|
||||
|
||||
/* Build a buffer reference of type `type` pointing `l` bytes past `b` */
#define OFFBUF(type, b, l) \
    type(uint64_t(b) + uint64_t(l))

/* Keep only the `p` least significant bits of `a` */
#define zero_extend(a, p) \
    ((a) & ((1 << (p)) - 1))

/* Extract the low `bits` bits of `val` with bitfieldExtract(); for a signed
 * operand the GLSL built-in sign-extends the extracted field */
#define sign_extend(val, bits) \
    bitfieldExtract(val, 0, bits)

/* Wrap a sample difference to `bits` bits */
#define fold(diff, bits) \
    sign_extend(diff, bits)

/* Median of three values */
#define mid_pred(a, b, c) \
    max(min((a), (b)), min(max((a), (b)), (c)))
|
||||
|
||||
/* TODO: optimize */
|
||||
/* Round `src` up to the nearest multiple of `a`.
 * Values that are already a multiple are returned unchanged.
 * `a` must be non-zero (it is used as a modulo divisor). */
uint align(uint src, uint a)
{
    const uint rem = src % a;
    return (rem == 0) ? src : (src + a - rem);
}
|
||||
|
||||
/* TODO: optimize */
|
||||
/* 64-bit variant of align(): round `src` up to the nearest multiple of `a`.
 * Values that are already a multiple are returned unchanged.
 * `a` must be non-zero (it is used as a modulo divisor). */
uint64_t align64(uint64_t src, uint64_t a)
{
    const uint64_t rem = src % a;
    return (rem == 0) ? src : (src + a - rem);
}
|
||||
|
||||
/* Reverse the byte order of a 32-bit word: bytes (x, y, z, w), least
 * significant first, become (w, z, y, x). This is the order the unaligned
 * path of put_bits() stores bytes in (most significant byte first), so both
 * paths must agree. The previous swizzle, .wzxy, left the two low-order
 * output bytes in the wrong order. */
#define reverse4(src) \
    (pack32(unpack8(uint32_t(src)).wzyx))
|
||||
|
||||
/* Reverse a 64-bit word by applying reverse4() to each 32-bit half and
 * swapping the two halves. */
uint64_t reverse8(uint64_t src)
{
    const u32vec2 halves = unpack32(src);
    const uint32_t lo = reverse4(halves.x);
    const uint32_t hi = reverse4(halves.y);
    return pack64(u32vec2(hi, lo));
}
|
||||
|
||||
/* Bit-writer configuration. PB_32 selects a 32-bit accumulator; otherwise a
 * 64-bit accumulator is used.
 *   BIT_BUF_TYPE  - accumulator type
 *   BUF_TYPE      - buffer reference type used for whole-word stores
 *   BUF_REVERSE   - byte reversal applied before a whole-word store
 *   BUF_BITS      - accumulator width in bits
 *   BUF_BYTES     - accumulator width in bytes
 *   BYTE_EXTRACT  - byte number `byte_off` of `src` (0 = least significant) */
#ifdef PB_32
#define BIT_BUF_TYPE uint32_t
#define BUF_TYPE u32buf
#define BUF_REVERSE(src) reverse4(src)
#define BUF_BITS uint8_t(32)
#define BUF_BYTES uint8_t(4)
#define BYTE_EXTRACT(src, byte_off) \
    (uint8_t(bitfieldExtract((src), ((byte_off) << 3), 8)))
#else
#define BIT_BUF_TYPE uint64_t
#define BUF_TYPE u64buf
#define BUF_REVERSE(src) reverse8(src)
#define BUF_BITS uint8_t(64)
#define BUF_BYTES uint8_t(8)
#define BYTE_EXTRACT(src, byte_off) \
    (uint8_t(((src) >> ((byte_off) << 3)) & 0xFF))
#endif
|
||||
|
||||
/* State of the MSB-first bit writer */
struct PutBitContext {
    uint64_t buf_start;   /* address the writer was initialised with */
    uint64_t buf;         /* address of the next byte to be stored */

    BIT_BUF_TYPE bit_buf; /* accumulator of bits not yet stored */
    uint8_t bit_left;     /* free bits remaining in the accumulator */
};
|
||||
|
||||
/* Append the `n` low bits of `value` to the bitstream.
 *
 * While the accumulator has more than `n` free bits the value is just shifted
 * in. Otherwise the accumulator is topped up with the leading bits of
 * `value`, stored to memory most significant byte first, and restarted with
 * `value` (whose trailing bits are the ones still pending). */
void put_bits(inout PutBitContext pb, const uint32_t n, uint32_t value)
{
    /* Fast path: everything fits, nothing to store */
    if (n < pb.bit_left) {
        pb.bit_buf = (pb.bit_buf << n) | value;
        pb.bit_left -= uint8_t(n);
        return;
    }

    /* Fill the remaining free bits with the top part of value */
    pb.bit_buf <<= pb.bit_left;
    pb.bit_buf |= (value >> (n - pb.bit_left));

#ifdef PB_UNALIGNED
    /* Byte-wise store, most significant byte first */
    u8buf bs = u8buf(pb.buf);
    [[unroll]]
    for (uint8_t i = uint8_t(0); i < BUF_BYTES; i++)
        bs[i].v = BYTE_EXTRACT(pb.bit_buf, BUF_BYTES - uint8_t(1) - i);
#else
#ifdef DEBUG
    if ((pb.buf % BUF_BYTES) != 0)
        debugPrintfEXT("put_bits buffer is not aligned!");
#endif

    /* Whole-word store of the byte-reversed accumulator */
    BUF_TYPE bs = BUF_TYPE(pb.buf);
    bs.v = BUF_REVERSE(pb.bit_buf);
#endif
    pb.buf = uint64_t(bs) + BUF_BYTES;

    /* Restart the accumulator with the bits that did not fit */
    pb.bit_left += BUF_BITS - uint8_t(n);
    pb.bit_buf = value;
}
|
||||
|
||||
/* Store any bits still pending in the accumulator, padded with zero bits up
 * to the next byte boundary, and reset the accumulator.
 *
 * Only the bytes that actually hold pending bits are written, and the write
 * pointer advances by exactly that many bytes, so the returned length never
 * covers bytes that were not stored.
 *
 * Returns the number of bytes between the start of the buffer and the write
 * pointer. */
uint32_t flush_put_bits(inout PutBitContext pb)
{
    if (pb.bit_left < BUF_BITS) {
        /* Align bits to MSBs */
        pb.bit_buf <<= pb.bit_left;

        /* ceil(pending_bits / 8) bytes carry data */
        const uint pending_bits = uint(BUF_BITS) - uint(pb.bit_left);
        const uint to_write = (pending_bits + 7u) >> 3;

        u8buf bs = u8buf(pb.buf);
        for (int i = 0; i < to_write; i++)
            bs[i].v = BYTE_EXTRACT(pb.bit_buf, BUF_BYTES - uint8_t(1) - i);
        pb.buf = uint64_t(bs) + uint64_t(to_write);
    }

    pb.bit_left = BUF_BITS;
    pb.bit_buf = 0x0;

    return uint32_t(pb.buf - pb.buf_start);
}
|
||||
|
||||
/* Start a bit writer at `data` with an empty accumulator.
 * `len` is accepted but not used by this function. */
void init_put_bits(out PutBitContext pb, u8buf data, uint64_t len)
{
    const uint64_t base = uint64_t(data);

    pb.buf_start = base;
    pb.buf = base;

    pb.bit_left = BUF_BITS;
    pb.bit_buf = 0;
}
|
||||
|
||||
/* Number of bits written so far: the bits already stored to memory plus the
 * bits currently held in the accumulator. */
uint64_t put_bits_count(in PutBitContext pb)
{
    const uint64_t stored_bits = (pb.buf - pb.buf_start)*8;
    return stored_bits + BUF_BITS - pb.bit_left;
}
|
74
libavcodec/vulkan/ffv1_common.comp
Normal file
74
libavcodec/vulkan/ffv1_common.comp
Normal file
@ -0,0 +1,74 @@
|
||||
/*
|
||||
* FFv1 codec
|
||||
*
|
||||
* Copyright (c) 2024 Lynne <dev@lynne.ee>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/* Per-slice encoder state shared between the shader stages */
struct SliceContext {
    RangeCoder c;         /* range coder state of the slice */

#ifdef GOLOMB
    PutBitContext pb; /* 8*8 bytes */
#endif

    ivec2 slice_dim;      /* slice width and height */
    ivec2 slice_pos;      /* slice origin */
    ivec2 slice_rct_coef; /* coefficients applied by the RCT to the green channel */

    uint hdr_len; // only used for golomb
    int slice_coding_mode; /* 1 selects the PCM / bypass code paths */
};
|
||||
|
||||
/* -1, { -1, 0 } */
|
||||
/* Median predictor: the median of the left sample `L`, the top sample
 * (top[1]) and the gradient L + top - top-left (top[0] is top-left). */
int predict(int L, ivec2 top)
{
    const int gradient = L + top[1] - top[0];
    return mid_pred(L, gradient, top[1]);
}
|
||||
|
||||
/* { -2, -1 }, { -1, 0, 1 }, 0 */
|
||||
/* Compute the context index of a sample from its neighbourhood.
 *
 * cur_l: { left-left, left }, top_l: { top-left, top, top-right },
 * top2: the sample two rows up. Neighbour differences are masked with
 * MAX_QUANT_TABLE_MASK and looked up in quant_table[quant_table_idx].
 * Tables 3 and 4 only contribute when at least one of them has a non-zero
 * entry at index 127. */
int get_context(VTYPE2 cur_l, VTYPE3 top_l, TYPE top2, uint8_t quant_table_idx)
{
    const int LT = top_l[0]; /* -1 */
    const int T  = top_l[1]; /*  0 */
    const int RT = top_l[2]; /*  1 */
    const int L  = cur_l[1]; /* -1 */

    int base = quant_table[quant_table_idx][0][(L - LT) & MAX_QUANT_TABLE_MASK] +
               quant_table[quant_table_idx][1][(LT - T) & MAX_QUANT_TABLE_MASK] +
               quant_table[quant_table_idx][2][(T - RT) & MAX_QUANT_TABLE_MASK];

    const bool wide_context = (quant_table[quant_table_idx][3][127] != 0) ||
                              (quant_table[quant_table_idx][4][127] != 0);
    if (!wide_context)
        return base;

    const int TT = top2;     /* -2 */
    const int LL = cur_l[0]; /* -2 */
    return base +
           quant_table[quant_table_idx][3][(LL - L) & MAX_QUANT_TABLE_MASK] +
           quant_table[quant_table_idx][4][(TT - T) & MAX_QUANT_TABLE_MASK];
}
|
||||
|
||||
/* Indexed by the run index of the run-length coder; callers use
 * 1 << log2_run[run_index] as the run length step. */
const uint32_t log2_run[41] = {
     0,  0,  0,  0,  1,  1,  1,  1,
     2,  2,  2,  2,  3,  3,  3,  3,
     4,  4,  5,  5,  6,  6,  7,  7,
     8,  9, 10, 11, 12, 13, 14, 15,
    16, 17, 18, 19, 20, 21, 22, 23,
    24,
};
|
67
libavcodec/vulkan/ffv1_enc.comp
Normal file
67
libavcodec/vulkan/ffv1_enc.comp
Normal file
@ -0,0 +1,67 @@
|
||||
/*
|
||||
* FFv1 codec
|
||||
*
|
||||
* Copyright (c) 2024 Lynne <dev@lynne.ee>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/* Encode every plane of one slice, then finalize it.
 *
 * Planes 1 and 2 have their height reduced by chroma_shift.y. Without GOLOMB,
 * slice_coding_mode == 1 selects raw PCM lines. Otherwise each plane is coded
 * with its own context state, except that plane 2 reuses the state of
 * plane 1. */
void encode_slice(inout SliceContext sc, const uint slice_idx)
{
    const int bits = bits_per_raw_sample;

#ifndef GOLOMB
    if (sc.slice_coding_mode == 1) {
        for (int p = 0; p < planes; p++) {
            const bool subsampled = (p > 0 && p < 3);
            const int rows = subsampled ? (sc.slice_dim.y >> chroma_shift.y)
                                        : sc.slice_dim.y;

            for (int y = 0; y < rows; y++)
                encode_line_pcm(sc, y, p, 0, bits);
        }
    } else
#endif
    {
        uint64_t slice_state_off = uint64_t(slice_state) +
                                   slice_idx*plane_state_size*codec_planes;

        for (int p = 0; p < planes; p++) {
            int run_index = 0;

            const bool subsampled = (p > 0 && p < 3);
            const int rows = subsampled ? (sc.slice_dim.y >> chroma_shift.y)
                                        : sc.slice_dim.y;

            for (int y = 0; y < rows; y++)
                encode_line(sc, slice_state_off, y, p, 0, bits, run_index);

            /* For the second chroma plane, reuse the first plane's state */
            if (p != 1)
                slice_state_off += plane_state_size;
        }
    }

    finalize_slice(sc, slice_idx);
}
|
||||
|
||||
/* One workgroup per slice; slices are numbered row-major over the dispatch grid */
void main(void)
{
    const uint row_base = gl_WorkGroupID.y*gl_NumWorkGroups.x;
    const uint slice_idx = row_base + gl_WorkGroupID.x;
    encode_slice(slice_ctx[slice_idx], slice_idx);
}
|
83
libavcodec/vulkan/ffv1_enc_ac.comp
Normal file
83
libavcodec/vulkan/ffv1_enc_ac.comp
Normal file
@ -0,0 +1,83 @@
|
||||
/*
|
||||
* Copyright (c) 2024 Lynne <dev@lynne.ee>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/* Code one bit with the adaptive state at `state`, renormalizing the coder
 * once its range has dropped below 0x100. */
void put_rac(inout RangeCoder c, uint64_t state, bool bit)
{
    put_rac_norenorm(c, state, bit);
    if (c.range >= 0x100)
        return;
    renorm_encoder(c);
}
|
||||
|
||||
/* Note - only handles signed values */
|
||||
/* Code a signed value with the range coder.
 *
 * State layout relative to `state`:
 *   +0       zero flag
 *   +1..+10  unary exponent (index clamped to 9)
 *   +11..+21 sign (exponent clamped to 10)
 *   +22..+31 mantissa bits (index clamped to 9)
 * Note - only handles signed values */
void put_symbol(inout RangeCoder c, uint64_t state, int v)
{
    if (v == 0) {
        put_rac(c, state, true);
        return;
    }
    put_rac(c, state, false);

    const int mag = abs(v);
    const int msb = findMSB(mag);

    const uint64_t exp_state  = state + 1;
    const uint64_t sign_state = state + 11;
    const uint64_t mant_state = state + 22;

    /* Exponent, unary: msb ones followed by a zero */
    for (int i = 0; i < msb; i++)
        put_rac(c, exp_state + min(i, 9), true);
    put_rac(c, exp_state + min(msb, 9), false);

    /* Mantissa bits below the leading one, most significant first */
    for (int i = msb - 1; i >= 0; i--)
        put_rac(c, mant_state + min(i, 9), bool(bitfieldExtract(mag, i, 1)));

    /* Sign */
    put_rac(c, sign_state + min(msb, 10), v < 0);
}
|
||||
|
||||
/* Write line `y` of plane `p`, component `comp`, as raw samples: each sample
 * is emitted as `bits` equiprobable bits, most significant first.
 * Planes 1 and 2 use the chroma-shifted width and slice origin. */
void encode_line_pcm(inout SliceContext sc, int y, int p, int comp,
                     int bits)
{
    ivec2 origin = sc.slice_pos;
    int width = sc.slice_dim.x;
    if (p > 0 && p < 3) {
        width >>= chroma_shift.x;
        origin >>= chroma_shift;
    }

    for (int x = 0; x < width; x++) {
        const uint v = imageLoad(src[p], (origin + ivec2(x, y)))[comp];
        for (int b = (bits - 1); b >= 0; b--)
            put_rac_equi(sc.c, bool(bitfieldExtract(v, b, 1)));
    }
}
|
||||
|
||||
/* Range-code line `y` of plane `p`, component `comp`: for each sample the
 * context/difference pair from get_diff() is coded with put_symbol(), using
 * the CONTEXT_SIZE-byte state selected by the context.
 * `run_index` is not used by this variant. */
void encode_line(inout SliceContext sc, uint64_t state,
                 int y, int p, int comp, int bits, const int run_index)
{
    ivec2 origin = sc.slice_pos;
    int width = sc.slice_dim.x;
    if (p > 0 && p < 3) {
        width >>= chroma_shift.x;
        origin >>= chroma_shift;
    }

    for (int x = 0; x < width; x++) {
        const ivec2 local = ivec2(x, y);
        const ivec2 d = get_diff(origin + local, local, p, comp, width, bits);
        put_symbol(sc.c, state + CONTEXT_SIZE*d[0], d[1]);
    }
}
|
101
libavcodec/vulkan/ffv1_enc_common.comp
Normal file
101
libavcodec/vulkan/ffv1_enc_common.comp
Normal file
@ -0,0 +1,101 @@
|
||||
/*
|
||||
* FFv1 codec
|
||||
*
|
||||
* Copyright (c) 2024 Lynne <dev@lynne.ee>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/* Compute { context, folded prediction residual } for one sample.
 *
 * pos: absolute sample position in the plane, off: position relative to the
 * slice, sw: slice width in samples. Neighbours outside the slice read as 0,
 * except along the left border, where the left neighbours are redirected one
 * row up and one column right. A negative context is negated together with
 * the residual. */
ivec2 get_diff(ivec2 pos, ivec2 off, int p, int comp, int sw, int bits)
{
    /* Redirection applied to the direct-left neighbours in column 0 ... */
    const ivec2 wrap_col0 = off.x == 0 ? ivec2(1, -1) : ivec2(0, 0);
    /* ... and to the left-left neighbour in column 1 */
    const ivec2 wrap_col1 = off.x == 1 ? ivec2(1, -1) : ivec2(0, 0);

    TYPE top2 = TYPE(0);
    if (off.y > 1)
        top2 = TYPE(imageLoad(src[p], pos + ivec2(0, -2))[comp]);

    /* { top-left, top, top-right } */
    VTYPE3 top = VTYPE3(TYPE(0),
                        TYPE(0),
                        TYPE(0));
    if (off.y > 0) {
        if (off != ivec2(0, 1))
            top[0] = TYPE(imageLoad(src[p], pos + ivec2(-1, -1) + wrap_col0)[comp]);
        top[1] = TYPE(imageLoad(src[p], pos + ivec2(0, -1))[comp]);
        /* top-right falls back to top in the last column */
        top[2] = TYPE(imageLoad(src[p], pos + ivec2(min(1, sw - off.x - 1), -1))[comp]);
    }

    /* { left-left, left, current } */
    VTYPE3 cur = VTYPE3(TYPE(0),
                        TYPE(0),
                        imageLoad(src[p], pos)[comp]);
    if (off.x > 0 && off != ivec2(1, 0))
        cur[0] = TYPE(imageLoad(src[p], pos + ivec2(-2, 0) + wrap_col1)[comp]);
    if (off != ivec2(0, 0))
        cur[1] = TYPE(imageLoad(src[p], pos + ivec2(-1, 0) + wrap_col0)[comp]);

    /* context, diff */
    ivec2 d = ivec2(get_context(VTYPE2(cur), top, top2, context_model),
                    cur[2] - predict(cur[1], VTYPE2(top)));

    if (d[0] < 0)
        d = -d;

    d[1] = fold(d[1], bits);

    return d;
}
|
||||
|
||||
/* Terminate the slice payload and append its footer.
 *
 * The footer is the payload length as three bytes, most significant first.
 * When `ec` is non-zero it is followed by a zero byte and a CRC over
 * everything written so far, stored least significant byte first. The final
 * size and the slice's offset from out_data are written to slice_results. */
void finalize_slice(inout SliceContext sc, const uint slice_idx)
{
#ifdef GOLOMB
    uint32_t total = sc.hdr_len + flush_put_bits(sc.pb);
#else
    uint32_t total = rac_terminate(sc.c);
#endif

    u8buf bs = u8buf(sc.c.bytestream_start);

    /* Append slice length */
    const u8vec4 len_bytes = unpack8(total);
    bs[total + 0].v = len_bytes.z;
    bs[total + 1].v = len_bytes.y;
    bs[total + 2].v = len_bytes.x;
    total += 3;

    /* Calculate and write CRC */
    if (ec != 0) {
        bs[total].v = uint8_t(0);
        total++;

        uint32_t crc = crcref;
        for (int i = 0; i < total; i++)
            crc = crc_ieee[(crc & 0xFF) ^ uint32_t(bs[i].v)] ^ (crc >> 8);

        if (crcref != 0x00000000)
            crc ^= 0x8CD88196;

        const u8vec4 crc_bytes = unpack8(crc);
        bs[total + 0].v = crc_bytes.x;
        bs[total + 1].v = crc_bytes.y;
        bs[total + 2].v = crc_bytes.z;
        bs[total + 3].v = crc_bytes.w;
        total += 4;
    }

    slice_results[slice_idx*2 + 0] = total;
    slice_results[slice_idx*2 + 1] = uint32_t(uint64_t(bs) - uint64_t(out_data));
}
|
82
libavcodec/vulkan/ffv1_enc_rct.comp
Normal file
82
libavcodec/vulkan/ffv1_enc_rct.comp
Normal file
@ -0,0 +1,82 @@
|
||||
/*
|
||||
* FFv1 codec
|
||||
*
|
||||
* Copyright (c) 2024 Lynne <dev@lynne.ee>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/* Load all components of the pixel at `pos`.
 *
 * For packed input (planar_rgb == 0) the texel of src[0] is returned as is.
 * For planar input, the first component of each of the 3 (or 4, with
 * transparency) planes is gathered and then reordered by a swizzle.
 * Components that have no source plane are returned as 0 rather than being
 * left uninitialized. */
ivec4 load_components(ivec2 pos)
{
    if (planar_rgb == 0)
        return ivec4(imageLoad(src[0], pos));

    /* Zero-initialize: without transparency pix[3] is never loaded */
    ivec4 pix = ivec4(0);
    for (int i = 0; i < (3 + transparency); i++)
        pix[i] = int(imageLoad(src[i], pos)[0]);

    /* Swizzle out the difference */
    if (transparency > 0)
        return pix.brga;
    return pix.bgra;
}
|
||||
|
||||
/* Copy the pixel at `pos` to dst[0] without applying the colour transform */
void bypass_sample(ivec2 pos)
{
    const ivec4 pix = load_components(pos);
    imageStore(dst[0], pos, pix);
}
|
||||
|
||||
/* Copy a whole slice untransformed. Each invocation starts at its local ID
 * inside the slice and strides by the workgroup size in both directions. */
void bypass_block(in SliceContext sc)
{
    const ivec2 first = ivec2(gl_LocalInvocationID) + sc.slice_pos;
    const ivec2 limit = sc.slice_pos + sc.slice_dim;

    for (uint y = first.y; y < limit.y; y += gl_WorkGroupSize.y) {
        for (uint x = first.x; x < limit.x; x += gl_WorkGroupSize.x)
            bypass_sample(ivec2(x, y));
    }
}
|
||||
|
||||
/* Apply the reversible colour transform to the pixel at `pos` and store the
 * result in dst[0]: blue and red become differences against green, green
 * gets a weighted quarter of those differences added, and the differences
 * are then biased by `offset`. */
void transform_sample(ivec2 pos, ivec2 rct_coef)
{
    ivec4 pix = load_components(pos);

    /* Differences against green */
    pix.b -= pix.g;
    pix.r -= pix.g;

    const int weighted = pix.r*rct_coef.x + pix.b*rct_coef.y;
    pix.g += weighted >> 2;

    pix.b += offset;
    pix.r += offset;

    imageStore(dst[0], pos, pix);
}
|
||||
|
||||
/* Apply the colour transform to a whole slice. Each invocation starts at its
 * local ID inside the slice and strides by the workgroup size in both
 * directions. */
void transform_block(in SliceContext sc)
{
    const ivec2 coef = sc.slice_rct_coef;
    const ivec2 first = ivec2(gl_LocalInvocationID) + sc.slice_pos;
    const ivec2 limit = sc.slice_pos + sc.slice_dim;

    for (uint y = first.y; y < limit.y; y += gl_WorkGroupSize.y) {
        for (uint x = first.x; x < limit.x; x += gl_WorkGroupSize.x)
            transform_sample(ivec2(x, y), coef);
    }
}
|
||||
|
||||
/* One workgroup per slice: slices in coding mode 1 are copied as is, all
 * others go through the colour transform. */
void main()
{
    const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;

    if (slice_ctx[slice_idx].slice_coding_mode != 1)
        transform_block(slice_ctx[slice_idx]);
    else
        bypass_block(slice_ctx[slice_idx]);
}
|
83
libavcodec/vulkan/ffv1_enc_rgb.comp
Normal file
83
libavcodec/vulkan/ffv1_enc_rgb.comp
Normal file
@ -0,0 +1,83 @@
|
||||
/*
|
||||
* FFv1 codec
|
||||
*
|
||||
* Copyright (c) 2024 Lynne <dev@lynne.ee>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/* Encode one RGB(A) slice from the packed image in src[0], then finalize it.
 *
 * Lines are interleaved per row in component order 1, 2, 0 and, with
 * transparency, 3. Samples use bits_per_raw_sample bits in coding mode 1 and
 * one bit more otherwise. Component 1 uses context state 0, components 2 and
 * 0 share state 1, and component 3 uses state 2. */
void encode_slice_rgb(inout SliceContext sc, const uint slice_idx)
{
    const int bits = bits_per_raw_sample + int(sc.slice_coding_mode != 1);
    const bool has_alpha = (transparency == 1);

    int run_index = 0;

#ifndef GOLOMB
    if (sc.slice_coding_mode == 1) {
        for (int y = 0; y < sc.slice_dim.y; y++) {
            encode_line_pcm(sc, y, 0, 1, bits);
            encode_line_pcm(sc, y, 0, 2, bits);
            encode_line_pcm(sc, y, 0, 0, bits);
            if (has_alpha)
                encode_line_pcm(sc, y, 0, 3, bits);
        }
    } else
#endif
    {
        uint64_t slice_state_off = uint64_t(slice_state) +
                                   slice_idx*plane_state_size*codec_planes;

        for (int y = 0; y < sc.slice_dim.y; y++) {
            encode_line(sc, slice_state_off + plane_state_size*0,
                        y, 0, 1, bits, run_index);
            encode_line(sc, slice_state_off + plane_state_size*1,
                        y, 0, 2, bits, run_index);
            encode_line(sc, slice_state_off + plane_state_size*1,
                        y, 0, 0, bits, run_index);
            if (has_alpha)
                encode_line(sc, slice_state_off + plane_state_size*2,
                            y, 0, 3, bits, run_index);
        }
    }

    finalize_slice(sc, slice_idx);
}
|
||||
|
||||
/* One workgroup per slice; slices are numbered row-major over the dispatch grid */
void main(void)
{
    const uint row_base = gl_WorkGroupID.y*gl_NumWorkGroups.x;
    const uint slice_idx = row_base + gl_WorkGroupID.x;
    encode_slice_rgb(slice_ctx[slice_idx], slice_idx);
}
|
151
libavcodec/vulkan/ffv1_enc_setup.comp
Normal file
151
libavcodec/vulkan/ffv1_enc_setup.comp
Normal file
@ -0,0 +1,151 @@
|
||||
/*
|
||||
* FFv1 codec
|
||||
*
|
||||
* Copyright (c) 2024 Lynne <dev@lynne.ee>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/* Coordinate at which slice number `sx` (out of `num_h_slices`) starts along
 * an axis of `width` samples.
 *
 * Before version 4 micro version 3 this is the plain proportional split.
 * From then on the boundary is rounded to a multiple of 1 << chroma_shift,
 * and a boundary landing on the aligned width is clamped to `width`. */
uint slice_coord(uint width, uint sx, uint num_h_slices, uint chroma_shift)
{
    const bool proportional = (version < 4) ||
                              ((version == 4) && (micro_version < 3));
    if (proportional)
        return width * sx / num_h_slices;

    const uint mpw = 1 << chroma_shift;
    const uint awidth = align(width, mpw);

    const uint edge = (2 * awidth * sx + num_h_slices * mpw) / (2 * num_h_slices * mpw) * mpw;
    return (edge == awidth) ? width : edge;
}
|
||||
|
||||
/* Initialise the context of the slice handled by this workgroup.
 *
 * The slice rectangle comes from the workgroup's position in the dispatch
 * grid, the RCT coefficients default to (1, 1), and the range coder is
 * pointed at this slice's slice_size_max-byte region of out_data.
 *
 * `sc` is an `out` parameter, so every member that is read later must be
 * assigned here: slice_coding_mode (read by write_slice_header) and hdr_len
 * are therefore set explicitly instead of being left undefined. */
void init_slice(out SliceContext sc, const uint slice_idx)
{
    /* Set coordinates */
    uvec2 img_size = imageSize(src[0]);
    uint sxs = slice_coord(img_size.x, gl_WorkGroupID.x + 0,
                           gl_NumWorkGroups.x, chroma_shift.x);
    uint sxe = slice_coord(img_size.x, gl_WorkGroupID.x + 1,
                           gl_NumWorkGroups.x, chroma_shift.x);
    uint sys = slice_coord(img_size.y, gl_WorkGroupID.y + 0,
                           gl_NumWorkGroups.y, chroma_shift.y);
    uint sye = slice_coord(img_size.y, gl_WorkGroupID.y + 1,
                           gl_NumWorkGroups.y, chroma_shift.y);

    sc.slice_pos = ivec2(sxs, sys);
    sc.slice_dim = ivec2(sxe - sxs, sye - sys);
    sc.slice_rct_coef = ivec2(1, 1);

    /* Defined defaults: regular (non-PCM) coding, no header measured yet */
    sc.slice_coding_mode = 0;
    sc.hdr_len = 0;

    rac_init(sc.c,
             OFFBUF(u8buf, out_data, slice_idx * slice_size_max),
             slice_size_max);
}
|
||||
|
||||
/* Code one bit with the adaptive state at `state`, calling
 * renorm_encoder_full() once the coder's range has dropped below 0x100. */
void put_rac_full(inout RangeCoder c, uint64_t state, bool bit)
{
    put_rac_norenorm(c, state, bit);
    if (c.range >= 0x100)
        return;
    renorm_encoder_full(c);
}
|
||||
|
||||
/* Code an unsigned value with the range coder.
 *
 * State layout relative to `state`:
 *   +0       zero flag
 *   +1..+10  unary exponent (index clamped to 9)
 *   +22..+31 mantissa bits (index clamped to 9)
 * No sign bit is written. */
void put_symbol_unsigned(inout RangeCoder c, uint64_t state, uint v)
{
    if (v == 0) {
        put_rac_full(c, state, true);
        return;
    }
    put_rac_full(c, state, false);

    const int msb = findMSB(v);

    const uint64_t exp_state  = state + 1;
    const uint64_t mant_state = state + 22;

    /* Exponent, unary: msb ones followed by a zero */
    for (int i = 0; i < msb; i++)
        put_rac_full(c, exp_state + min(i, 9), true);
    put_rac_full(c, exp_state + min(msb, 9), false);

    /* Mantissa bits below the leading one, most significant first */
    for (int i = msb - 1; i >= 0; i--)
        put_rac_full(c, mant_state + min(i, 9), bool(bitfieldExtract(v, i, 1)));
}
|
||||
|
||||
/* Write the slice header with the range coder, using the CONTEXT_SIZE-byte
 * scratch state at `state` (reset to 128 first).
 *
 * Field order: slice x and y index, two zero symbols, one context_model
 * symbol per codec plane, pic_mode, sar.x, sar.y. From version 4 on: a
 * PCM flag, the slice coding mode and, for colorspace 1 in a non-PCM mode,
 * the two RCT coefficients (y first, then x). */
void write_slice_header(inout SliceContext sc, uint64_t state)
{
    /* Reset all header contexts */
    u8buf ctx = u8buf(state);
    [[unroll]]
    for (int i = 0; i < CONTEXT_SIZE; i++)
        ctx[i].v = uint8_t(128);

    /* Slice position in the slice grid */
    put_symbol_unsigned(sc.c, state, gl_WorkGroupID.x);
    put_symbol_unsigned(sc.c, state, gl_WorkGroupID.y);
    put_symbol_unsigned(sc.c, state, 0);
    put_symbol_unsigned(sc.c, state, 0);

    for (int i = 0; i < codec_planes; i++)
        put_symbol_unsigned(sc.c, state, context_model);

    put_symbol_unsigned(sc.c, state, pic_mode);
    put_symbol_unsigned(sc.c, state, sar.x);
    put_symbol_unsigned(sc.c, state, sar.y);

    if (version < 4)
        return;

    put_rac_full(sc.c, state, sc.slice_coding_mode == 1);
    put_symbol_unsigned(sc.c, state, sc.slice_coding_mode);
    if (sc.slice_coding_mode != 1 && colorspace == 1) {
        put_symbol_unsigned(sc.c, state, sc.slice_rct_coef.y);
        put_symbol_unsigned(sc.c, state, sc.slice_rct_coef.x);
    }
}
|
||||
|
||||
/* Code the key_frame flag, using the first byte at `state` (reset to 128)
 * as its adaptive context. */
void write_frame_header(inout SliceContext sc, uint64_t state)
{
    u8buf key_ctx = u8buf(state);
    key_ctx.v = uint8_t(128);

    const bool is_key = bool(key_frame);
    put_rac_full(sc.c, state, is_key);
}
|
||||
|
||||
#ifdef GOLOMB
/* Terminate the range-coded header, record its length in hdr_len, and start
 * the bit writer right behind it in the slice's output region. */
void init_golomb(inout SliceContext sc)
{
    sc.hdr_len = rac_terminate(sc.c);

    const u8buf payload = OFFBUF(u8buf, sc.c.bytestream_start, sc.hdr_len);
    init_put_bits(sc.pb, payload, slice_size_max - sc.hdr_len);
}
#endif
|
||||
|
||||
/* Per-slice setup: initialise the slice context, write the frame header
 * (slice 0 only) and the slice header, and, with GOLOMB, switch over to the
 * bit writer. Each slice uses its own CONTEXT_SIZE-byte region of
 * scratch_data as header coding state. */
void main(void)
{
    const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;

    /* Write slice data */
    const uint64_t scratch_state = uint64_t(scratch_data) + slice_idx*CONTEXT_SIZE;

    init_slice(slice_ctx[slice_idx], slice_idx);

    if (slice_idx == 0)
        write_frame_header(slice_ctx[slice_idx], scratch_state);

    write_slice_header(slice_ctx[slice_idx], scratch_state);

#ifdef GOLOMB
    init_golomb(slice_ctx[slice_idx]);
#endif
}
|
112
libavcodec/vulkan/ffv1_enc_vlc.comp
Normal file
112
libavcodec/vulkan/ffv1_enc_vlc.comp
Normal file
@ -0,0 +1,112 @@
|
||||
/*
|
||||
* FFv1 codec
|
||||
*
|
||||
* Copyright (c) 2024 Lynne <dev@lynne.ee>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/* Run-length bookkeeping consumed and updated by calc_new_state(). */
struct RLEState {
    int  count; /* Length of the run accumulated so far */
    int  diff;  /* Difference value of the current sample */
    int  index; /* Current position in the log2_run[] table */
    bool mode;  /* True while a run is in progress */
};
|
||||
|
||||
/*
 * Advance the run-length state by one sample without emitting any bits.
 *
 * This mirrors the run handling of encode_line():
 *  - a sample whose context is zero enters run mode;
 *  - outside of run mode nothing changes;
 *  - inside run mode a zero difference extends the run, while any non-zero
 *    difference ends it: the run length is reduced through log2_run[], the
 *    table index steps back by one, and a positive difference is decremented.
 *
 * state.diff must hold the difference of the current sample on entry.
 */
void calc_new_state(inout RLEState state, int context)
{
    /* Context zero is what starts a run (same rule as encode_line) */
    if (context == 0)
        state.mode = true;

    if (!state.mode)
        return;

    /* The run continues for as long as the difference stays zero */
    if (state.diff == 0) {
        state.count++;
        return;
    }

    /* Run ends: consume whole 2^log2_run[index] chunks of the run length */
    while (state.count >= (1 << log2_run[state.index])) {
        state.count -= 1 << log2_run[state.index];
        state.index++;
    }

    if (state.index > 0)
        state.index--;
    state.count = 0;
    state.mode = false;

    /* Zero cannot terminate a run, so positive values are shifted down by one */
    if (state.diff > 0)
        state.diff--;
}
|
||||
|
||||
/*
 * Encode one line of one plane of a slice into the slice's bit writer.
 *
 * sc         slice being encoded; sc.pb receives the bits
 * state      base address of the VlcState array; the context returned by
 *            get_diff() selects the entry
 * y          line number, forwarded to get_diff()
 * p          plane index; planes 1 and 2 use the chroma-shifted geometry
 * comp, bits forwarded to get_diff(); bits is also passed to get_vlc_symbol()
 * run_index  position in log2_run[], carried across calls by the caller
 */
void encode_line(inout SliceContext sc, uint64_t state,
                 int y, int p, int comp, int bits, inout int run_index)
{
    ivec2 origin = sc.slice_pos;
    int width = sc.slice_dim.x;

    /* Planes 1 and 2 are addressed at chroma resolution */
    const bool is_chroma = (p == 1) || (p == 2);
    if (is_chroma) {
        origin >>= chroma_shift;
        width >>= chroma_shift.x;
    }

    bool in_run = false;
    int run_len = 0;

    for (int x = 0; x < width; x++) {
        const ivec2 d = get_diff(origin + ivec2(x, y), ivec2(x, y), p, comp, width, bits);
        const int context = d[0];
        int diff = d[1];

        /* A zero context starts a run */
        in_run = in_run || (context == 0);

        if (in_run && diff == 0) {
            run_len++;
        } else if (in_run) {
            /* A very unlikely loop */
            while (run_len >= 1 << log2_run[run_index]) {
                run_len -= 1 << log2_run[run_index];
                run_index++;
                put_bits(sc.pb, 1, 1);
            }

            /* Remainder of the run, written with one extra leading bit */
            put_bits(sc.pb, 1 + log2_run[run_index], run_len);
            if (run_index != 0)
                run_index--;

            run_len = 0;
            in_run = false;

            /* Zero cannot end a run, so positive values are shifted down */
            if (diff > 0)
                diff--;
        }

        if (!in_run) {
            VlcState sb = VlcState(state + VLC_STATE_SIZE*context);
            Symbol sym = get_vlc_symbol(sb, diff, bits);
            put_bits(sc.pb, sym.bits, sym.val);
        }
    }

    /* The line ended inside a run: flush what is still pending */
    if (in_run) {
        while (run_len >= (1 << log2_run[run_index])) {
            run_len -= 1 << log2_run[run_index];
            run_index++;
            put_bits(sc.pb, 1, 1);
        }

        if (run_len > 0)
            put_bits(sc.pb, 1, 1);
    }
}
|
55
libavcodec/vulkan/ffv1_reset.comp
Normal file
55
libavcodec/vulkan/ffv1_reset.comp
Normal file
@ -0,0 +1,55 @@
|
||||
/*
|
||||
* FFv1 codec
|
||||
*
|
||||
* Copyright (c) 2024 Lynne <dev@lynne.ee>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/*
 * Reset entry point: reinitialises the entropy-coder state of a slice.
 *
 * The slice index comes from the 2D workgroup ID; gl_WorkGroupID.z selects
 * which block of context_count states is reset (presumably the plane).
 * The invocations of a workgroup cooperate, each handling every
 * gl_WorkGroupSize.x-th element.
 *
 * With GOLOMB defined every VlcState is set to drift 0, error_sum 4, bias 0
 * and count 1. Otherwise every state byte is set to 0x80 (128), written one
 * dword at a time.
 */
void main(void)
{
    const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;

    /* No reset when the slice uses coding mode 0 on a non-keyframe */
    if (slice_ctx[slice_idx].slice_coding_mode == 0 && key_frame == 0)
        return;

    uint64_t slice_state_off = uint64_t(slice_state) +
                               slice_idx*plane_state_size*codec_planes;

#ifdef GOLOMB
    uint64_t start = slice_state_off +
                     (gl_WorkGroupID.z*context_count +
                      gl_LocalInvocationID.x)*VLC_STATE_SIZE;
    for (uint x = gl_LocalInvocationID.x; x < context_count; x += gl_WorkGroupSize.x) {
        VlcState sb = VlcState(start);
        sb.drift     =  int16_t(0);
        sb.error_sum = uint32_t(4); /* matches the field's declared type */
        sb.bias      =   int8_t(0);
        sb.count     =  uint8_t(1);
        start += gl_WorkGroupSize.x*VLC_STATE_SIZE;
    }
#else
    uint64_t start = slice_state_off +
                     (gl_WorkGroupID.z*context_count)*CONTEXT_SIZE +
                     (gl_LocalInvocationID.x << 2 /* dwords */); /* Bytes */
    uint count_total = context_count*(CONTEXT_SIZE /* bytes */ >> 2 /* dwords */);
    for (uint x = gl_LocalInvocationID.x; x < count_total; x += gl_WorkGroupSize.x) {
        u32buf(start).v = 0x80808080;
        /* Advance by one dword per invocation; expressed in bytes so the
         * stride does not depend on the value of CONTEXT_SIZE */
        start += (gl_WorkGroupSize.x << 2 /* dwords */); /* Bytes */
    }
#endif
}
|
122
libavcodec/vulkan/ffv1_vlc.comp
Normal file
122
libavcodec/vulkan/ffv1_vlc.comp
Normal file
@ -0,0 +1,122 @@
|
||||
/*
|
||||
* FFv1 codec
|
||||
*
|
||||
* Copyright (c) 2024 Lynne <dev@lynne.ee>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/* Size in bytes of one VlcState record (4 + 2 + 1 + 1) */
#define VLC_STATE_SIZE 8

/* Adaptive Golomb-Rice statistics of a single context, reached through a
 * buffer reference. Maintained by update_vlc_state(). */
layout(buffer_reference, buffer_reference_align = VLC_STATE_SIZE) buffer VlcState {
    uint32_t error_sum; /* Accumulated magnitude of the coded values */
    int16_t  drift;     /* Accumulated signed value, kept within (-count, 0] */
    int8_t   bias;      /* Correction subtracted from values before folding */
    uint8_t  count;     /* Number of samples seen; halved on reaching 128 */
};
|
||||
|
||||
/*
 * Fold one coded value into the adaptive statistics of a context.
 *
 * error_sum grows by |v| and drift by v. Once count has reached 128, count,
 * drift and error_sum are halved. The bias is then nudged by one step
 * (clamped to the int8 range) whenever drift leaves the (-count, 0] interval,
 * and drift is pulled back into it.
 */
void update_vlc_state(inout VlcState state, const int v)
{
    int samples = state.count;
    int bias    = state.bias;
    int drift   = state.drift + v;

    state.error_sum += uint16_t(abs(v));

    /* Age the statistics so that recent samples carry more weight */
    if (samples == 128) { // FIXME: variable
        samples         >>= 1;
        drift           >>= 1;
        state.error_sum >>= 1;
    }
    samples++;

    if (drift > 0) {
        /* Values trend positive: raise the bias */
        bias  = min(bias + 1, 127);
        drift = min(drift - samples, 0);
    } else if (drift <= -samples) {
        /* Values trend negative: lower the bias */
        bias  = max(bias - 1, -128);
        drift = max(drift + samples, 1 - samples);
    }

    state.count = uint8_t(samples);
    state.drift = int16_t(drift);
    state.bias  = int8_t(bias);
}
|
||||
|
||||
/* A variable-length code ready to be handed to put_bits(). */
struct Symbol {
    uint32_t bits; /* Length of the code in bits */
    uint32_t val;  /* Code value */
};
|
||||
|
||||
/*
 * Build the unsigned Golomb-Rice code of a value.
 *
 * i        value to code, expected to be non-negative
 * k        Rice parameter (number of low bits kept verbatim)
 * limit    quotient at or above which the escape form is used
 * esc_len  number of bits added to limit for an escape code
 *
 * Returns the code length and code value; nothing is written here.
 */
Symbol set_ur_golomb(int i, int k, int limit, int esc_len)
{
    Symbol sym;

#ifdef DEBUG
    if (i < 0)
        debugPrintfEXT("Error: i is negative!");
#endif

    const int e = i >> k;
    if (e < limit) {
        /* Regular form: e + k + 1 bits holding a marker bit above the k low bits */
        sym.bits = e + k + 1;
        sym.val  = (1 << k) + zero_extend(i, k);
    } else {
        /* Escape form: fixed length, value offset by the limit */
        sym.bits = limit + esc_len;
        sym.val  = i - limit + 1;
    }

    return sym;
}
|
||||
|
||||
/**
 * Build a signed Golomb-Rice code (ffv1).
 *
 * The signed input is first mapped onto the non-negative integers
 * (0, -1, 1, -2, ... become 0, 1, 2, 3, ...) and then coded with
 * set_ur_golomb() using the same k, limit and esc_len.
 */
Symbol set_sr_golomb(int i, int k, int limit, int esc_len)
{
    const int t = -2 * i - 1;

    /* t >> 31 is all ones for negative t, so the XOR yields ~t there */
    const int folded = t ^ (t >> 31);

    return set_ur_golomb(folded, k, limit, esc_len);
}
|
||||
|
||||
/*
 * Produce the adaptive Golomb-Rice code for one value and update the
 * statistics of its context.
 *
 * state  statistics of the context the value belongs to (updated)
 * v      value to code, before bias removal
 * bits   passed to fold() and used as the escape length
 *
 * Returns the code to write; no bits are emitted here.
 */
Symbol get_vlc_symbol(inout VlcState state, int v, int bits)
{
    v = fold(v - int(state.bias), bits);

    /* Rice parameter: smallest k such that count << k reaches error_sum */
    int i = state.count;
    int k = 0;
    while (i < state.error_sum) { // FIXME: optimize
        k++;
        i += i;
    }

#ifdef DEBUG
    if (k > 16)
        debugPrintfEXT("Error: k > 16!");
#endif

    /* Uses drift and count as they were before the update below */
    int code = v ^ ((2 * state.drift + state.count) >> 31);

    update_vlc_state(state, v);

    return set_sr_golomb(code, k, 12, bits);
}
|
190
libavcodec/vulkan/rangecoder.comp
Normal file
190
libavcodec/vulkan/rangecoder.comp
Normal file
@ -0,0 +1,190 @@
|
||||
/*
|
||||
* FFv1 codec
|
||||
*
|
||||
* Copyright (c) 2024 Lynne <dev@lynne.ee>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/* State of the range encoder. */
struct RangeCoder {
    u8buf bytestream_start;    /* First byte of the output buffer */
    u8buf bytestream;          /* Next byte to be written */

    uint low;                  /* Low end of the current interval */
    uint16_t range;            /* Width of the current interval */
    uint8_t outstanding_count; /* Bytes held back until the carry is known */
    uint8_t outstanding_byte;  /* Last byte held back; starts out as 0xFF */
};
|
||||
|
||||
/*
 * Full renormalisation: shifts one byte out of the coder, and unlike
 * renorm_encoder() also handles outstanding_byte == 0xFF, the value
 * rac_init() starts with. In that case nothing is written and the byte is
 * only latched.
 */
void renorm_encoder_full(inout RangeCoder c)
{
    const uint low = c.low;
    int written = 0;

    if (c.outstanding_byte == 0xFF) {
        /* No byte pending yet: just latch the new one */
        c.outstanding_byte = uint8_t(low >> 8);
    } else if (low > 0xFF00 && low < 0x10000) {
        /* Carry still undecided: hold back one more byte */
        c.outstanding_count++;
    } else {
        /* Carry is now known (set when low >= 0x10000): flush held bytes */
        const bool carry = low >= 0x10000;
        const uint8_t fill = carry ? uint8_t(0x00) : uint8_t(0xFF);

        c.bytestream[written++].v = carry ? c.outstanding_byte + uint8_t(1)
                                          : c.outstanding_byte;
        for (uint8_t pending = c.outstanding_count; pending > 0; pending--)
            c.bytestream[written++].v = fill;

        c.outstanding_count = uint8_t(0);
        c.outstanding_byte  = carry ? uint8_t(bitfieldExtract(low, 8, 8))
                                    : uint8_t(low >> 8);
    }

    c.bytestream = OFFBUF(u8buf, c.bytestream, written);
    c.range <<= 8;

    /* Keep only the low byte of low, moved up by eight bits */
    c.low = bitfieldInsert(0, low, 8, 8);
}
|
||||
|
||||
/*
 * Fast renormalisation. In the name of speed it does not treat
 * outstanding_byte == 0xFF specially; renorm_encoder_full() does.
 */
void renorm_encoder(inout RangeCoder c)
{
    const uint low = c.low;
    const uint8_t pending = c.outstanding_count + uint8_t(1);

    c.range <<= 8;
    /* Keep only the low byte of low, moved up by eight bits */
    c.low = bitfieldInsert(0, low, 8, 8);

    /* Carry still undecided: hold back one more byte and write nothing */
    if (low > 0xFF00 && low < 0x10000) {
        c.outstanding_count = pending;
        return;
    }

    /* From here on low is either <= 0xFF00 (no carry) or >= 0x10000 (carry) */
    u8buf dst = c.bytestream;
    const uint8_t carry = uint8_t(low > 0xFF00);
    const uint8_t fill  = carry - uint8_t(1); /* unsigned underflow: 0xFF without carry, 0x00 with */
    const uint8_t head  = c.outstanding_byte + carry;

    c.bytestream        = OFFBUF(u8buf, dst, pending);
    c.outstanding_count = uint8_t(0);
    c.outstanding_byte  = uint8_t(low >> 8);

    /* First the held-back byte, then the run of fill bytes behind it */
    dst[0].v = head;
    for (int i = 1; i < pending; i++)
        dst[i].v = fill;
}
|
||||
|
||||
/*
 * Code one bit with an adaptive probability, without renormalising.
 *
 * c      range coder to update
 * state  address of the probability state byte; read, then replaced by its
 *        successor taken from zero_one_state[]
 * bit    bit to code
 *
 * No renormalisation is performed here; c.range may be below 0x100 on return.
 */
void put_rac_norenorm(inout RangeCoder c, uint64_t state, bool bit)
{
    u8buf prob = u8buf(state);
    uint val = uint(prob.v);
    uint16_t range1 = uint16_t((uint(c.range) * val) >> 8);

#ifdef DEBUG
    if (val == 0)
        debugPrintfEXT("Error: state is zero (addr: 0x%lx)", uint64_t(prob));
    if (range1 >= c.range)
        debugPrintfEXT("Error: range1 >= c.range");
    if (range1 <= 0)
        debugPrintfEXT("Error: range1 <= 0");
#endif

    uint16_t range0 = c.range - range1;
    if (!bit) {
        /* A zero keeps the lower part of the interval */
        c.range = range0;
    } else {
        /* A one takes the upper part */
        c.low  += range0;
        c.range = range1;
    }

    /* The table holds 256 successors for bit 0 followed by 256 for bit 1 */
    const uint table_base = uint(bit) << 8;
    prob.v = zero_one_state[table_base + val];

#ifdef DEBUG
    if (prob.v == 0)
        debugPrintfEXT("Error: inserted zero state from tab %i idx %i", bit, val);
#endif
}
|
||||
|
||||
/*
 * Code one equiprobable bit: the interval is split in half instead of by an
 * adaptive state, and the coder is renormalised when the range drops below
 * 0x100.
 */
void put_rac_equi(inout RangeCoder c, bool bit)
{
    uint16_t range1 = c.range >> 1;

#ifdef DEBUG
    if (range1 >= c.range)
        debugPrintfEXT("Error: range1 >= c.range");
    if (range1 <= 0)
        debugPrintfEXT("Error: range1 <= 0");
#endif

    if (!bit) {
        /* A zero keeps the lower part of the interval */
        c.range -= range1;
    } else {
        /* A one takes the upper part */
        c.low  += c.range - range1;
        c.range = range1;
    }

    if (c.range >= 0x100)
        return;

    renorm_encoder(c);
}
|
||||
|
||||
/*
 * Code a zero bit against the fixed state value 129, with no state byte to
 * update. The coder is renormalised when the range drops below 0x100.
 */
void put_rac_terminate(inout RangeCoder c)
{
    uint16_t range1 = uint16_t((uint(c.range) * 129) >> 8);

#ifdef DEBUG
    if (range1 >= c.range)
        debugPrintfEXT("Error: range1 >= c.range");
    if (range1 <= 0)
        debugPrintfEXT("Error: range1 <= 0");
#endif

    /* A zero bit keeps the lower part of the interval */
    c.range -= range1;

    if (c.range >= 0x100)
        return;

    renorm_encoder(c);
}
|
||||
|
||||
/*
 * Finish the range-coded stream and flush what the coder still holds.
 * Returns the number of bytes written since bytestream_start.
 */
uint32_t rac_terminate(inout RangeCoder c)
{
    put_rac_terminate(c);

    /* First flush step, with low advanced by 0xFF */
    c.range = uint16_t(0xFF);
    c.low  += 0xFF;
    renorm_encoder(c);

    /* Second flush step */
    c.range = uint16_t(0xFF);
    renorm_encoder(c);

#ifdef DEBUG
    if (c.low != 0)
        debugPrintfEXT("Error: c.low != 0");
    if (c.range < 0x100)
        debugPrintfEXT("Error: range < 0x100");
#endif

    const uint64_t first = uint64_t(c.bytestream_start);
    const uint64_t next  = uint64_t(c.bytestream);
    return uint32_t(next - first);
}
|
||||
|
||||
/*
 * Initialise a range coder that writes to data.
 *
 * Every field of r is set. buf_size is accepted but not used here.
 */
void rac_init(out RangeCoder r, u8buf data, uint64_t buf_size)
{
    /* Interval: starts at zero, 0xFF00 wide */
    r.low   = 0;
    r.range = uint16_t(0xFF00);

    /* 0xFF is the value renorm_encoder_full() checks for before writing */
    r.outstanding_byte  = uint8_t(0xFF);
    r.outstanding_count = uint8_t(0);

    /* Output starts at the beginning of the buffer */
    r.bytestream       = data;
    r.bytestream_start = data;
}
|
Loading…
Reference in New Issue
Block a user