1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-01-08 13:22:53 +02:00
FFmpeg/libavcodec/vulkan/ffv1_enc_common.comp
Lynne eb536d97a0
ffv1enc_vulkan: support buffers larger than 4GiB
Unlike the software FFv1 encoder, none of our buffers are allocated by
FFmpeg, which supports allocations of at most 4GiB.

For really large sizes, the maximum size of the buffer can exceed 4GiB,
which the software encoder optimistically tries to allocate as 4GiB
in the hopes that the encoder will compress to under that amount.

We can just let Vulkan allocate us a larger buffer, and switch to
64-bit offsets.
2024-11-20 05:23:05 +01:00

102 lines
3.2 KiB
Plaintext

/*
* FFv1 codec
*
* Copyright (c) 2024 Lynne <dev@lynne.ee>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
 * Compute the context and the folded prediction residual for one sample.
 *
 * pos  - coordinate passed to imageLoad() on src[p] for the current sample
 * off  - offset used only for the border tests below; presumably the
 *        sample's position relative to the slice origin (TODO confirm
 *        against the caller)
 * p    - index into the src image array
 * comp - component of the loaded texel to use
 * sw   - width compared against off.x for the right-hand border; presumably
 *        the slice width (TODO confirm against the caller)
 * bits - forwarded unchanged to fold()
 *
 * Returns ivec2(context, folded difference). When get_context() yields a
 * negative context, both the context and the difference are negated before
 * the difference is folded.
 */
ivec2 get_diff(ivec2 pos, ivec2 off, int p, int comp, int sw, int bits)
{
/* When off.x == 0 the "left" loads are redirected by (+1, -1):
 * pos + (-1, 0) becomes pos + (0, -1) and pos + (-1, -1) becomes
 * pos + (0, -2), i.e. samples from the rows above are used instead. */
const ivec2 yoff_border1 = off.x == 0 ? ivec2(1, -1) : ivec2(0, 0);
/* When off.x == 1 the "left-left" load pos + (-2, 0) becomes
 * pos + (-1, -1). */
const ivec2 yoff_border2 = off.x == 1 ? ivec2(1, -1) : ivec2(0, 0);
/* Sample two rows up; stays 0 unless off.y > 1 */
TYPE top2 = TYPE(0);
if (off.y > 1)
top2 = TYPE(imageLoad(src[p], pos + ivec2(0, -2))[comp]);
/* Row above: top[0] = up-left, top[1] = up, top[2] = up-right.
 * All three stay 0 when off.y == 0. */
VTYPE3 top = VTYPE3(TYPE(0),
TYPE(0),
TYPE(0));
/* Skipped at off == (0, 1), where the redirected load would read
 * pos + (0, -2) while off.y - 2 is negative. */
if (off.y > 0 && off != ivec2(0, 1))
top[0] = TYPE(imageLoad(src[p], pos + ivec2(-1, -1) + yoff_border1)[comp]);
if (off.y > 0) {
top[1] = TYPE(imageLoad(src[p], pos + ivec2(0, -1))[comp]);
/* The x offset becomes 0 when off.x == sw - 1, so up-right then
 * reads the same sample as top[1]. */
top[2] = TYPE(imageLoad(src[p], pos + ivec2(min(1, sw - off.x - 1), -1))[comp]);
}
/* Current row: cur[0] = left-left, cur[1] = left, cur[2] = this sample */
VTYPE3 cur = VTYPE3(TYPE(0),
TYPE(0),
imageLoad(src[p], pos)[comp]);
/* Left-left stays 0 when off.x == 0 and at off == (1, 0) */
if (off.x > 0 && off != ivec2(1, 0))
cur[0] = TYPE(imageLoad(src[p], pos + ivec2(-2, 0) + yoff_border2)[comp]);
/* Left stays 0 only at off == (0, 0) */
if (off != ivec2(0, 0))
cur[1] = TYPE(imageLoad(src[p], pos + ivec2(-1, 0) + yoff_border1)[comp]);
/* d[0] = context from get_context(), d[1] = current sample minus the
 * value predicted from the left and upper neighbours */
ivec2 d = ivec2(get_context(VTYPE2(cur), top, top2, context_model),
cur[2] - predict(cur[1], VTYPE2(top)));
/* A negative context flips the sign of both the context and the diff */
if (d[0] < 0)
d = -d;
d[1] = fold(d[1], bits);
return d;
}
/*
 * Terminate the bitstream of one slice and append its trailer.
 *
 * The entropy coder is flushed (Golomb bit writer or range coder, chosen at
 * compile time by GOLOMB), then three bytes holding the low 24 bits of the
 * coded length are appended. When ec is non-zero, a zero byte and a 4-byte
 * CRC computed over everything written so far are appended as well.
 *
 * sc        - slice state; sc.c.bytestream_start addresses the slice output
 * slice_idx - selects the pair of slice_results entries to fill:
 *             [2*slice_idx + 0] receives the total number of bytes written,
 *             [2*slice_idx + 1] the distance of the slice's bytestream start
 *             from out_data.
 */
void finalize_slice(inout SliceContext sc, const uint slice_idx)
{
    /* Flush the entropy coder; total is the coded length in bytes */
#ifdef GOLOMB
    uint32_t total = sc.hdr_len + flush_put_bits(sc.pb);
#else
    uint32_t total = rac_terminate(sc.c);
#endif

    u8buf dst = u8buf(sc.c.bytestream_start);

    /* Length trailer: bytes .z, .y, .x of the unpacked length, in that
     * order (most significant of the three first, assuming unpack8()
     * places the least significant byte in .x) */
    u8vec4 len_bytes = unpack8(total);
    for (int b = 0; b < 3; b++)
        dst[total + b].v = len_bytes[2 - b];
    total += 3;

    if (ec != 0) {
        /* A zero byte follows the length and is covered by the CRC */
        dst[total].v = uint8_t(0);
        total++;

        /* Table-driven CRC over every byte written so far */
        uint32_t crc = crcref;
        for (int i = 0; i < total; i++) {
            uint32_t idx = (crc & 0xFF) ^ uint32_t(dst[i].v);
            crc = crc_ieee[idx] ^ (crc >> 8);
        }

        if (crcref != 0x00000000)
            crc ^= 0x8CD88196;

        /* CRC is stored in .x, .y, .z, .w order */
        u8vec4 crc_bytes = unpack8(crc);
        for (int b = 0; b < 4; b++)
            dst[total + b].v = crc_bytes[b];
        total += 4;
    }

    /* Report the final size and where this slice starts in the output */
    const uint res = slice_idx*2;
    slice_results[res + 0] = total;
    slice_results[res + 1] = uint64_t(dst) - uint64_t(out_data);
}