1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-08-04 22:03:09 +02:00

avcodec/ffv1: 32-bit float sample support

Sponsored-by: Sovereign Tech Fund
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
This commit is contained in:
Michael Niedermayer
2025-03-19 01:30:47 +01:00
parent 62c7d08947
commit 171060d5dc
5 changed files with 262 additions and 20 deletions

View File

@ -106,7 +106,11 @@ typedef struct FFV1SliceContext {
uint64_t (*rc_stat2[MAX_QUANT_TABLES])[32][2]; uint64_t (*rc_stat2[MAX_QUANT_TABLES])[32][2];
}; };
}; };
uint16_t fltmap[4][65536]; union {
uint16_t bitmap [4][65536]; //float encode
uint16_t fltmap [4][65536]; //halffloat encode & decode
uint32_t fltmap32[4][65536]; //float decode
};
} FFV1SliceContext; } FFV1SliceContext;
typedef struct FFV1Context { typedef struct FFV1Context {

View File

@ -419,6 +419,16 @@ int ff_ffv1_parse_header(FFV1Context *f, RangeCoder *c, uint8_t *state)
} else } else
f->pix_fmt = AV_PIX_FMT_GBRAP16; f->pix_fmt = AV_PIX_FMT_GBRAP16;
f->use32bit = 1; f->use32bit = 1;
} else if (f->avctx->bits_per_raw_sample == 32 && !f->transparency) {
if (f->flt) {
f->pix_fmt = AV_PIX_FMT_GBRPF32;
}
f->use32bit = 1;
} else if (f->avctx->bits_per_raw_sample == 32 && f->transparency) {
if (f->flt) {
f->pix_fmt = AV_PIX_FMT_GBRAPF32;
}
f->use32bit = 1;
} }
} else { } else {
av_log(f->avctx, AV_LOG_ERROR, "colorspace not supported\n"); av_log(f->avctx, AV_LOG_ERROR, "colorspace not supported\n");

View File

@ -249,6 +249,16 @@ static int decode_slice_header(const FFV1Context *f,
} }
} }
} }
if (f->avctx->bits_per_raw_sample == 32) {
if (!sc->remap) {
av_log(f->avctx, AV_LOG_ERROR, "unsupported remap\n");
return AVERROR_INVALIDDATA;
}
if (sc->slice_width * sc->slice_height > 65536) {
av_log(f->avctx, AV_LOG_ERROR, "32bit needs remap\n");
return AVERROR_INVALIDDATA;
}
}
return 0; return 0;
} }
@ -265,28 +275,38 @@ static void slice_set_damaged(FFV1Context *f, FFV1SliceContext *sc)
static int decode_remap(FFV1Context *f, FFV1SliceContext *sc) static int decode_remap(FFV1Context *f, FFV1SliceContext *sc)
{ {
int flip = sc->remap == 2 ? 0x7FFF : 0; unsigned int end = f->avctx->bits_per_raw_sample == 32 ? 0xFFFFFFFF : 0xFFFF;
int flip = sc->remap == 2 ? (end>>1) : 0;
int sign = (end>>1)+1;
for (int p= 0; p < 1 + 2*f->chroma_planes + f->transparency; p++) { for (int p= 0; p < 1 + 2*f->chroma_planes + f->transparency; p++) {
int j = 0; int j = 0;
int lu = 0; int lu = 0;
uint8_t state[2][32]; uint8_t state[2][32];
int64_t i;
memset(state, 128, sizeof(state)); memset(state, 128, sizeof(state));
for (i=0; i <= end ; i++) {
for (int i= 0; i<65536; i++) { unsigned run = get_symbol_inline(&sc->c, state[lu], 0);
int run = get_symbol_inline(&sc->c, state[lu], 0); if (run > end - i + 1)
if (run > 65536U - i)
return AVERROR_INVALIDDATA; return AVERROR_INVALIDDATA;
if (lu) { if (lu) {
lu ^= !run; lu ^= !run;
while (run--) { while (run--) {
if (end == 0xFFFF) {
sc->fltmap [p][j++] = i ^ ((i& 0x8000) ? 0 : flip); sc->fltmap [p][j++] = i ^ ((i& 0x8000) ? 0 : flip);
} else
sc->fltmap32[p][j++] = i ^ ((i&0x80000000) ? 0 : flip);
i++; i++;
} }
} else { } else {
i += run; i += run;
if (i != 65536) if (i <= end) {
if (end == 0xFFFF) {
sc->fltmap [p][j++] = i ^ ((i& 0x8000) ? 0 : flip); sc->fltmap [p][j++] = i ^ ((i& 0x8000) ? 0 : flip);
} else {
sc->fltmap32[p][j++] = i ^ ((i&0x80000000) ? 0 : flip);
}
}
lu ^= !run; lu ^= !run;
} }
} }

View File

@ -138,7 +138,7 @@ static int RENAME(decode_rgb_frame)(FFV1Context *f, FFV1SliceContext *sc,
int x, y, p; int x, y, p;
TYPE *sample[4][2]; TYPE *sample[4][2];
int lbd = f->avctx->bits_per_raw_sample <= 8; int lbd = f->avctx->bits_per_raw_sample <= 8;
int bits = f->avctx->bits_per_raw_sample > 0 ? f->avctx->bits_per_raw_sample : 8; int bits = f->avctx->bits_per_raw_sample > 0 ? FFMIN(f->avctx->bits_per_raw_sample, 16) : 8;
int offset = 1 << bits; int offset = 1 << bits;
int transparency = f->transparency; int transparency = f->transparency;
int ac = f->ac; int ac = f->ac;
@ -186,16 +186,30 @@ static int RENAME(decode_rgb_frame)(FFV1Context *f, FFV1SliceContext *sc,
r += g; r += g;
} }
if (sc->remap) { if (sc->remap) {
if (f->avctx->bits_per_raw_sample == 32) {
g = sc->fltmap32[0][g & 0xFFFF];
b = sc->fltmap32[1][b & 0xFFFF];
r = sc->fltmap32[2][r & 0xFFFF];
if (transparency)
a = sc->fltmap32[3][a & 0xFFFF];
} else {
g = sc->fltmap[0][g & 0xFFFF]; g = sc->fltmap[0][g & 0xFFFF];
b = sc->fltmap[1][b & 0xFFFF]; b = sc->fltmap[1][b & 0xFFFF];
r = sc->fltmap[2][r & 0xFFFF]; r = sc->fltmap[2][r & 0xFFFF];
if (transparency) if (transparency)
a = sc->fltmap[3][a & 0xFFFF]; a = sc->fltmap[3][a & 0xFFFF];
} }
}
if (lbd) if (lbd) {
*((uint32_t*)(src[0] + x*4 + stride[0]*y)) = b + ((unsigned)g<<8) + ((unsigned)r<<16) + ((unsigned)a<<24); *((uint32_t*)(src[0] + x*4 + stride[0]*y)) = b + ((unsigned)g<<8) + ((unsigned)r<<16) + ((unsigned)a<<24);
else if (sizeof(TYPE) == 4 || transparency) { } else if (f->avctx->bits_per_raw_sample == 32) {
*((uint32_t*)(src[0] + x*4 + stride[0]*y)) = g;
*((uint32_t*)(src[1] + x*4 + stride[1]*y)) = b;
*((uint32_t*)(src[2] + x*4 + stride[2]*y)) = r;
if (transparency)
*((uint32_t*)(src[3] + x*4 + stride[3]*y)) = a;
} else if (sizeof(TYPE) == 4 || transparency) {
*((uint16_t*)(src[0] + x*2 + stride[0]*y)) = g; *((uint16_t*)(src[0] + x*2 + stride[0]*y)) = g;
*((uint16_t*)(src[1] + x*2 + stride[1]*y)) = b; *((uint16_t*)(src[1] + x*2 + stride[1]*y)) = b;
*((uint16_t*)(src[2] + x*2 + stride[2]*y)) = r; *((uint16_t*)(src[2] + x*2 + stride[2]*y)) = r;

View File

@ -31,6 +31,7 @@
#include "libavutil/mem.h" #include "libavutil/mem.h"
#include "libavutil/opt.h" #include "libavutil/opt.h"
#include "libavutil/pixdesc.h" #include "libavutil/pixdesc.h"
#include "libavutil/qsort.h"
#include "avcodec.h" #include "avcodec.h"
#include "encode.h" #include "encode.h"
@ -576,6 +577,9 @@ int ff_ffv1_encode_determine_slices(AVCodecContext *avctx)
continue; continue;
if (maxw * maxh * (int64_t)(s->bits_per_raw_sample+1) * plane_count > 8<<24) if (maxw * maxh * (int64_t)(s->bits_per_raw_sample+1) * plane_count > 8<<24)
continue; continue;
if (s->bits_per_raw_sample == 32)
if (maxw * maxh > 65536)
continue;
if (s->version < 4) if (s->version < 4)
if ( ff_need_new_slices(avctx->width , s->num_h_slices, s->chroma_h_shift) if ( ff_need_new_slices(avctx->width , s->num_h_slices, s->chroma_h_shift)
||ff_need_new_slices(avctx->height, s->num_v_slices, s->chroma_v_shift)) ||ff_need_new_slices(avctx->height, s->num_v_slices, s->chroma_v_shift))
@ -917,6 +921,10 @@ av_cold int ff_ffv1_encode_setup_plane_info(AVCodecContext *avctx,
case AV_PIX_FMT_GBRAPF16: case AV_PIX_FMT_GBRAPF16:
if (!avctx->bits_per_raw_sample && !s->bits_per_raw_sample) if (!avctx->bits_per_raw_sample && !s->bits_per_raw_sample)
s->bits_per_raw_sample = 16; s->bits_per_raw_sample = 16;
case AV_PIX_FMT_GBRPF32:
case AV_PIX_FMT_GBRAPF32:
if (!avctx->bits_per_raw_sample && !s->bits_per_raw_sample)
s->bits_per_raw_sample = 32;
else if (!s->bits_per_raw_sample) else if (!s->bits_per_raw_sample)
s->bits_per_raw_sample = avctx->bits_per_raw_sample; s->bits_per_raw_sample = avctx->bits_per_raw_sample;
s->transparency = !!(desc->flags & AV_PIX_FMT_FLAG_ALPHA); s->transparency = !!(desc->flags & AV_PIX_FMT_FLAG_ALPHA);
@ -939,6 +947,10 @@ av_cold int ff_ffv1_encode_setup_plane_info(AVCodecContext *avctx,
if (s->remap_mode < 0) if (s->remap_mode < 0)
s->remap_mode = s->flt ? 2 : 0; s->remap_mode = s->flt ? 2 : 0;
if (s->remap_mode == 0 && s->bits_per_raw_sample == 32) {
av_log(avctx, AV_LOG_ERROR, "32bit requires remap\n");
return AVERROR(EINVAL);
}
return av_pix_fmt_get_chroma_sub_sample(pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift); return av_pix_fmt_get_chroma_sub_sample(pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
} }
@ -1149,7 +1161,7 @@ static void choose_rct_params(const FFV1Context *f, FFV1SliceContext *sc,
sc->slice_rct_ry_coef = rct_y_coeff[best][0]; sc->slice_rct_ry_coef = rct_y_coeff[best][0];
} }
static void encode_remap(FFV1Context *f, FFV1SliceContext *sc) static void encode_histogram_remap(FFV1Context *f, FFV1SliceContext *sc)
{ {
int flip = sc->remap == 2 ? 0x7FFF : 0; int flip = sc->remap == 2 ? 0x7FFF : 0;
@ -1179,6 +1191,177 @@ static void encode_remap(FFV1Context *f, FFV1SliceContext *sc)
} }
} }
// One sample of one plane together with the pixel it was read from, so that the samples
// of a slice can be sorted by value without losing track of where each one belongs.
typedef struct Unit {
uint32_t val; //sample value (the sort key); this is unneeded if you accept a dereference on each access
uint16_t ndx; //pixel index inside the slice (x + y*w); 16 bits, so a slice holds at most 65536 pixels
} Unit;
/**
 * Gather the 32-bit samples of a planar G/B/R(/A) slice and sort every plane by value.
 *
 * Each pixel contributes one (value, pixel index) pair per plane to unit[plane], the
 * pixel index being x + y*w. When sc->remap == 2, G, B and R values whose sign bit is
 * clear get their lower 31 bits inverted before being stored.
 *
 * @param f      codec context; only f->transparency is read
 * @param sc     slice context; only sc->remap is read
 * @param src    plane base pointers in G, B, R, A order (src[3] is read only with transparency)
 * @param w      slice width in pixels
 * @param h      slice height in pixels
 * @param stride byte stride of each plane
 * @param unit   output; per plane the first w*h entries are filled, then sorted ascending by val
 */
static void load_rgb_float32_frame(FFV1Context *f, FFV1SliceContext *sc,
                                   const uint8_t *src[4],
                                   int w, int h, const int stride[4],
                                   Unit unit[4][65536])
{
    const int transparency = f->transparency;
    int i = 0;

    // Every plane of unit[] has room for 65536 entries and Unit.ndx is only 16 bits wide,
    // so refuse larger slices before anything is written.
    av_assert0((int64_t)w * h <= 65536);

    for (int y = 0; y < h; y++) {
        for (int x = 0; x < w; x++) {
            // Keep the raw bit patterns unsigned; they are compared as unsigned integers later
            uint32_t g = *((const uint32_t *)(src[0] + x*4 + stride[0]*y));
            uint32_t b = *((const uint32_t *)(src[1] + x*4 + stride[1]*y));
            uint32_t r = *((const uint32_t *)(src[2] + x*4 + stride[2]*y));
            const int ndx = x + y*w;

            if (sc->remap == 2) {
#define FLIP(f) (((f)&0x80000000) ? (f) : (f)^0x7FFFFFFF)
                g = FLIP(g);
                b = FLIP(b);
                r = FLIP(r);
            }

            // We cannot build a histogram as we do for 16bit, we need a bit of magic here
            // It's possible to reduce the memory needed at the cost of more dereferencing
            unit[0][i].val = g;
            unit[0][i].ndx = ndx;
            unit[1][i].val = b;
            unit[1][i].ndx = ndx;
            unit[2][i].val = r;
            unit[2][i].ndx = ndx;

            if (transparency) {
                // NOTE(review): alpha is stored without FLIP even when sc->remap == 2, while
                // decode_remap() applies the flip to every plane -- confirm this is intended.
                unit[3][i].val = *((const uint32_t *)(src[3] + x*4 + stride[3]*y));
                unit[3][i].ndx = ndx;
            }
            i++;
        }
    }

    //TODO switch to radix sort
#define CMP(A,B) ((A)->val - (int64_t)(B)->val)
    AV_QSORT(unit[0], i, Unit, CMP);
    AV_QSORT(unit[1], i, Unit, CMP);
    AV_QSORT(unit[2], i, Unit, CMP);
    if (transparency)
        AV_QSORT(unit[3], i, Unit, CMP);
}
// Write, for every plane, the set of distinct sample values found in unit[p] to the range
// coder sc->c, and store in sc->bitmap[p][pixel] the rank ("compact index") of that pixel's
// value among the plane's distinct values.
//
// unit[p] is expected to be sorted ascending by val (see the av_assert2 below).
// The src parameter is not used by this function.
static void encode_float32_remap(FFV1Context *f, FFV1SliceContext *sc,
const uint8_t *src[4], Unit unit[4][65536])
{
int pixel_num = sc->slice_width * sc->slice_height;
// unit[] and sc->bitmap[] hold 65536 entries per plane
av_assert0 (pixel_num <= 65536);
for (int p= 0; p < 1 + 2*f->chroma_planes + f->transparency; p++) {
// lu selects the symbol mode and the matching adaptive state set:
//   lu == 0: each symbol is the number of absent values between two present ones
//   lu == 1: each symbol is the length of a run of consecutive present values
int lu = 0;
uint8_t state[2][32];
int run = 0;
// -1 so that a first value of 0 is seen as "gap of 0" from the start
int64_t last_val = -1;
int compact_index = -1;
memset(state, 128, sizeof(state));
// One extra iteration (i == pixel_num) feeds a sentinel so the last gap/run gets written
for (int i= 0; i<pixel_num+1; i++) {
int64_t val;
if (i == pixel_num) {
// NOTE(review): if lu is set and run > 0 here, this break leaves without writing the
// pending run -- confirm the decoder copes with that.
if (last_val == 0xFFFFFFFF) {
break;
} else {
// sentinel one past the largest 32-bit value
val = 1LL<<32;
}
} else
val = unit[p][i].val;
// Equal values share one compact index, only a new value emits anything
if (last_val != val) {
av_assert2(last_val < val);
if (lu) {
if (val - last_val == 1) {
// still consecutive: extend the run, nothing is written yet
run ++;
last_val = val;
} else {
// run is broken: write its length; a zero-length run switches back to gap mode
put_symbol_inline(&sc->c, state[lu], run, 0, NULL, NULL);
if (run == 0)
lu ^= 1;
run = 0;
i--; // we did not encode val so we need to backstep
// step over the one value that ended the run
last_val ++;
continue;
}
} else {
av_assert2(run == 0);
// gap mode: write how many values were skipped; a gap of 0 switches to run mode
put_symbol_inline(&sc->c, state[lu], val - last_val - 1, 0, NULL, NULL);
if (val - last_val == 1)
lu ^= 1;
last_val = val;
}
compact_index ++;
}
// the sentinel iteration has no pixel to label
if (i < pixel_num)
sc->bitmap[p][unit[p][i].ndx] = compact_index;
}
}
}
/**
 * Encode one slice of 32-bit G/B/R(/A) data from the compact indices in sc->bitmap.
 *
 * The pixel values themselves are not read here: sc->bitmap[plane][x + w*y] must already
 * hold the per-plane compact index of every pixel. Unless sc->slice_coding_mode is 1, the
 * indices go through the colour transform below before each line is handed to
 * encode_line32().
 *
 * @param f      codec context
 * @param sc     slice context (bitmap, sample buffer, RCT coefficients, coder state)
 * @param src    unused
 * @param w      slice width in pixels
 * @param h      slice height in pixels
 * @param stride unused
 * @param ac     coder type, passed through to encode_line32()
 * @return 0 on success, otherwise the negative value returned by encode_line32()
 */
static int encode_float32_rgb_frame(FFV1Context *f, FFV1SliceContext *sc,
                                    const uint8_t *src[4],
                                    int w, int h, const int stride[4], int ac)
{
    const int ring_size = f->context_model ? 3 : 2;
    int32_t *sample[4][3];
    const int pass1 = !!(f->avctx->flags & AV_CODEC_FLAG_PASS1);
    const int bits = 16; //TODO explain this in the specification, we have 32bits in but really encode max 16
    const int offset = 1 << bits;
    const int transparency = f->transparency;

    sc->run_index = 0;

    memset(RENAME(sc->sample_buffer), 0, ring_size * MAX_PLANES *
           (w + 6) * sizeof(*RENAME(sc->sample_buffer)));

    for (int y = 0; y < h; y++) {
        // Rotate the line pointers of the ring buffer; +3 leaves room for the border samples
        for (int i = 0; i < ring_size; i++)
            for (int p = 0; p < MAX_PLANES; p++)
                sample[p][i]= RENAME(sc->sample_buffer) + p*ring_size*(w+6) + ((h+i-y)%ring_size)*(w+6) + 3;

        for (int x = 0; x < w; x++) {
            int g = sc->bitmap[0][x + w*y];
            int b = sc->bitmap[1][x + w*y];
            int r = sc->bitmap[2][x + w*y];
            // Without an alpha plane there is nothing to read; use 0 instead of an
            // indeterminate value (plane 3 is not encoded in that case)
            int a = transparency ? sc->bitmap[3][x + w*y] : 0;

            if (sc->slice_coding_mode != 1) {
                b -= g;
                r -= g;
                g += (b * sc->slice_rct_by_coef + r * sc->slice_rct_ry_coef) >> 2;
                b += offset;
                r += offset;
            }

            sample[0][0][x] = g;
            sample[1][0][x] = b;
            sample[2][0][x] = r;
            sample[3][0][x] = a;
        }

        for (int p = 0; p < 3 + transparency; p++) {
            int ret;

            // Fill the border samples next to the line on both sides
            sample[p][0][-1] = sample[p][1][0  ];
            sample[p][1][ w] = sample[p][1][w-1];

            // One more bit is needed when the colour transform above was applied
            ret = encode_line32(f, sc, f->avctx, w, sample[p], (p + 1) / 2,
                                bits + (sc->slice_coding_mode != 1), ac, pass1);
            if (ret < 0)
                return ret;
        }
    }
    return 0;
}
static int encode_slice(AVCodecContext *c, void *arg) static int encode_slice(AVCodecContext *c, void *arg)
{ {
FFV1SliceContext *sc = arg; FFV1SliceContext *sc = arg;
@ -1215,6 +1398,10 @@ retry:
} }
if (sc->remap) { if (sc->remap) {
//Both the 16bit and the 32bit remap do exactly the same thing, but with 16 bits we can
//implement it using a "histogram", while for 32 bits that histogram would be gigabytes in size;
//thus a more complex implementation that sorts (value, index) pairs is used.
if (f->bits_per_raw_sample != 32) {
if (f->colorspace == 0 && c->pix_fmt != AV_PIX_FMT_YA8 && c->pix_fmt != AV_PIX_FMT_YAF16) { if (f->colorspace == 0 && c->pix_fmt != AV_PIX_FMT_YA8 && c->pix_fmt != AV_PIX_FMT_YAF16) {
const int cx = x >> f->chroma_h_shift; const int cx = x >> f->chroma_h_shift;
const int cy = y >> f->chroma_v_shift; const int cy = y >> f->chroma_v_shift;
@ -1238,7 +1425,12 @@ retry:
} else } else
load_rgb_frame (f, sc, planes, width, height, p->linesize); load_rgb_frame (f, sc, planes, width, height, p->linesize);
encode_remap(f, sc); encode_histogram_remap(f, sc);
} else {
Unit pairs[4][65536];
load_rgb_float32_frame(f, sc, planes, width, height, p->linesize, pairs);
encode_float32_remap(f, sc, planes, pairs);
}
} }
if (ac == AC_GOLOMB_RICE) { if (ac == AC_GOLOMB_RICE) {
@ -1263,6 +1455,8 @@ retry:
} else if (c->pix_fmt == AV_PIX_FMT_YA8 || c->pix_fmt == AV_PIX_FMT_YAF16) { } else if (c->pix_fmt == AV_PIX_FMT_YA8 || c->pix_fmt == AV_PIX_FMT_YAF16) {
ret = encode_plane(f, sc, p->data[0] + ps*x + y*p->linesize[0], width, height, p->linesize[0], 0, 0, 2, ac); ret = encode_plane(f, sc, p->data[0] + ps*x + y*p->linesize[0], width, height, p->linesize[0], 0, 0, 2, ac);
ret |= encode_plane(f, sc, p->data[0] + (ps>>1) + ps*x + y*p->linesize[0], width, height, p->linesize[0], 1, 1, 2, ac); ret |= encode_plane(f, sc, p->data[0] + (ps>>1) + ps*x + y*p->linesize[0], width, height, p->linesize[0], 1, 1, 2, ac);
} else if (f->bits_per_raw_sample == 32) {
ret = encode_float32_rgb_frame(f, sc, planes, width, height, p->linesize, ac);
} else if (f->use32bit) { } else if (f->use32bit) {
ret = encode_rgb_frame32(f, sc, planes, width, height, p->linesize, ac); ret = encode_rgb_frame32(f, sc, planes, width, height, p->linesize, ac);
} else { } else {
@ -1541,7 +1735,7 @@ const FFCodec ff_ffv1_encoder = {
AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV440P12,
AV_PIX_FMT_YAF16, AV_PIX_FMT_YAF16,
AV_PIX_FMT_GRAYF16, AV_PIX_FMT_GRAYF16,
AV_PIX_FMT_GBRPF16), AV_PIX_FMT_GBRPF16, AV_PIX_FMT_GBRPF32),
.color_ranges = AVCOL_RANGE_MPEG, .color_ranges = AVCOL_RANGE_MPEG,
.p.priv_class = &ffv1_class, .p.priv_class = &ffv1_class,
.caps_internal = FF_CODEC_CAP_INIT_CLEANUP | FF_CODEC_CAP_EOF_FLUSH, .caps_internal = FF_CODEC_CAP_INIT_CLEANUP | FF_CODEC_CAP_EOF_FLUSH,