You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-08-04 22:03:09 +02:00
avcodec/ffv1enc: reduce stack usage
Fixes failures in some systems since 171060d5dc.
This can be further improved by only allocating the arrays when needed.
Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
@ -111,6 +111,10 @@ typedef struct FFV1SliceContext {
|
|||||||
uint16_t fltmap [4][65536]; //halffloat encode & decode
|
uint16_t fltmap [4][65536]; //halffloat encode & decode
|
||||||
uint32_t fltmap32[4][65536]; //float decode
|
uint32_t fltmap32[4][65536]; //float decode
|
||||||
};
|
};
|
||||||
|
struct Unit {
|
||||||
|
uint32_t val; //this is unneeded if you accept a dereference on each access
|
||||||
|
uint16_t ndx;
|
||||||
|
} unit[4][65536];
|
||||||
} FFV1SliceContext;
|
} FFV1SliceContext;
|
||||||
|
|
||||||
typedef struct FFV1Context {
|
typedef struct FFV1Context {
|
||||||
|
@ -1194,15 +1194,9 @@ static void encode_histogram_remap(FFV1Context *f, FFV1SliceContext *sc)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef struct Unit {
|
|
||||||
uint32_t val; //this is unneeded if you accept a dereference on each access
|
|
||||||
uint16_t ndx;
|
|
||||||
} Unit;
|
|
||||||
|
|
||||||
static void load_rgb_float32_frame(FFV1Context *f, FFV1SliceContext *sc,
|
static void load_rgb_float32_frame(FFV1Context *f, FFV1SliceContext *sc,
|
||||||
const uint8_t *src[4],
|
const uint8_t *src[4],
|
||||||
int w, int h, const int stride[4],
|
int w, int h, const int stride[4])
|
||||||
Unit unit[4][65536])
|
|
||||||
{
|
{
|
||||||
int x, y;
|
int x, y;
|
||||||
int transparency = f->transparency;
|
int transparency = f->transparency;
|
||||||
@ -1226,18 +1220,18 @@ static void load_rgb_float32_frame(FFV1Context *f, FFV1SliceContext *sc,
|
|||||||
}
|
}
|
||||||
// We cannot build a histogram as we do for 16bit, we need a bit of magic here
|
// We cannot build a histogram as we do for 16bit, we need a bit of magic here
|
||||||
// Its possible to reduce the memory needed at the cost of more dereferencing
|
// Its possible to reduce the memory needed at the cost of more dereferencing
|
||||||
unit[0][i].val = g;
|
sc->unit[0][i].val = g;
|
||||||
unit[0][i].ndx = x + y*w;
|
sc->unit[0][i].ndx = x + y*w;
|
||||||
|
|
||||||
unit[1][i].val = b;
|
sc->unit[1][i].val = b;
|
||||||
unit[1][i].ndx = x + y*w;
|
sc->unit[1][i].ndx = x + y*w;
|
||||||
|
|
||||||
unit[2][i].val = r;
|
sc->unit[2][i].val = r;
|
||||||
unit[2][i].ndx = x + y*w;
|
sc->unit[2][i].ndx = x + y*w;
|
||||||
|
|
||||||
if (transparency) {
|
if (transparency) {
|
||||||
unit[3][i].val = a;
|
sc->unit[3][i].val = a;
|
||||||
unit[3][i].ndx = x + y*w;
|
sc->unit[3][i].ndx = x + y*w;
|
||||||
}
|
}
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
@ -1245,11 +1239,11 @@ static void load_rgb_float32_frame(FFV1Context *f, FFV1SliceContext *sc,
|
|||||||
|
|
||||||
//TODO switch to radix sort
|
//TODO switch to radix sort
|
||||||
#define CMP(A,B) ((A)->val - (int64_t)(B)->val)
|
#define CMP(A,B) ((A)->val - (int64_t)(B)->val)
|
||||||
AV_QSORT(unit[0], i, Unit, CMP);
|
AV_QSORT(sc->unit[0], i, struct Unit, CMP);
|
||||||
AV_QSORT(unit[1], i, Unit, CMP);
|
AV_QSORT(sc->unit[1], i, struct Unit, CMP);
|
||||||
AV_QSORT(unit[2], i, Unit, CMP);
|
AV_QSORT(sc->unit[2], i, struct Unit, CMP);
|
||||||
if (transparency)
|
if (transparency)
|
||||||
AV_QSORT(unit[3], i, Unit, CMP);
|
AV_QSORT(sc->unit[3], i, struct Unit, CMP);
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef struct RemapEncoderState {
|
typedef struct RemapEncoderState {
|
||||||
@ -1296,7 +1290,7 @@ static inline void encode_mul(RemapEncoderState *s, int mul_index)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int encode_float32_remap_segment(FFV1SliceContext *sc, Unit unit[4][65536],
|
static int encode_float32_remap_segment(FFV1SliceContext *sc,
|
||||||
RemapEncoderState *state_arg, int update, int final)
|
RemapEncoderState *state_arg, int update, int final)
|
||||||
{
|
{
|
||||||
RemapEncoderState s;
|
RemapEncoderState s;
|
||||||
@ -1324,7 +1318,7 @@ static int encode_float32_remap_segment(FFV1SliceContext *sc, Unit unit[4][65536
|
|||||||
val = 1LL<<32;
|
val = 1LL<<32;
|
||||||
}
|
}
|
||||||
} else
|
} else
|
||||||
val = unit[s.p][s.i].val;
|
val = sc->unit[s.p][s.i].val;
|
||||||
|
|
||||||
if (s.last_val != val) {
|
if (s.last_val != val) {
|
||||||
int64_t delta = 0;
|
int64_t delta = 0;
|
||||||
@ -1376,7 +1370,7 @@ static int encode_float32_remap_segment(FFV1SliceContext *sc, Unit unit[4][65536
|
|||||||
|
|
||||||
av_assert2(s.i == s.pixel_num || s.last_val + (val - s.last_val) * current_mul + delta == unit[s.p][s.i].val);
|
av_assert2(s.i == s.pixel_num || s.last_val + (val - s.last_val) * current_mul + delta == unit[s.p][s.i].val);
|
||||||
if (s.i < s.pixel_num)
|
if (s.i < s.pixel_num)
|
||||||
s.last_val = unit[s.p][s.i].val;
|
s.last_val = sc->unit[s.p][s.i].val;
|
||||||
}
|
}
|
||||||
s.current_mul_index = ((s.last_val + 1) * s.mul_count) >> 32;
|
s.current_mul_index = ((s.last_val + 1) * s.mul_count) >> 32;
|
||||||
if (!s.run)
|
if (!s.run)
|
||||||
@ -1384,7 +1378,7 @@ static int encode_float32_remap_segment(FFV1SliceContext *sc, Unit unit[4][65536
|
|||||||
s.compact_index ++;
|
s.compact_index ++;
|
||||||
}
|
}
|
||||||
if (final && s.i < s.pixel_num)
|
if (final && s.i < s.pixel_num)
|
||||||
sc->bitmap[s.p][unit[s.p][s.i].ndx] = s.compact_index;
|
sc->bitmap[s.p][sc->unit[s.p][s.i].ndx] = s.compact_index;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (update) {
|
if (update) {
|
||||||
@ -1394,7 +1388,7 @@ static int encode_float32_remap_segment(FFV1SliceContext *sc, Unit unit[4][65536
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void encode_float32_remap(FFV1Context *f, FFV1SliceContext *sc,
|
static void encode_float32_remap(FFV1Context *f, FFV1SliceContext *sc,
|
||||||
const uint8_t *src[4], Unit unit[4][65536])
|
const uint8_t *src[4])
|
||||||
{
|
{
|
||||||
RemapEncoderState s;
|
RemapEncoderState s;
|
||||||
s.pixel_num = sc->slice_width * sc->slice_height;
|
s.pixel_num = sc->slice_width * sc->slice_height;
|
||||||
@ -1411,7 +1405,7 @@ static void encode_float32_remap(FFV1Context *f, FFV1SliceContext *sc,
|
|||||||
s.mul_count = 1;
|
s.mul_count = 1;
|
||||||
|
|
||||||
for (int i= 0; i<s.pixel_num; i++) {
|
for (int i= 0; i<s.pixel_num; i++) {
|
||||||
int64_t val = unit[p][i].val;
|
int64_t val = sc->unit[p][i].val;
|
||||||
if (val != last_val) {
|
if (val != last_val) {
|
||||||
av_assert2(last_val < val);
|
av_assert2(last_val < val);
|
||||||
for(int si= 0; si < FF_ARRAY_ELEMS(score_tab); si++) {
|
for(int si= 0; si < FF_ARRAY_ELEMS(score_tab); si++) {
|
||||||
@ -1431,7 +1425,7 @@ static void encode_float32_remap(FFV1Context *f, FFV1SliceContext *sc,
|
|||||||
s.mul[0] = -1 << best_index;
|
s.mul[0] = -1 << best_index;
|
||||||
s.mul[s.mul_count] = 1;
|
s.mul[s.mul_count] = 1;
|
||||||
|
|
||||||
encode_float32_remap_segment(sc, unit, &s, 1, 1);
|
encode_float32_remap_segment(sc, &s, 1, 1);
|
||||||
|
|
||||||
sc->c = s.rc;
|
sc->c = s.rc;
|
||||||
}
|
}
|
||||||
@ -1559,9 +1553,8 @@ retry:
|
|||||||
|
|
||||||
encode_histogram_remap(f, sc);
|
encode_histogram_remap(f, sc);
|
||||||
} else {
|
} else {
|
||||||
Unit pairs[4][65536];
|
load_rgb_float32_frame(f, sc, planes, width, height, p->linesize);
|
||||||
load_rgb_float32_frame(f, sc, planes, width, height, p->linesize, pairs);
|
encode_float32_remap(f, sc, planes);
|
||||||
encode_float32_remap(f, sc, planes, pairs);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user