You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-07-16 22:42:38 +02:00
avcodec/roqvideoenc: Avoid allocating buffers separately

This is possible because their size is known at compile-time;
so they can be put directly into the context and don't need
to be allocated for every frame.

Reviewed-by: Paul B Mahol <onemda@gmail.com>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@gmail.com>
This commit is contained in:
@@ -78,6 +78,36 @@
|
|||||||
/* The cast is useful when multiplying it by INT_MAX */
|
/* The cast is useful when multiplying it by INT_MAX */
|
||||||
#define ROQ_LAMBDA_SCALE ((uint64_t) FF_LAMBDA_SCALE)
|
#define ROQ_LAMBDA_SCALE ((uint64_t) FF_LAMBDA_SCALE)
|
||||||
|
|
||||||
|
typedef struct RoqCodebooks {
|
||||||
|
int numCB4;
|
||||||
|
int numCB2;
|
||||||
|
int usedCB2[MAX_CBS_2x2];
|
||||||
|
int usedCB4[MAX_CBS_4x4];
|
||||||
|
uint8_t unpacked_cb2[MAX_CBS_2x2*2*2*3];
|
||||||
|
uint8_t unpacked_cb4[MAX_CBS_4x4*4*4*3];
|
||||||
|
uint8_t unpacked_cb4_enlarged[MAX_CBS_4x4*8*8*3];
|
||||||
|
} RoqCodebooks;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Temporary vars
|
||||||
|
*/
|
||||||
|
typedef struct RoqTempData
|
||||||
|
{
|
||||||
|
int f2i4[MAX_CBS_4x4];
|
||||||
|
int i2f4[MAX_CBS_4x4];
|
||||||
|
int f2i2[MAX_CBS_2x2];
|
||||||
|
int i2f2[MAX_CBS_2x2];
|
||||||
|
|
||||||
|
int mainChunkSize;
|
||||||
|
|
||||||
|
int numCB4;
|
||||||
|
int numCB2;
|
||||||
|
|
||||||
|
RoqCodebooks codebooks;
|
||||||
|
|
||||||
|
int used_option[4];
|
||||||
|
} RoqTempData;
|
||||||
|
|
||||||
typedef struct SubcelEvaluation {
|
typedef struct SubcelEvaluation {
|
||||||
int eval_dist[4];
|
int eval_dist[4];
|
||||||
int best_bit_use;
|
int best_bit_use;
|
||||||
@@ -115,7 +145,9 @@ typedef struct RoqEncContext {
|
|||||||
|
|
||||||
const AVFrame *frame_to_enc;
|
const AVFrame *frame_to_enc;
|
||||||
uint8_t *out_buf;
|
uint8_t *out_buf;
|
||||||
struct RoqTempData *tmpData;
|
RoqTempData tmp_data;
|
||||||
|
roq_cell results4[4 * MAX_CBS_4x4];
|
||||||
|
int tmp_codebook_buf[FFMAX(24 * MAX_CBS_4x4, 6 * MAX_CBS_2x2)];
|
||||||
|
|
||||||
CelEvaluation *cel_evals;
|
CelEvaluation *cel_evals;
|
||||||
int *closest_cb;
|
int *closest_cb;
|
||||||
@@ -233,36 +265,6 @@ static inline int squared_diff_macroblock(uint8_t a[], uint8_t b[], int size)
|
|||||||
return sdiff;
|
return sdiff;
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef struct RoqCodebooks {
|
|
||||||
int numCB4;
|
|
||||||
int numCB2;
|
|
||||||
int usedCB2[MAX_CBS_2x2];
|
|
||||||
int usedCB4[MAX_CBS_4x4];
|
|
||||||
uint8_t unpacked_cb2[MAX_CBS_2x2*2*2*3];
|
|
||||||
uint8_t unpacked_cb4[MAX_CBS_4x4*4*4*3];
|
|
||||||
uint8_t unpacked_cb4_enlarged[MAX_CBS_4x4*8*8*3];
|
|
||||||
} RoqCodebooks;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Temporary vars
|
|
||||||
*/
|
|
||||||
typedef struct RoqTempData
|
|
||||||
{
|
|
||||||
int f2i4[MAX_CBS_4x4];
|
|
||||||
int i2f4[MAX_CBS_4x4];
|
|
||||||
int f2i2[MAX_CBS_2x2];
|
|
||||||
int i2f2[MAX_CBS_2x2];
|
|
||||||
|
|
||||||
int mainChunkSize;
|
|
||||||
|
|
||||||
int numCB4;
|
|
||||||
int numCB2;
|
|
||||||
|
|
||||||
RoqCodebooks codebooks;
|
|
||||||
|
|
||||||
int used_option[4];
|
|
||||||
} RoqTempdata;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initialize cel evaluators and set their source coordinates
|
* Initialize cel evaluators and set their source coordinates
|
||||||
*/
|
*/
|
||||||
@@ -424,9 +426,10 @@ static void motion_search(RoqEncContext *enc, int blocksize)
|
|||||||
* Get distortion for all options available to a subcel
|
* Get distortion for all options available to a subcel
|
||||||
*/
|
*/
|
||||||
static void gather_data_for_subcel(SubcelEvaluation *subcel, int x,
|
static void gather_data_for_subcel(SubcelEvaluation *subcel, int x,
|
||||||
int y, RoqEncContext *enc, RoqTempdata *tempData)
|
int y, RoqEncContext *enc)
|
||||||
{
|
{
|
||||||
RoqContext *const roq = &enc->common;
|
RoqContext *const roq = &enc->common;
|
||||||
|
RoqTempData *const tempData = &enc->tmp_data;
|
||||||
uint8_t mb4[4*4*3];
|
uint8_t mb4[4*4*3];
|
||||||
uint8_t mb2[2*2*3];
|
uint8_t mb2[2*2*3];
|
||||||
int cluster_index;
|
int cluster_index;
|
||||||
@@ -488,10 +491,10 @@ static void gather_data_for_subcel(SubcelEvaluation *subcel, int x,
|
|||||||
/**
|
/**
|
||||||
* Get distortion for all options available to a cel
|
* Get distortion for all options available to a cel
|
||||||
*/
|
*/
|
||||||
static void gather_data_for_cel(CelEvaluation *cel, RoqEncContext *enc,
|
static void gather_data_for_cel(CelEvaluation *cel, RoqEncContext *enc)
|
||||||
RoqTempdata *tempData)
|
|
||||||
{
|
{
|
||||||
RoqContext *const roq = &enc->common;
|
RoqContext *const roq = &enc->common;
|
||||||
|
RoqTempData *const tempData = &enc->tmp_data;
|
||||||
uint8_t mb8[8*8*3];
|
uint8_t mb8[8*8*3];
|
||||||
int index = cel->sourceY * roq->width / 64 + cel->sourceX/8;
|
int index = cel->sourceY * roq->width / 64 + cel->sourceX/8;
|
||||||
int i, j, best_dist, divide_bit_use;
|
int i, j, best_dist, divide_bit_use;
|
||||||
@@ -523,10 +526,10 @@ static void gather_data_for_cel(CelEvaluation *cel, RoqEncContext *enc,
|
|||||||
index_mb(mb8, tempData->codebooks.unpacked_cb4_enlarged,
|
index_mb(mb8, tempData->codebooks.unpacked_cb4_enlarged,
|
||||||
tempData->codebooks.numCB4, &cel->cbEntry, 8);
|
tempData->codebooks.numCB4, &cel->cbEntry, 8);
|
||||||
|
|
||||||
gather_data_for_subcel(cel->subCels + 0, cel->sourceX+0, cel->sourceY+0, enc, tempData);
|
gather_data_for_subcel(cel->subCels + 0, cel->sourceX+0, cel->sourceY+0, enc);
|
||||||
gather_data_for_subcel(cel->subCels + 1, cel->sourceX+4, cel->sourceY+0, enc, tempData);
|
gather_data_for_subcel(cel->subCels + 1, cel->sourceX+4, cel->sourceY+0, enc);
|
||||||
gather_data_for_subcel(cel->subCels + 2, cel->sourceX+0, cel->sourceY+4, enc, tempData);
|
gather_data_for_subcel(cel->subCels + 2, cel->sourceX+0, cel->sourceY+4, enc);
|
||||||
gather_data_for_subcel(cel->subCels + 3, cel->sourceX+4, cel->sourceY+4, enc, tempData);
|
gather_data_for_subcel(cel->subCels + 3, cel->sourceX+4, cel->sourceY+4, enc);
|
||||||
|
|
||||||
cel->eval_dist[RoQ_ID_CCC] = 0;
|
cel->eval_dist[RoQ_ID_CCC] = 0;
|
||||||
divide_bit_use = 0;
|
divide_bit_use = 0;
|
||||||
@@ -563,9 +566,10 @@ static void gather_data_for_cel(CelEvaluation *cel, RoqEncContext *enc,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void remap_codebooks(RoqEncContext *enc, RoqTempdata *tempData)
|
static void remap_codebooks(RoqEncContext *enc)
|
||||||
{
|
{
|
||||||
RoqContext *const roq = &enc->common;
|
RoqContext *const roq = &enc->common;
|
||||||
|
RoqTempData *const tempData = &enc->tmp_data;
|
||||||
int i, j, idx=0;
|
int i, j, idx=0;
|
||||||
|
|
||||||
/* Make remaps for the final codebook usage */
|
/* Make remaps for the final codebook usage */
|
||||||
@@ -596,9 +600,10 @@ static void remap_codebooks(RoqEncContext *enc, RoqTempdata *tempData)
|
|||||||
/**
|
/**
|
||||||
* Write codebook chunk
|
* Write codebook chunk
|
||||||
*/
|
*/
|
||||||
static void write_codebooks(RoqEncContext *enc, RoqTempdata *tempData)
|
static void write_codebooks(RoqEncContext *enc)
|
||||||
{
|
{
|
||||||
RoqContext *const roq = &enc->common;
|
RoqContext *const roq = &enc->common;
|
||||||
|
RoqTempData *const tempData = &enc->tmp_data;
|
||||||
int i, j;
|
int i, j;
|
||||||
uint8_t **outp= &enc->out_buf;
|
uint8_t **outp= &enc->out_buf;
|
||||||
|
|
||||||
@@ -652,10 +657,10 @@ static void write_typecode(CodingSpool *s, uint8_t type)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void reconstruct_and_encode_image(RoqEncContext *enc,
|
static void reconstruct_and_encode_image(RoqEncContext *enc,
|
||||||
RoqTempdata *tempData,
|
|
||||||
int w, int h, int numBlocks)
|
int w, int h, int numBlocks)
|
||||||
{
|
{
|
||||||
RoqContext *const roq = &enc->common;
|
RoqContext *const roq = &enc->common;
|
||||||
|
RoqTempData *const tempData = &enc->tmp_data;
|
||||||
int i, j, k;
|
int i, j, k;
|
||||||
int x, y;
|
int x, y;
|
||||||
int subX, subY;
|
int subX, subY;
|
||||||
@@ -815,20 +820,17 @@ static int generate_codebook(RoqEncContext *enc,
|
|||||||
int i, j, k, ret = 0;
|
int i, j, k, ret = 0;
|
||||||
int c_size = size*size/4;
|
int c_size = size*size/4;
|
||||||
int *buf;
|
int *buf;
|
||||||
int *codebook = av_malloc_array(6*c_size, cbsize*sizeof(int));
|
int *codebook = enc->tmp_codebook_buf;
|
||||||
int *closest_cb = enc->closest_cb;
|
int *closest_cb = enc->closest_cb;
|
||||||
|
|
||||||
if (!codebook)
|
|
||||||
return AVERROR(ENOMEM);
|
|
||||||
|
|
||||||
ret = avpriv_init_elbg(points, 6 * c_size, inputCount, codebook,
|
ret = avpriv_init_elbg(points, 6 * c_size, inputCount, codebook,
|
||||||
cbsize, 1, closest_cb, &enc->randctx);
|
cbsize, 1, closest_cb, &enc->randctx);
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
goto out;
|
return ret;
|
||||||
ret = avpriv_do_elbg(points, 6 * c_size, inputCount, codebook,
|
ret = avpriv_do_elbg(points, 6 * c_size, inputCount, codebook,
|
||||||
cbsize, 1, closest_cb, &enc->randctx);
|
cbsize, 1, closest_cb, &enc->randctx);
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
goto out;
|
return ret;
|
||||||
|
|
||||||
buf = codebook;
|
buf = codebook;
|
||||||
for (i=0; i<cbsize; i++)
|
for (i=0; i<cbsize; i++)
|
||||||
@@ -840,40 +842,32 @@ static int generate_codebook(RoqEncContext *enc,
|
|||||||
results->v = (*buf++ + CHROMA_BIAS/2)/CHROMA_BIAS;
|
results->v = (*buf++ + CHROMA_BIAS/2)/CHROMA_BIAS;
|
||||||
results++;
|
results++;
|
||||||
}
|
}
|
||||||
out:
|
return 0;
|
||||||
av_free(codebook);
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int generate_new_codebooks(RoqEncContext *enc, RoqTempdata *tempData)
|
static int generate_new_codebooks(RoqEncContext *enc)
|
||||||
{
|
{
|
||||||
int i, j, ret = 0;
|
int i, j, ret = 0;
|
||||||
RoqCodebooks *codebooks = &tempData->codebooks;
|
RoqCodebooks *codebooks = &enc->tmp_data.codebooks;
|
||||||
RoqContext *const roq = &enc->common;
|
RoqContext *const roq = &enc->common;
|
||||||
int max = roq->width * roq->height / 16;
|
int max = roq->width * roq->height / 16;
|
||||||
uint8_t mb2[3*4];
|
uint8_t mb2[3*4];
|
||||||
roq_cell *results4 = av_malloc(sizeof(roq_cell)*MAX_CBS_4x4*4);
|
|
||||||
int *points = enc->points;
|
int *points = enc->points;
|
||||||
|
|
||||||
if (!results4) {
|
|
||||||
ret = AVERROR(ENOMEM);
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Subsample YUV data */
|
/* Subsample YUV data */
|
||||||
create_clusters(enc->frame_to_enc, roq->width, roq->height, points);
|
create_clusters(enc->frame_to_enc, roq->width, roq->height, points);
|
||||||
|
|
||||||
/* Create 4x4 codebooks */
|
|
||||||
if ((ret = generate_codebook(enc, points, max,
|
|
||||||
results4, 4, (enc->quake3_compat ? MAX_CBS_4x4-1 : MAX_CBS_4x4))) < 0)
|
|
||||||
goto out;
|
|
||||||
|
|
||||||
codebooks->numCB4 = (enc->quake3_compat ? MAX_CBS_4x4-1 : MAX_CBS_4x4);
|
codebooks->numCB4 = (enc->quake3_compat ? MAX_CBS_4x4-1 : MAX_CBS_4x4);
|
||||||
|
|
||||||
|
/* Create 4x4 codebooks */
|
||||||
|
if ((ret = generate_codebook(enc, points, max, enc->results4,
|
||||||
|
4, codebooks->numCB4)) < 0)
|
||||||
|
return ret;
|
||||||
|
|
||||||
/* Create 2x2 codebooks */
|
/* Create 2x2 codebooks */
|
||||||
if ((ret = generate_codebook(enc, points, max * 4,
|
if ((ret = generate_codebook(enc, points, max * 4,
|
||||||
roq->cb2x2, 2, MAX_CBS_2x2)) < 0)
|
roq->cb2x2, 2, MAX_CBS_2x2)) < 0)
|
||||||
goto out;
|
return ret;
|
||||||
|
|
||||||
codebooks->numCB2 = MAX_CBS_2x2;
|
codebooks->numCB2 = MAX_CBS_2x2;
|
||||||
|
|
||||||
@@ -884,7 +878,7 @@ static int generate_new_codebooks(RoqEncContext *enc, RoqTempdata *tempData)
|
|||||||
/* Index all 4x4 entries to the 2x2 entries, unpack, and enlarge */
|
/* Index all 4x4 entries to the 2x2 entries, unpack, and enlarge */
|
||||||
for (i=0; i<codebooks->numCB4; i++) {
|
for (i=0; i<codebooks->numCB4; i++) {
|
||||||
for (j=0; j<4; j++) {
|
for (j=0; j<4; j++) {
|
||||||
unpack_roq_cell(&results4[4*i + j], mb2);
|
unpack_roq_cell(&enc->results4[4*i + j], mb2);
|
||||||
index_mb(mb2, codebooks->unpacked_cb2, codebooks->numCB2,
|
index_mb(mb2, codebooks->unpacked_cb2, codebooks->numCB2,
|
||||||
&roq->cb4x4[i].idx[j], 2);
|
&roq->cb4x4[i].idx[j], 2);
|
||||||
}
|
}
|
||||||
@@ -893,20 +887,19 @@ static int generate_new_codebooks(RoqEncContext *enc, RoqTempdata *tempData)
|
|||||||
enlarge_roq_mb4(codebooks->unpacked_cb4 + i*4*4*3,
|
enlarge_roq_mb4(codebooks->unpacked_cb4 + i*4*4*3,
|
||||||
codebooks->unpacked_cb4_enlarged + i*8*8*3);
|
codebooks->unpacked_cb4_enlarged + i*8*8*3);
|
||||||
}
|
}
|
||||||
out:
|
|
||||||
av_free(results4);
|
return 0;
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int roq_encode_video(RoqEncContext *enc)
|
static int roq_encode_video(RoqEncContext *enc)
|
||||||
{
|
{
|
||||||
RoqTempdata *tempData = enc->tmpData;
|
RoqTempData *const tempData = &enc->tmp_data;
|
||||||
RoqContext *const roq = &enc->common;
|
RoqContext *const roq = &enc->common;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
memset(tempData, 0, sizeof(*tempData));
|
memset(tempData, 0, sizeof(*tempData));
|
||||||
|
|
||||||
ret = generate_new_codebooks(enc, tempData);
|
ret = generate_new_codebooks(enc);
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
@@ -917,7 +910,7 @@ static int roq_encode_video(RoqEncContext *enc)
|
|||||||
|
|
||||||
retry_encode:
|
retry_encode:
|
||||||
for (int i = 0; i < roq->width * roq->height / 64; i++)
|
for (int i = 0; i < roq->width * roq->height / 64; i++)
|
||||||
gather_data_for_cel(enc->cel_evals + i, enc, tempData);
|
gather_data_for_cel(enc->cel_evals + i, enc);
|
||||||
|
|
||||||
/* Quake 3 can't handle chunks bigger than 65535 bytes */
|
/* Quake 3 can't handle chunks bigger than 65535 bytes */
|
||||||
if (tempData->mainChunkSize/8 > 65535 && enc->quake3_compat) {
|
if (tempData->mainChunkSize/8 > 65535 && enc->quake3_compat) {
|
||||||
@@ -940,11 +933,11 @@ static int roq_encode_video(RoqEncContext *enc)
|
|||||||
goto retry_encode;
|
goto retry_encode;
|
||||||
}
|
}
|
||||||
|
|
||||||
remap_codebooks(enc, tempData);
|
remap_codebooks(enc);
|
||||||
|
|
||||||
write_codebooks(enc, tempData);
|
write_codebooks(enc);
|
||||||
|
|
||||||
reconstruct_and_encode_image(enc, tempData, roq->width, roq->height,
|
reconstruct_and_encode_image(enc, roq->width, roq->height,
|
||||||
roq->width * roq->height / 64);
|
roq->width * roq->height / 64);
|
||||||
|
|
||||||
/* Rotate frame history */
|
/* Rotate frame history */
|
||||||
@@ -964,7 +957,6 @@ static av_cold int roq_encode_end(AVCodecContext *avctx)
|
|||||||
av_frame_free(&enc->common.current_frame);
|
av_frame_free(&enc->common.current_frame);
|
||||||
av_frame_free(&enc->common.last_frame);
|
av_frame_free(&enc->common.last_frame);
|
||||||
|
|
||||||
av_freep(&enc->tmpData);
|
|
||||||
av_freep(&enc->cel_evals);
|
av_freep(&enc->cel_evals);
|
||||||
av_freep(&enc->closest_cb);
|
av_freep(&enc->closest_cb);
|
||||||
av_freep(&enc->this_motion4);
|
av_freep(&enc->this_motion4);
|
||||||
@@ -1009,8 +1001,6 @@ static av_cold int roq_encode_init(AVCodecContext *avctx)
|
|||||||
if (!roq->last_frame || !roq->current_frame)
|
if (!roq->last_frame || !roq->current_frame)
|
||||||
return AVERROR(ENOMEM);
|
return AVERROR(ENOMEM);
|
||||||
|
|
||||||
enc->tmpData = av_malloc(sizeof(RoqTempdata));
|
|
||||||
|
|
||||||
enc->this_motion4 =
|
enc->this_motion4 =
|
||||||
av_mallocz_array(roq->width * roq->height / 16, sizeof(motion_vect));
|
av_mallocz_array(roq->width * roq->height / 16, sizeof(motion_vect));
|
||||||
|
|
||||||
@@ -1028,7 +1018,7 @@ static av_cold int roq_encode_init(AVCodecContext *avctx)
|
|||||||
enc->closest_cb =
|
enc->closest_cb =
|
||||||
av_malloc_array(roq->width * roq->height, 3 * sizeof(int));
|
av_malloc_array(roq->width * roq->height, 3 * sizeof(int));
|
||||||
|
|
||||||
if (!enc->tmpData || !enc->this_motion4 || !enc->last_motion4 ||
|
if (!enc->this_motion4 || !enc->last_motion4 ||
|
||||||
!enc->this_motion8 || !enc->last_motion8 || !enc->closest_cb)
|
!enc->this_motion8 || !enc->last_motion8 || !enc->closest_cb)
|
||||||
return AVERROR(ENOMEM);
|
return AVERROR(ENOMEM);
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user