mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-13 21:28:01 +02:00
XvMC speedup by removing one memcpy and doing MB packing
Originally committed as revision 2442 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
6b56c616d9
commit
a579db0c4f
@ -72,6 +72,8 @@ static int mpeg_decode_motion(MpegEncContext *s, int fcode, int pred);
|
|||||||
#ifdef HAVE_XVMC
|
#ifdef HAVE_XVMC
|
||||||
extern int XVMC_field_start(MpegEncContext *s, AVCodecContext *avctx);
|
extern int XVMC_field_start(MpegEncContext *s, AVCodecContext *avctx);
|
||||||
extern int XVMC_field_end(MpegEncContext *s);
|
extern int XVMC_field_end(MpegEncContext *s);
|
||||||
|
extern void XVMC_pack_pblocks(MpegEncContext *s,int cbp);
|
||||||
|
extern void XVMC_init_block(s);//set s->block
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_ENCODERS
|
#ifdef CONFIG_ENCODERS
|
||||||
@ -1083,15 +1085,24 @@ static int mpeg_decode_mb(MpegEncContext *s,
|
|||||||
}else
|
}else
|
||||||
memset(s->last_mv, 0, sizeof(s->last_mv)); /* reset mv prediction */
|
memset(s->last_mv, 0, sizeof(s->last_mv)); /* reset mv prediction */
|
||||||
s->mb_intra = 1;
|
s->mb_intra = 1;
|
||||||
|
#ifdef HAVE_XVMC
|
||||||
|
//on 1 we memcpy blocks in xvmcvideo
|
||||||
|
if(s->avctx->xvmc_acceleration > 1){
|
||||||
|
XVMC_pack_pblocks(s,-1);//intra are always full blocks
|
||||||
|
if(s->swap_uv){
|
||||||
|
exchange_uv(s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (s->codec_id == CODEC_ID_MPEG2VIDEO) {
|
if (s->codec_id == CODEC_ID_MPEG2VIDEO) {
|
||||||
for(i=0;i<6;i++) {
|
for(i=0;i<6;i++) {
|
||||||
if (mpeg2_decode_block_intra(s, block[i], i) < 0)
|
if (mpeg2_decode_block_intra(s, s->pblocks[i], i) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for(i=0;i<6;i++) {
|
for(i=0;i<6;i++) {
|
||||||
if (mpeg1_decode_block_intra(s, block[i], i) < 0)
|
if (mpeg1_decode_block_intra(s, s->pblocks[i], i) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1262,10 +1273,20 @@ static int mpeg_decode_mb(MpegEncContext *s,
|
|||||||
}
|
}
|
||||||
cbp++;
|
cbp++;
|
||||||
|
|
||||||
|
#ifdef HAVE_XVMC
|
||||||
|
//on 1 we memcpy blocks in xvmcvideo
|
||||||
|
if(s->avctx->xvmc_acceleration > 1){
|
||||||
|
XVMC_pack_pblocks(s,cbp);
|
||||||
|
if(s->swap_uv){
|
||||||
|
exchange_uv(s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (s->codec_id == CODEC_ID_MPEG2VIDEO) {
|
if (s->codec_id == CODEC_ID_MPEG2VIDEO) {
|
||||||
for(i=0;i<6;i++) {
|
for(i=0;i<6;i++) {
|
||||||
if (cbp & 32) {
|
if (cbp & 32) {
|
||||||
if (mpeg2_decode_block_non_intra(s, block[i], i) < 0)
|
if (mpeg2_decode_block_non_intra(s, s->pblocks[i], i) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
} else {
|
} else {
|
||||||
s->block_last_index[i] = -1;
|
s->block_last_index[i] = -1;
|
||||||
@ -1275,7 +1296,7 @@ static int mpeg_decode_mb(MpegEncContext *s,
|
|||||||
} else {
|
} else {
|
||||||
for(i=0;i<6;i++) {
|
for(i=0;i<6;i++) {
|
||||||
if (cbp & 32) {
|
if (cbp & 32) {
|
||||||
if (mpeg1_decode_block_inter(s, block[i], i) < 0)
|
if (mpeg1_decode_block_inter(s, s->pblocks[i], i) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
} else {
|
} else {
|
||||||
s->block_last_index[i] = -1;
|
s->block_last_index[i] = -1;
|
||||||
@ -1960,10 +1981,12 @@ static void mpeg_decode_extension(AVCodecContext *avctx,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void exchange_uv(AVFrame *f){
|
static void exchange_uv(MpegEncContext *s){
|
||||||
uint8_t *t= f->data[1];
|
short * tmp;
|
||||||
f->data[1]= f->data[2];
|
|
||||||
f->data[2]= t;
|
tmp = s->pblocks[4];
|
||||||
|
s->pblocks[4] = s->pblocks[5];
|
||||||
|
s->pblocks[5] = tmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define DECODE_SLICE_FATAL_ERROR -2
|
#define DECODE_SLICE_FATAL_ERROR -2
|
||||||
@ -2093,6 +2116,12 @@ static int mpeg_decode_slice(AVCodecContext *avctx,
|
|||||||
ff_init_block_index(s);
|
ff_init_block_index(s);
|
||||||
|
|
||||||
for(;;) {
|
for(;;) {
|
||||||
|
#ifdef HAVE_XVMC
|
||||||
|
//on 1 we memcpy blocks in xvmcvideo
|
||||||
|
if(s->avctx->xvmc_acceleration > 1)
|
||||||
|
XVMC_init_block(s);//set s->block
|
||||||
|
#endif
|
||||||
|
|
||||||
s->dsp.clear_blocks(s->block[0]);
|
s->dsp.clear_blocks(s->block[0]);
|
||||||
|
|
||||||
ret = mpeg_decode_mb(s, s->block);
|
ret = mpeg_decode_mb(s, s->block);
|
||||||
@ -2133,14 +2162,9 @@ static int mpeg_decode_slice(AVCodecContext *avctx,
|
|||||||
MPV_decode_mb(s, s->block);
|
MPV_decode_mb(s, s->block);
|
||||||
|
|
||||||
if (++s->mb_x >= s->mb_width) {
|
if (++s->mb_x >= s->mb_width) {
|
||||||
if(s->avctx->codec_tag == ff_get_fourcc("VCR2"))
|
|
||||||
exchange_uv((AVFrame*)s->current_picture_ptr);
|
|
||||||
|
|
||||||
ff_draw_horiz_band(s, 16*s->mb_y, 16);
|
ff_draw_horiz_band(s, 16*s->mb_y, 16);
|
||||||
|
|
||||||
if(s->avctx->codec_tag == ff_get_fourcc("VCR2"))
|
|
||||||
exchange_uv((AVFrame*)s->current_picture_ptr);
|
|
||||||
|
|
||||||
s->mb_x = 0;
|
s->mb_x = 0;
|
||||||
s->mb_y++;
|
s->mb_y++;
|
||||||
|
|
||||||
@ -2233,8 +2257,6 @@ static int slice_end(AVCodecContext *avctx, AVFrame *pict)
|
|||||||
ff_print_debug_info(s, s->last_picture_ptr);
|
ff_print_debug_info(s, s->last_picture_ptr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if(s->avctx->codec_tag == ff_get_fourcc("VCR2"))
|
|
||||||
exchange_uv(pict);
|
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
} else {
|
} else {
|
||||||
@ -2294,11 +2316,13 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx,
|
|||||||
//get_format() or set_video(width,height,aspect,pix_fmt);
|
//get_format() or set_video(width,height,aspect,pix_fmt);
|
||||||
//until then pix_fmt may be changed right after codec init
|
//until then pix_fmt may be changed right after codec init
|
||||||
if( avctx->pix_fmt == PIX_FMT_XVMC_MPEG2_IDCT )
|
if( avctx->pix_fmt == PIX_FMT_XVMC_MPEG2_IDCT )
|
||||||
|
if( avctx->idct_algo == FF_IDCT_AUTO )
|
||||||
avctx->idct_algo = FF_IDCT_SIMPLE;
|
avctx->idct_algo = FF_IDCT_SIMPLE;
|
||||||
|
|
||||||
if (MPV_common_init(s) < 0)
|
if (MPV_common_init(s) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
s1->mpeg_enc_ctx_allocated = 1;
|
s1->mpeg_enc_ctx_allocated = 1;
|
||||||
|
s->swap_uv = 0;//just in case vcr2 and mpeg2 streams have been concatenated
|
||||||
}
|
}
|
||||||
|
|
||||||
skip_bits(&s->gb, 10); /* vbv_buffer_size */
|
skip_bits(&s->gb, 10); /* vbv_buffer_size */
|
||||||
@ -2378,10 +2402,13 @@ static int vcr2_init_sequence(AVCodecContext *avctx)
|
|||||||
//get_format() or set_video(width,height,aspect,pix_fmt);
|
//get_format() or set_video(width,height,aspect,pix_fmt);
|
||||||
//until then pix_fmt may be changed right after codec init
|
//until then pix_fmt may be changed right after codec init
|
||||||
if( avctx->pix_fmt == PIX_FMT_XVMC_MPEG2_IDCT )
|
if( avctx->pix_fmt == PIX_FMT_XVMC_MPEG2_IDCT )
|
||||||
|
if( avctx->idct_algo == FF_IDCT_AUTO )
|
||||||
avctx->idct_algo = FF_IDCT_SIMPLE;
|
avctx->idct_algo = FF_IDCT_SIMPLE;
|
||||||
|
|
||||||
if (MPV_common_init(s) < 0)
|
if (MPV_common_init(s) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
|
exchange_uv(s);//common init reset pblocks, so we swap them here
|
||||||
|
s->swap_uv = 1;// in case of xvmc we need to swap uv for each MB
|
||||||
s1->mpeg_enc_ctx_allocated = 1;
|
s1->mpeg_enc_ctx_allocated = 1;
|
||||||
|
|
||||||
for(i=0;i<64;i++) {
|
for(i=0;i<64;i++) {
|
||||||
@ -2634,14 +2661,14 @@ static int mpeg_mc_decode_init(AVCodecContext *avctx){
|
|||||||
|
|
||||||
if( !(avctx->slice_flags & SLICE_FLAG_CODED_ORDER) )
|
if( !(avctx->slice_flags & SLICE_FLAG_CODED_ORDER) )
|
||||||
return -1;
|
return -1;
|
||||||
if( !(avctx->slice_flags & SLICE_FLAG_ALLOW_FIELD) )
|
if( !(avctx->slice_flags & SLICE_FLAG_ALLOW_FIELD) ){
|
||||||
dprintf("mpeg12.c: XvMC decoder will work better if SLICE_FLAG_ALLOW_FIELD is set\n");
|
dprintf("mpeg12.c: XvMC decoder will work better if SLICE_FLAG_ALLOW_FIELD is set\n");
|
||||||
|
}
|
||||||
mpeg_decode_init(avctx);
|
mpeg_decode_init(avctx);
|
||||||
s = avctx->priv_data;
|
s = avctx->priv_data;
|
||||||
|
|
||||||
avctx->pix_fmt = PIX_FMT_XVMC_MPEG2_IDCT;
|
avctx->pix_fmt = PIX_FMT_XVMC_MPEG2_IDCT;
|
||||||
avctx->xvmc_acceleration = 1;
|
avctx->xvmc_acceleration = 2;//2 - the blocks are packed!
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -56,7 +56,7 @@ static int sse_mb(MpegEncContext *s);
|
|||||||
#ifdef HAVE_XVMC
|
#ifdef HAVE_XVMC
|
||||||
extern int XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
|
extern int XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
|
||||||
extern void XVMC_field_end(MpegEncContext *s);
|
extern void XVMC_field_end(MpegEncContext *s);
|
||||||
extern void XVMC_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
|
extern void XVMC_decode_mb(MpegEncContext *s);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
|
void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
|
||||||
@ -519,6 +519,10 @@ int MPV_common_init(MpegEncContext *s)
|
|||||||
|
|
||||||
s->block= s->blocks[0];
|
s->block= s->blocks[0];
|
||||||
|
|
||||||
|
for(i=0;i<12;i++){
|
||||||
|
s->pblocks[i] = (short *)(&s->block[i]);
|
||||||
|
}
|
||||||
|
|
||||||
s->parse_context.state= -1;
|
s->parse_context.state= -1;
|
||||||
|
|
||||||
s->context_initialized = 1;
|
s->context_initialized = 1;
|
||||||
@ -2485,7 +2489,7 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
|
|||||||
const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
|
const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
|
||||||
#ifdef HAVE_XVMC
|
#ifdef HAVE_XVMC
|
||||||
if(s->avctx->xvmc_acceleration){
|
if(s->avctx->xvmc_acceleration){
|
||||||
XVMC_decode_mb(s,block);
|
XVMC_decode_mb(s);//xvmc uses pblocks
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -655,6 +655,8 @@ typedef struct MpegEncContext {
|
|||||||
int rtp_payload_size;
|
int rtp_payload_size;
|
||||||
void (*rtp_callback)(void *data, int size, int packet_number);
|
void (*rtp_callback)(void *data, int size, int packet_number);
|
||||||
uint8_t *ptr_lastgob;
|
uint8_t *ptr_lastgob;
|
||||||
|
int swap_uv;//vcr2 codec is an mpeg2 variant with U and V swapped
|
||||||
|
short * pblocks[12];
|
||||||
|
|
||||||
DCTELEM (*block)[64]; ///< points to one of the following blocks
|
DCTELEM (*block)[64]; ///< points to one of the following blocks
|
||||||
DCTELEM (*blocks)[6][64]; // for HQ mode we need to keep the best block
|
DCTELEM (*blocks)[6][64]; // for HQ mode we need to keep the best block
|
||||||
|
@ -41,6 +41,33 @@
|
|||||||
|
|
||||||
//#include "xvmc_debug.h"
|
//#include "xvmc_debug.h"
|
||||||
|
|
||||||
|
//set s->block
|
||||||
|
/**
 * Point s->block at the next free data block of the XvMC render state.
 *
 * The render state is taken from s->current_picture.data[2]. If it is NULL
 * or its magic is not MP_XVMC_RENDER_MAGIC, s->block is left unchanged.
 *
 * Not declared inline: the function is called from another file, and a plain
 * C99 inline definition would not provide the external definition it needs.
 */
void XVMC_init_block(MpegEncContext *s){
    xvmc_render_state_t *render = (xvmc_render_state_t*)s->current_picture.data[2];

    assert(render != NULL);
    if( (render == NULL) || (render->magic != MP_XVMC_RENDER_MAGIC) ){
        assert(0);
        return;//make sure that this is render packet
    }
    //s->block is a pointer to 64-coefficient blocks, so cast to exactly that type
    s->block = (DCTELEM (*)[64])(render->data_blocks+(render->next_free_data_block_num)*64);
}
|
||||||
|
|
||||||
|
/**
 * Assign s->pblocks[0..5] so that coded blocks are stored back to back,
 * starting at s->block[0].
 *
 * @param s   decoder context; s->block must already point at the destination
 * @param cbp coded block pattern: bit 5 selects block 0 ... bit 0 selects
 *            block 5. Passing -1 marks all six blocks as coded.
 *
 * Blocks whose cbp bit is clear get a NULL entry in s->pblocks.
 */
void XVMC_pack_pblocks(MpegEncContext *s, int cbp){
    //block-scoped constant; a #define here would leak into the rest of the file
    enum { NUM_BLOCKS = 6 };
    int i;
    int j = 0;//index of the next packed slot in s->block

    for(i=0; i<NUM_BLOCKS; i++){
        if(cbp & (1<<(NUM_BLOCKS-1-i)) ){
            s->pblocks[i] = (short *)(&s->block[(j++)]);
        }else{
            s->pblocks[i] = NULL;
        }
    }
}
|
||||||
|
|
||||||
static int calc_cbp(MpegEncContext *s, int blocknum){
|
static int calc_cbp(MpegEncContext *s, int blocknum){
|
||||||
/* compute cbp */
|
/* compute cbp */
|
||||||
// for I420 bit_offset=5
|
// for I420 bit_offset=5
|
||||||
@ -110,7 +137,7 @@ xvmc_render_state_t * render;
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void XVMC_decode_mb(MpegEncContext *s, DCTELEM block[6][64]){
|
void XVMC_decode_mb(MpegEncContext *s){
|
||||||
XvMCMacroBlock * mv_block;
|
XvMCMacroBlock * mv_block;
|
||||||
xvmc_render_state_t * render;
|
xvmc_render_state_t * render;
|
||||||
int i,cbp,blocks_per_mb;
|
int i,cbp,blocks_per_mb;
|
||||||
@ -242,14 +269,14 @@ const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
|
|||||||
*/
|
*/
|
||||||
if(s->flags & CODEC_FLAG_GRAY){
|
if(s->flags & CODEC_FLAG_GRAY){
|
||||||
if(s->mb_intra){//intra frames are alwasy full chroma block
|
if(s->mb_intra){//intra frames are alwasy full chroma block
|
||||||
memset(block[4],0,sizeof(short)*8*8);//so we need to clear them
|
for(i=4; i<blocks_per_mb; i++){
|
||||||
memset(block[5],0,sizeof(short)*8*8);
|
memset(s->pblocks[i],0,sizeof(short)*8*8);//so we need to clear them
|
||||||
if(!render->unsigned_intra)
|
if(!render->unsigned_intra)
|
||||||
block[4][0] = block[5][0] = 1<<10;
|
s->pblocks[i][0] = 1<<10;
|
||||||
}
|
}
|
||||||
else
|
}else
|
||||||
blocks_per_mb = 4;//Luminance blocks only
|
blocks_per_mb = 4;//Luminance blocks only
|
||||||
};
|
}
|
||||||
cbp = calc_cbp(s,blocks_per_mb);
|
cbp = calc_cbp(s,blocks_per_mb);
|
||||||
mv_block->coded_block_pattern = cbp;
|
mv_block->coded_block_pattern = cbp;
|
||||||
if(cbp == 0)
|
if(cbp == 0)
|
||||||
@ -259,14 +286,24 @@ const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
|
|||||||
if(s->block_last_index[i] >= 0){
|
if(s->block_last_index[i] >= 0){
|
||||||
// i do not have unsigned_intra MOCO to test, hope it is OK
|
// i do not have unsigned_intra MOCO to test, hope it is OK
|
||||||
if( (s->mb_intra) && ( render->idct || (!render->idct && !render->unsigned_intra)) )
|
if( (s->mb_intra) && ( render->idct || (!render->idct && !render->unsigned_intra)) )
|
||||||
block[i][0]-=1<<10;
|
s->pblocks[i][0]-=1<<10;
|
||||||
if(!render->idct){
|
if(!render->idct){
|
||||||
s->dsp.idct(block[i]);
|
s->dsp.idct(s->pblocks[i]);
|
||||||
//!!TODO!clip!!!
|
//!!TODO!clip!!!
|
||||||
}
|
}
|
||||||
//TODO:avoid block copy by modifying s->block pointer
|
//copy blocks only if the codec doesn't support pblocks reordering
|
||||||
memcpy(&render->data_blocks[(render->next_free_data_block_num++)*64],
|
if(s->avctx->xvmc_acceleration == 1){
|
||||||
block[i],sizeof(short)*8*8);
|
memcpy(&render->data_blocks[(render->next_free_data_block_num)*64],
|
||||||
|
s->pblocks[i],sizeof(short)*8*8);
|
||||||
|
}else{
|
||||||
|
/* if(s->pblocks[i] != &render->data_blocks[
|
||||||
|
(render->next_free_data_block_num)*64]){
|
||||||
|
printf("ERROR mb(%d,%d) s->pblocks[i]=%p data_block[]=%p\n",
|
||||||
|
s->mb_x,s->mb_y, s->pblocks[i],
|
||||||
|
&render->data_blocks[(render->next_free_data_block_num)*64]);
|
||||||
|
}*/
|
||||||
|
}
|
||||||
|
render->next_free_data_block_num++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
render->filled_mv_blocks_num++;
|
render->filled_mv_blocks_num++;
|
||||||
|
Loading…
Reference in New Issue
Block a user