mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
Bugfix: sizeof(s->block) isn't 64*6*2 anymore.
mpeg12 decoding optimization. Originally committed as revision 364 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
cf8039b2cf
commit
649c00c96d
@ -30,6 +30,7 @@ void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
|
|||||||
void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
|
void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
|
||||||
void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
|
void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
|
||||||
void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
|
void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
|
||||||
|
void (*clear_blocks)(DCTELEM *blocks);
|
||||||
|
|
||||||
op_pixels_abs_func pix_abs16x16;
|
op_pixels_abs_func pix_abs16x16;
|
||||||
op_pixels_abs_func pix_abs16x16_x2;
|
op_pixels_abs_func pix_abs16x16_x2;
|
||||||
@ -866,6 +867,11 @@ void block_permute(INT16 *block)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Portable C implementation of clear_blocks: sets 6*64 DCTELEM entries,
 * starting at blocks, to zero. */
void clear_blocks_c(DCTELEM *blocks)
{
    /* 6 groups of 64 entries are cleared in a single memset call. */
    const size_t entry_count = 6 * 64;

    memset(blocks, 0, entry_count * sizeof(DCTELEM));
}
|
||||||
|
|
||||||
void dsputil_init(void)
|
void dsputil_init(void)
|
||||||
{
|
{
|
||||||
int i, j;
|
int i, j;
|
||||||
@ -890,6 +896,7 @@ void dsputil_init(void)
|
|||||||
put_pixels_clamped = put_pixels_clamped_c;
|
put_pixels_clamped = put_pixels_clamped_c;
|
||||||
add_pixels_clamped = add_pixels_clamped_c;
|
add_pixels_clamped = add_pixels_clamped_c;
|
||||||
gmc1= gmc1_c;
|
gmc1= gmc1_c;
|
||||||
|
clear_blocks= clear_blocks_c;
|
||||||
|
|
||||||
pix_abs16x16 = pix_abs16x16_c;
|
pix_abs16x16 = pix_abs16x16_c;
|
||||||
pix_abs16x16_x2 = pix_abs16x16_x2_c;
|
pix_abs16x16_x2 = pix_abs16x16_x2_c;
|
||||||
|
@ -40,11 +40,13 @@ extern void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
|
|||||||
extern void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
|
extern void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
|
||||||
extern void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
|
extern void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
|
||||||
extern void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
|
extern void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
|
||||||
|
extern void (*clear_blocks)(DCTELEM *blocks);
|
||||||
|
|
||||||
|
|
||||||
void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size);
|
void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size);
|
||||||
void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
|
void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
|
||||||
void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
|
void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
|
||||||
|
void clear_blocks_c(DCTELEM *blocks);
|
||||||
|
|
||||||
/* add and put pixel (decoding) */
|
/* add and put pixel (decoding) */
|
||||||
typedef void (*op_pixels_func)(UINT8 *block, const UINT8 *pixels, int line_size, int h);
|
typedef void (*op_pixels_func)(UINT8 *block, const UINT8 *pixels, int line_size, int h);
|
||||||
|
@ -156,6 +156,7 @@ static int h263_decode_frame(AVCodecContext *avctx,
|
|||||||
if (s->mb_y && !s->h263_pred) {
|
if (s->mb_y && !s->h263_pred) {
|
||||||
s->first_gob_line = h263_decode_gob_header(s);
|
s->first_gob_line = h263_decode_gob_header(s);
|
||||||
}
|
}
|
||||||
|
|
||||||
s->block_index[0]= s->block_wrap[0]*(s->mb_y*2 + 1) - 1;
|
s->block_index[0]= s->block_wrap[0]*(s->mb_y*2 + 1) - 1;
|
||||||
s->block_index[1]= s->block_wrap[0]*(s->mb_y*2 + 1);
|
s->block_index[1]= s->block_wrap[0]*(s->mb_y*2 + 1);
|
||||||
s->block_index[2]= s->block_wrap[0]*(s->mb_y*2 + 2) - 1;
|
s->block_index[2]= s->block_wrap[0]*(s->mb_y*2 + 2) - 1;
|
||||||
@ -183,28 +184,8 @@ static int h263_decode_frame(AVCodecContext *avctx,
|
|||||||
s->y_dc_scale = 8;
|
s->y_dc_scale = 8;
|
||||||
s->c_dc_scale = 8;
|
s->c_dc_scale = 8;
|
||||||
}
|
}
|
||||||
|
clear_blocks(s->block[0]);
|
||||||
|
|
||||||
#ifdef HAVE_MMX
|
|
||||||
if (mm_flags & MM_MMX) {
|
|
||||||
asm volatile(
|
|
||||||
"pxor %%mm7, %%mm7 \n\t"
|
|
||||||
"movl $-128*6, %%eax \n\t"
|
|
||||||
"1: \n\t"
|
|
||||||
"movq %%mm7, (%0, %%eax) \n\t"
|
|
||||||
"movq %%mm7, 8(%0, %%eax) \n\t"
|
|
||||||
"movq %%mm7, 16(%0, %%eax) \n\t"
|
|
||||||
"movq %%mm7, 24(%0, %%eax) \n\t"
|
|
||||||
"addl $32, %%eax \n\t"
|
|
||||||
" js 1b \n\t"
|
|
||||||
: : "r" (((int)s->block)+128*6)
|
|
||||||
: "%eax"
|
|
||||||
);
|
|
||||||
}else{
|
|
||||||
memset(s->block, 0, sizeof(s->block));
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
memset(s->block, 0, sizeof(s->block));
|
|
||||||
#endif
|
|
||||||
s->mv_dir = MV_DIR_FORWARD;
|
s->mv_dir = MV_DIR_FORWARD;
|
||||||
s->mv_type = MV_TYPE_16X16;
|
s->mv_type = MV_TYPE_16X16;
|
||||||
if (s->h263_msmpeg4) {
|
if (s->h263_msmpeg4) {
|
||||||
|
@ -1025,6 +1025,23 @@ static void sub_pixels_xy2_mmx( DCTELEM *block, const UINT8 *pixels, int line
|
|||||||
} while(--h);
|
} while(--h);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* MMX implementation of clear_blocks: writes zeros to the 128*6 bytes
 * starting at blocks.
 *
 * The asm operand points at the END of the region (blocks + 128*6 bytes) and
 * %eax counts from -128*6 up to 0 in steps of 32, so the sign flag alone
 * terminates the loop. Each iteration stores four zeroed quadwords (32 bytes).
 *
 * NOTE(review): the (int) cast assumes a pointer fits in an int (32-bit x86);
 * confirm before building for a target with wider pointers.
 * NOTE(review): %mm7 is overwritten and no emms is issued here; presumably
 * callers handle MMX state elsewhere - verify.
 */
static void clear_blocks_mmx(DCTELEM *blocks)
{
    asm volatile(
        "pxor %%mm7, %%mm7              \n\t" /* mm7 = 0 */
        "movl $-128*6, %%eax            \n\t" /* negative byte offset from region end */
        "1:                             \n\t"
        "movq %%mm7, (%0, %%eax)        \n\t"
        "movq %%mm7, 8(%0, %%eax)       \n\t"
        "movq %%mm7, 16(%0, %%eax)      \n\t"
        "movq %%mm7, 24(%0, %%eax)      \n\t"
        "addl $32, %%eax                \n\t"
        " js 1b                         \n\t" /* loop while the offset is still negative */
        : : "r" (((int)blocks)+128*6)
        /* "memory": the stores go through an address the compiler only sees
         * as an input register, so it must be told that memory is modified;
         * otherwise it may keep stale values cached or reorder accesses
         * around this statement. */
        : "%eax", "memory"
    );
}
|
||||||
|
|
||||||
static void just_return() { return; }
|
static void just_return() { return; }
|
||||||
|
|
||||||
void dsputil_init_mmx(void)
|
void dsputil_init_mmx(void)
|
||||||
@ -1049,6 +1066,7 @@ void dsputil_init_mmx(void)
|
|||||||
get_pixels = get_pixels_mmx;
|
get_pixels = get_pixels_mmx;
|
||||||
put_pixels_clamped = put_pixels_clamped_mmx;
|
put_pixels_clamped = put_pixels_clamped_mmx;
|
||||||
add_pixels_clamped = add_pixels_clamped_mmx;
|
add_pixels_clamped = add_pixels_clamped_mmx;
|
||||||
|
clear_blocks= clear_blocks_mmx;
|
||||||
|
|
||||||
pix_abs16x16 = pix_abs16x16_mmx;
|
pix_abs16x16 = pix_abs16x16_mmx;
|
||||||
pix_abs16x16_x2 = pix_abs16x16_x2_mmx;
|
pix_abs16x16_x2 = pix_abs16x16_x2_mmx;
|
||||||
|
@ -1402,7 +1402,7 @@ static int mpeg_decode_slice(AVCodecContext *avctx,
|
|||||||
}
|
}
|
||||||
|
|
||||||
for(;;) {
|
for(;;) {
|
||||||
memset(s->block, 0, sizeof(s->block));
|
clear_blocks(s->block[0]);
|
||||||
ret = mpeg_decode_mb(s, s->block);
|
ret = mpeg_decode_mb(s, s->block);
|
||||||
dprintf("ret=%d\n", ret);
|
dprintf("ret=%d\n", ret);
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
|
Loading…
Reference in New Issue
Block a user