mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-11-26 19:01:44 +02:00
MMX/MMXEXT iDCT support, using external functions currently defined in libmpeg2
Gives an average 13-20% MPEG decoding speedup on x86 systems. Originally committed as revision 30 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
2d6d0c1d66
commit
4af7bcc185
@ -21,6 +21,7 @@
|
|||||||
#include "avcodec.h"
|
#include "avcodec.h"
|
||||||
#include "dsputil.h"
|
#include "dsputil.h"
|
||||||
|
|
||||||
|
void (*ff_idct)(DCTELEM *block);
|
||||||
void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
|
void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
|
||||||
void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
|
void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
|
||||||
void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
|
void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
|
||||||
@ -363,6 +364,7 @@ void dsputil_init(void)
|
|||||||
squareTbl[i] = (i - 256) * (i - 256);
|
squareTbl[i] = (i - 256) * (i - 256);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ff_idct = j_rev_dct;
|
||||||
get_pixels = get_pixels_c;
|
get_pixels = get_pixels_c;
|
||||||
put_pixels_clamped = put_pixels_clamped_c;
|
put_pixels_clamped = put_pixels_clamped_c;
|
||||||
add_pixels_clamped = add_pixels_clamped_c;
|
add_pixels_clamped = add_pixels_clamped_c;
|
||||||
|
@ -25,6 +25,7 @@ void dsputil_init(void);
|
|||||||
|
|
||||||
/* pixel ops : interface with DCT */
|
/* pixel ops : interface with DCT */
|
||||||
|
|
||||||
|
extern void (*ff_idct)(DCTELEM *block);
|
||||||
extern void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
|
extern void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
|
||||||
extern void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
|
extern void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
|
||||||
extern void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
|
extern void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
|
||||||
|
@ -29,6 +29,16 @@ int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h);
|
|||||||
int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h);
|
int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h);
|
||||||
int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h);
|
int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h);
|
||||||
|
|
||||||
|
#ifdef USE_MMX_IDCT
|
||||||
|
/* external functions, defined in libmpeg2 */
|
||||||
|
void mmx_idct(DCTELEM *block);
|
||||||
|
void mmxext_idct(DCTELEM *block);
|
||||||
|
/* this should be in dsputil.h? -- A'rpi */
|
||||||
|
extern UINT8 ff_alternate_horizontal_scan[64];
|
||||||
|
extern UINT8 ff_alternate_vertical_scan[64];
|
||||||
|
extern UINT8 zigzag_direct[64];
|
||||||
|
#endif
|
||||||
|
|
||||||
/* pixel operations */
|
/* pixel operations */
|
||||||
static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001;
|
static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001;
|
||||||
static const unsigned long long int mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002;
|
static const unsigned long long int mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002;
|
||||||
@ -1039,5 +1049,23 @@ void dsputil_init_mmx(void)
|
|||||||
sub_pixels_tab[1] = sub_pixels_x2_3dnow;
|
sub_pixels_tab[1] = sub_pixels_x2_3dnow;
|
||||||
sub_pixels_tab[2] = sub_pixels_y2_3dnow;
|
sub_pixels_tab[2] = sub_pixels_y2_3dnow;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef USE_MMX_IDCT
|
||||||
|
/* use MMX / MMXEXT iDCT code from libmpeg2 */
|
||||||
|
//printf("LIBAVCODEC: Using MMX%s iDCT code\n",(mm_flags & MM_MMXEXT)?"EXT":"");
|
||||||
|
ff_idct = (mm_flags & MM_MMXEXT) ? mmxext_idct : mmx_idct;
|
||||||
|
/* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
|
||||||
|
{ int i,j;
|
||||||
|
for (i = 0; i < 64; i++) {
|
||||||
|
j = zigzag_direct[i];
|
||||||
|
zigzag_direct[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2);
|
||||||
|
j = ff_alternate_horizontal_scan[i];
|
||||||
|
ff_alternate_horizontal_scan[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2);
|
||||||
|
j = ff_alternate_vertical_scan[i];
|
||||||
|
ff_alternate_vertical_scan[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -331,7 +331,8 @@ static const UINT8 mbMotionVectorTable[17][2] = {
|
|||||||
{ 0xc, 10 },
|
{ 0xc, 10 },
|
||||||
};
|
};
|
||||||
|
|
||||||
const UINT8 zigzag_direct[64] = {
|
//const
|
||||||
|
UINT8 zigzag_direct[64] = {
|
||||||
0, 1, 8, 16, 9, 2, 3, 10,
|
0, 1, 8, 16, 9, 2, 3, 10,
|
||||||
17, 24, 32, 25, 18, 11, 4, 5,
|
17, 24, 32, 25, 18, 11, 4, 5,
|
||||||
12, 19, 26, 33, 40, 48, 41, 34,
|
12, 19, 26, 33, 40, 48, 41, 34,
|
||||||
|
@ -634,7 +634,7 @@ static inline void put_dct(MpegEncContext *s,
|
|||||||
{
|
{
|
||||||
if (!s->mpeg2)
|
if (!s->mpeg2)
|
||||||
s->dct_unquantize(s, block, i, s->qscale);
|
s->dct_unquantize(s, block, i, s->qscale);
|
||||||
j_rev_dct (block);
|
ff_idct (block);
|
||||||
put_pixels_clamped(block, dest, line_size);
|
put_pixels_clamped(block, dest, line_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -645,7 +645,7 @@ static inline void add_dct(MpegEncContext *s,
|
|||||||
if (s->block_last_index[i] >= 0) {
|
if (s->block_last_index[i] >= 0) {
|
||||||
if (!s->mpeg2)
|
if (!s->mpeg2)
|
||||||
s->dct_unquantize(s, block, i, s->qscale);
|
s->dct_unquantize(s, block, i, s->qscale);
|
||||||
j_rev_dct (block);
|
ff_idct (block);
|
||||||
add_pixels_clamped(block, dest, line_size);
|
add_pixels_clamped(block, dest, line_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -179,7 +179,8 @@ typedef struct MpegEncContext {
|
|||||||
DCTELEM *block, int n, int qscale);
|
DCTELEM *block, int n, int qscale);
|
||||||
} MpegEncContext;
|
} MpegEncContext;
|
||||||
|
|
||||||
extern const UINT8 zigzag_direct[64];
|
//const
|
||||||
|
extern UINT8 zigzag_direct[64];
|
||||||
|
|
||||||
int MPV_common_init(MpegEncContext *s);
|
int MPV_common_init(MpegEncContext *s);
|
||||||
void MPV_common_end(MpegEncContext *s);
|
void MPV_common_end(MpegEncContext *s);
|
||||||
|
Loading…
Reference in New Issue
Block a user