From d962f6fd3a95e3345a7c17df938c75f67a6c6363 Mon Sep 17 00:00:00 2001
From: Arpi
Date: Sun, 9 Dec 2001 12:04:09 +0000
Subject: [PATCH] new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable

Originally committed as revision 234 to svn://svn.ffmpeg.org/ffmpeg/trunk
---
 libavcodec/Makefile           |  5 +++--
 libavcodec/dsputil.c          | 31 +++++++++++++++++++++++++++++++
 libavcodec/dsputil.h          | 23 +++++++++++++++++++++++
 libavcodec/h263.c             | 10 +++++-----
 libavcodec/i386/dsputil_mmx.c |  5 +++++
 5 files changed, 67 insertions(+), 7 deletions(-)

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 7c3133a196..5dbd4ed8c3 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -6,7 +6,7 @@ LDFLAGS= -g
 OBJS= common.o utils.o mpegvideo.o h263.o jrevdct.o jfdctfst.o \
       mpegaudio.o ac3enc.o mjpeg.o resample.o dsputil.o \
       motion_est.o imgconvert.o imgresample.o msmpeg4.o \
-      mpeg12.o h263dec.o rv10.o mpegaudiodec.o pcm.o
+      mpeg12.o h263dec.o rv10.o mpegaudiodec.o pcm.o simple_idct.o
 ASM_OBJS=
 
 # currently using libac3 for ac3 decoding
@@ -20,7 +20,8 @@ endif
 ifeq ($(TARGET_MMX),yes)
 OBJS += i386/fdct_mmx.o i386/cputest.o \
         i386/dsputil_mmx.o i386/mpegvideo_mmx.o \
-        i386/idct_mmx.o i386/motion_est_mmx.o
+        i386/idct_mmx.o i386/motion_est_mmx.o \
+        i386/simple_idct_mmx.o
 endif
 
 # armv4l specific stuff
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index b0cea56bda..f699b2ef68 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -20,6 +20,7 @@ #include
 #include
 #include "avcodec.h"
 #include "dsputil.h"
+#include "simple_idct.h"
 
 void (*ff_idct)(DCTELEM *block);
 void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
@@ -388,6 +389,27 @@ int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
 
 /* permute block according so that it corresponds to the MMX idct
    order */
+#ifdef SIMPLE_IDCT
+void block_permute(INT16 *block)
+{
+    int i;
+    INT16 temp[64];
+
+// for(i=0; i<64; i++) temp[i] = block[ block_permute_op(i) ];
+    for(i=0; i<64; i++) temp[ block_permute_op(i) ] = block[i];
+
+    for(i=0; i<64; i++) block[i] = temp[i];
+/*
+    for(i=0; i<64; i++)
+    {
+        if((i&7)==0) printf("\n");
+        printf("%2d ", block[i]);
+    }
+*/
+}
+
+#else
+
 void block_permute(INT16 *block)
 {
     int tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
@@ -409,6 +431,7 @@ void block_permute(INT16 *block)
         block += 8;
     }
 }
+#endif
 
 void dsputil_init(void)
 {
@@ -425,7 +448,11 @@ void dsputil_init(void)
         squareTbl[i] = (i - 256) * (i - 256);
     }
 
+#ifdef SIMPLE_IDCT
+    ff_idct = simple_idct;
+#else
     ff_idct = j_rev_dct;
+#endif
     get_pixels = get_pixels_c;
     put_pixels_clamped = put_pixels_clamped_c;
     add_pixels_clamped = add_pixels_clamped_c;
@@ -449,6 +476,10 @@ void dsputil_init(void)
     use_permuted_idct = 0;
 #endif
 
+#ifdef SIMPLE_IDCT
+    if(ff_idct == simple_idct) use_permuted_idct=0;
+#endif
+
     if (use_permuted_idct) {
         /* permute for IDCT */
         for(i=0;i<64;i++) {
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index dd2cc945ce..12f0e3f2bb 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -67,10 +67,33 @@ int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx, int h);
 int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx, int h);
 int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx, int h);
 
+#if defined (SIMPLE_IDCT) && defined (HAVE_MMX)
+static inline int block_permute_op(int j)
+{
+static const int table[64]={
+    0x00, 0x08, 0x01, 0x09, 0x04, 0x0C, 0x05, 0x0D,
+    0x10, 0x18, 0x11, 0x19, 0x14, 0x1C, 0x15, 0x1D,
+    0x02, 0x0A, 0x03, 0x0B, 0x06, 0x0E, 0x07, 0x0F,
+    0x12, 0x1A, 0x13, 0x1B, 0x16, 0x1E, 0x17, 0x1F,
+    0x20, 0x28, 0x21, 0x29, 0x24, 0x2C, 0x25, 0x2D,
+    0x30, 0x38, 0x31, 0x39, 0x34, 0x3C, 0x35, 0x3D,
+    0x22, 0x2A, 0x23, 0x2B, 0x26, 0x2E, 0x27, 0x2F,
+    0x32, 0x3A, 0x33, 0x3B, 0x36, 0x3E, 0x37, 0x3F,
+};
+
+    return table[j];
+}
+#elif defined (SIMPLE_IDCT)
+static inline int block_permute_op(int j)
+{
+    return j;
+}
+#else
 static inline int block_permute_op(int j)
 {
     return (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2);
 }
+#endif
 
 void block_permute(INT16 *block);
 
diff --git a/libavcodec/h263.c b/libavcodec/h263.c
index 3461b4f676..da694411f0 100644
--- a/libavcodec/h263.c
+++ b/libavcodec/h263.c
@@ -554,7 +554,7 @@ static int mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr, int *di
     return pred;
 }
 
-void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n,
+void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n,
                    int dir)
 {
     int x, y, wrap, i;
@@ -579,22 +579,22 @@ void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n,
             /* left prediction */
             ac_val -= 16;
             for(i=1;i<8;i++) {
-                block[i*8] += ac_val[i];
+                block[block_permute_op(i*8)] += ac_val[i];
             }
         } else {
             /* top prediction */
             ac_val -= 16 * wrap;
             for(i=1;i<8;i++) {
-                block[i] += ac_val[i + 8];
+                block[block_permute_op(i)] += ac_val[i + 8];
             }
         }
     }
     /* left copy */
     for(i=1;i<8;i++)
-        ac_val1[i] = block[i * 8];
+        ac_val1[i] = block[block_permute_op(i * 8)];
     /* top copy */
     for(i=1;i<8;i++)
-        ac_val1[8 + i] = block[i];
+        ac_val1[8 + i] = block[block_permute_op(i)];
 }
 
 static inline void mpeg4_encode_dc(MpegEncContext * s, int level, int n, int *dir_ptr)
diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c
index 2197b8fe7e..8647ed187b 100644
--- a/libavcodec/i386/dsputil_mmx.c
+++ b/libavcodec/i386/dsputil_mmx.c
@@ -20,6 +20,7 @@
  */
 
 #include "../dsputil.h"
+#include "../simple_idct.h"
 
 int mm_flags; /* multimedia extension flags */
 
@@ -1047,5 +1048,9 @@ void dsputil_init_mmx(void)
         } else {
             ff_idct = ff_mmx_idct;
         }
+#ifdef SIMPLE_IDCT
+// ff_idct = simple_idct;
+        ff_idct = simple_idct_mmx;
+#endif
     }
 }