1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-01-08 13:22:53 +02:00

IDCT permutation cleanup; the IDCT can now be selected per context

fixing some thread-unsafe code

Originally committed as revision 980 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
Michael Niedermayer 2002-09-29 22:44:22 +00:00
parent f9bb4bdffc
commit 2ad1516a6c
20 changed files with 586 additions and 516 deletions

View File

@ -117,6 +117,7 @@ static int do_deinterlace = 0;
static int workaround_bugs = 0; static int workaround_bugs = 0;
static int error_resilience = 0; static int error_resilience = 0;
static int dct_algo = 0; static int dct_algo = 0;
static int idct_algo = 0;
static int use_part = 0; static int use_part = 0;
static int packet_size = 0; static int packet_size = 0;
@ -1392,6 +1393,12 @@ void opt_dct_algo(const char *arg)
dct_algo = atoi(arg); dct_algo = atoi(arg);
} }
/* Option handler: converts arg with atoi() and stores the result in the
   file-level idct_algo setting. atoi() reports no errors, so text that does
   not start with a number is stored as 0. */
void opt_idct_algo(const char *arg)
{
    const int selected = atoi(arg);

    idct_algo = selected;
}
void opt_error_resilience(const char *arg) void opt_error_resilience(const char *arg)
{ {
error_resilience = atoi(arg); error_resilience = atoi(arg);
@ -1750,6 +1757,7 @@ void opt_input_file(const char *filename)
rfps = ic->streams[i]->r_frame_rate; rfps = ic->streams[i]->r_frame_rate;
enc->workaround_bugs = workaround_bugs; enc->workaround_bugs = workaround_bugs;
enc->error_resilience = error_resilience; enc->error_resilience = error_resilience;
enc->idct_algo= idct_algo;
if (enc->frame_rate != rfps) { if (enc->frame_rate != rfps) {
fprintf(stderr,"\nSeems that stream %d comes from film source: %2.2f->%2.2f\n", fprintf(stderr,"\nSeems that stream %d comes from film source: %2.2f->%2.2f\n",
i, (float)enc->frame_rate / FRAME_RATE_BASE, i, (float)enc->frame_rate / FRAME_RATE_BASE,
@ -1922,6 +1930,7 @@ void opt_output_file(const char *filename)
video_enc->i_quant_offset = video_i_qoffset; video_enc->i_quant_offset = video_i_qoffset;
video_enc->b_quant_offset = video_b_qoffset; video_enc->b_quant_offset = video_b_qoffset;
video_enc->dct_algo = dct_algo; video_enc->dct_algo = dct_algo;
video_enc->idct_algo = idct_algo;
if(packet_size){ if(packet_size){
video_enc->rtp_mode= 1; video_enc->rtp_mode= 1;
video_enc->rtp_payload_size= packet_size; video_enc->rtp_payload_size= packet_size;
@ -2287,6 +2296,7 @@ const OptionDef options[] = {
{ "me", HAS_ARG | OPT_EXPERT, {(void*)opt_motion_estimation}, "set motion estimation method", { "me", HAS_ARG | OPT_EXPERT, {(void*)opt_motion_estimation}, "set motion estimation method",
"method" }, "method" },
{ "dct_algo", HAS_ARG | OPT_EXPERT, {(void*)opt_dct_algo}, "set dct algo", "algo" }, { "dct_algo", HAS_ARG | OPT_EXPERT, {(void*)opt_dct_algo}, "set dct algo", "algo" },
{ "idct_algo", HAS_ARG | OPT_EXPERT, {(void*)opt_idct_algo}, "set idct algo", "algo" },
{ "er", HAS_ARG | OPT_EXPERT, {(void*)opt_error_resilience}, "set error resilience", "" }, { "er", HAS_ARG | OPT_EXPERT, {(void*)opt_error_resilience}, "set error resilience", "" },
{ "bf", HAS_ARG | OPT_EXPERT, {(void*)opt_b_frames}, "use 'frames' B frames (only MPEG-4)", "frames" }, { "bf", HAS_ARG | OPT_EXPERT, {(void*)opt_b_frames}, "use 'frames' B frames (only MPEG-4)", "frames" },
{ "hq", OPT_BOOL | OPT_EXPERT, {(void*)&use_hq}, "activate high quality settings" }, { "hq", OPT_BOOL | OPT_EXPERT, {(void*)&use_hq}, "activate high quality settings" },

View File

@ -5,8 +5,8 @@
#define LIBAVCODEC_VERSION_INT 0x000406 #define LIBAVCODEC_VERSION_INT 0x000406
#define LIBAVCODEC_VERSION "0.4.6" #define LIBAVCODEC_VERSION "0.4.6"
#define LIBAVCODEC_BUILD 4628 #define LIBAVCODEC_BUILD 4629
#define LIBAVCODEC_BUILD_STR "4628" #define LIBAVCODEC_BUILD_STR "4629"
enum CodecID { enum CodecID {
CODEC_ID_NONE, CODEC_ID_NONE,
@ -684,6 +684,21 @@ typedef struct AVCodecContext {
*/ */
int fourcc; int fourcc;
/**
* idct algorithm, see FF_IDCT_* below
* encoding: set by user
* decoding: set by user
*/
int idct_algo;
#define FF_IDCT_AUTO 0
#define FF_IDCT_INT 1
#define FF_IDCT_SIMPLE 2
#define FF_IDCT_SIMPLEMMX 3
#define FF_IDCT_LIBMPEG2MMX 4
#define FF_IDCT_PS2 5
#define FF_IDCT_MLIB 6
#define FF_IDCT_ARM 7
//FIXME this should be reordered after kabis API is finished ... //FIXME this should be reordered after kabis API is finished ...
//TODO kill kabi //TODO kill kabi
/* /*

View File

@ -932,6 +932,22 @@ static inline int ff_get_fourcc(char *s){
return (s[0]) + (s[1]<<8) + (s[2]<<16) + (s[3]<<24); return (s[0]) + (s[1]<<8) + (s[2]<<16) + (s[3]<<24);
} }
/*
 * MASK_ABS(mask, level): computes level = (level ^ mask) - mask after
 * loading mask with the sign of level, i.e. level is replaced by its
 * absolute value and mask is left holding the sign mask (0 for a
 * non-negative level, all bits set for a negative one).
 *
 * Both arguments are written to, so they must be plain lvalues. The
 * arguments are not parenthesized in the expansion, and the C variant
 * expands to two separate statements, so the macro should only be used
 * where a braced block encloses it.
 */
#ifdef ARCH_X86
/* x86 variant: level is tied to EAX ("+a") and mask to EDX ("=&d"); cdq
   sign-extends EAX into EDX, then xorl/subl apply (level ^ mask) - mask. */
#define MASK_ABS(mask, level)\
asm volatile(\
"cdq \n\t"\
"xorl %1, %0 \n\t"\
"subl %1, %0 \n\t"\
: "+a" (level), "=&d" (mask)\
);
#else
/* Portable variant of the same computation. NOTE(review): assumes level is
   a 32-bit signed value and that >> on a negative value is an arithmetic
   shift -- confirm for new targets. */
#define MASK_ABS(mask, level)\
mask= level>>31;\
level= (level^mask)-mask;
#endif
#if __CPU__ >= 686 && !defined(RUNTIME_CPUDETECT) #if __CPU__ >= 686 && !defined(RUNTIME_CPUDETECT)
#define COPY3_IF_LT(x,y,a,b,c,d)\ #define COPY3_IF_LT(x,y,a,b,c,d)\
asm volatile (\ asm volatile (\

View File

@ -20,11 +20,7 @@
*/ */
#include "avcodec.h" #include "avcodec.h"
#include "dsputil.h" #include "dsputil.h"
#include "simple_idct.h"
void (*ff_idct)(DCTELEM *block);
void (*ff_idct_put)(UINT8 *dest, int line_size, DCTELEM *block);
void (*ff_idct_add)(UINT8 *dest, int line_size, DCTELEM *block);
void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride); void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride);
void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
@ -49,16 +45,11 @@ int ff_bit_exact=0;
UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
UINT32 squareTbl[512]; UINT32 squareTbl[512];
extern INT16 ff_mpeg1_default_intra_matrix[64]; const UINT8 ff_zigzag_direct[64] = {
extern INT16 ff_mpeg1_default_non_intra_matrix[64]; 0, 1, 8, 16, 9, 2, 3, 10,
extern INT16 ff_mpeg4_default_intra_matrix[64]; 17, 24, 32, 25, 18, 11, 4, 5,
extern INT16 ff_mpeg4_default_non_intra_matrix[64];
UINT8 zigzag_direct[64] = {
0, 1, 8, 16, 9, 2, 3, 10,
17, 24, 32, 25, 18, 11, 4, 5,
12, 19, 26, 33, 40, 48, 41, 34, 12, 19, 26, 33, 40, 48, 41, 34,
27, 20, 13, 6, 7, 14, 21, 28, 27, 20, 13, 6, 7, 14, 21, 28,
35, 42, 49, 56, 57, 50, 43, 36, 35, 42, 49, 56, 57, 50, 43, 36,
29, 22, 15, 23, 30, 37, 44, 51, 29, 22, 15, 23, 30, 37, 44, 51,
58, 59, 52, 45, 38, 31, 39, 46, 58, 59, 52, 45, 38, 31, 39, 46,
@ -68,11 +59,8 @@ UINT8 zigzag_direct[64] = {
/* not permutated inverse zigzag_direct + 1 for MMX quantizer */ /* not permutated inverse zigzag_direct + 1 for MMX quantizer */
UINT16 __align8 inv_zigzag_direct16[64]; UINT16 __align8 inv_zigzag_direct16[64];
/* not permutated zigzag_direct for MMX quantizer */ const UINT8 ff_alternate_horizontal_scan[64] = {
UINT8 zigzag_direct_noperm[64]; 0, 1, 2, 3, 8, 9, 16, 17,
UINT8 ff_alternate_horizontal_scan[64] = {
0, 1, 2, 3, 8, 9, 16, 17,
10, 11, 4, 5, 6, 7, 15, 14, 10, 11, 4, 5, 6, 7, 15, 14,
13, 12, 19, 18, 24, 25, 32, 33, 13, 12, 19, 18, 24, 25, 32, 33,
26, 27, 20, 21, 22, 23, 28, 29, 26, 27, 20, 21, 22, 23, 28, 29,
@ -82,8 +70,8 @@ UINT8 ff_alternate_horizontal_scan[64] = {
52, 53, 54, 55, 60, 61, 62, 63, 52, 53, 54, 55, 60, 61, 62, 63,
}; };
UINT8 ff_alternate_vertical_scan[64] = { const UINT8 ff_alternate_vertical_scan[64] = {
0, 8, 16, 24, 1, 9, 2, 10, 0, 8, 16, 24, 1, 9, 2, 10,
17, 25, 32, 40, 48, 56, 57, 49, 17, 25, 32, 40, 48, 56, 57, 49,
41, 33, 26, 18, 3, 11, 4, 12, 41, 33, 26, 18, 3, 11, 4, 12,
19, 27, 34, 42, 50, 58, 35, 43, 19, 27, 34, 42, 50, 58, 35, 43,
@ -93,21 +81,6 @@ UINT8 ff_alternate_vertical_scan[64] = {
38, 46, 54, 62, 39, 47, 55, 63, 38, 46, 54, 62, 39, 47, 55, 63,
}; };
#ifdef SIMPLE_IDCT
/* Input permutation for simple_idct_mmx.
   Every value 0x00..0x3F occurs exactly once, so the table is a bijection
   on the 64 coefficient positions of an 8x8 block. */
static UINT8 simple_mmx_permutation[64]={
0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
#endif
/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */ /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
UINT32 inverse[256]={ UINT32 inverse[256]={
0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757,
@ -144,24 +117,6 @@ UINT32 inverse[256]={
17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010,
}; };
/* used to skip zeros at the end */
UINT8 zigzag_end[64];
UINT8 permutation[64];
//UINT8 invPermutation[64];
/* Fills zigzag_end[]: entry i receives one more than the largest
   zigzag_direct[] value seen in positions 0..i (a running maximum). */
static void build_zigzag_end(void)
{
    int highest = 0;
    int pos = 0;

    while (pos < 64) {
        const int idx = zigzag_direct[pos];

        if (idx > highest) {
            highest = idx;
        }
        zigzag_end[pos] = highest + 1;
        pos++;
    }
}
int pix_sum_c(UINT8 * pix, int line_size) int pix_sum_c(UINT8 * pix, int line_size)
{ {
int s, i, j; int s, i, j;
@ -1540,65 +1495,24 @@ int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
/* permute block according so that it corresponds to the MMX idct /* permute block according so that it corresponds to the MMX idct
order */ order */
#ifdef SIMPLE_IDCT void block_permute(INT16 *block, UINT8 *permutation)
/* general permutation, but perhaps slightly slower */
void block_permute(INT16 *block)
{ {
int i; int i;
INT16 temp[64]; INT16 temp[64];
for(i=0; i<64; i++) temp[ block_permute_op(i) ] = block[i]; for(i=0; i<64; i++) temp[ permutation[i] ] = block[i];
for(i=0; i<64; i++) block[i] = temp[i]; for(i=0; i<64; i++) block[i] = temp[i];
} }
#else
/* Reorders the coefficients of an 8x8 block in place, one row of 8 at a
   time. Within each row, columns 1..6 are refilled from the old columns
   2, 4, 6, 1, 3, 5 respectively; columns 0 and 7 are left where they are. */
void block_permute(INT16 *block)
{
    /* old column that supplies each new column (entries 0 and 7 unused) */
    static const int src_col[8] = { 0, 2, 4, 6, 1, 3, 5, 7 };
    int row, col;

    for (row = 0; row < 8; row++) {
        INT16 *line = block + row * 8;
        int old[8];

        /* snapshot the six moving columns before overwriting any of them */
        for (col = 1; col < 7; col++)
            old[col] = line[col];
        for (col = 1; col < 7; col++)
            line[col] = old[src_col[col]];
    }
}
#endif
void clear_blocks_c(DCTELEM *blocks) void clear_blocks_c(DCTELEM *blocks)
{ {
memset(blocks, 0, sizeof(DCTELEM)*6*64); memset(blocks, 0, sizeof(DCTELEM)*6*64);
} }
/* XXX: those functions should be suppressed ASAP when all IDCTs are
converted */
/* Generic "IDCT then store": calls through the ff_idct function pointer on
   block, then passes the transformed block to put_pixels_clamped together
   with dest and line_size. */
void gen_idct_put(UINT8 *dest, int line_size, DCTELEM *block)
{
    (*ff_idct)(block);
    (*put_pixels_clamped)(block, dest, line_size);
}
/* Generic "IDCT then add": calls through the ff_idct function pointer on
   block, then passes the transformed block to add_pixels_clamped together
   with dest and line_size. */
void gen_idct_add(UINT8 *dest, int line_size, DCTELEM *block)
{
    (*ff_idct)(block);
    add_pixels_clamped(block, dest, line_size);
}
void dsputil_init(void) void dsputil_init(void)
{ {
int i, j; int i, j;
int use_permuted_idct;
for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i; for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
for(i=0;i<MAX_NEG_CROP;i++) { for(i=0;i<MAX_NEG_CROP;i++) {
@ -1610,11 +1524,6 @@ void dsputil_init(void)
squareTbl[i] = (i - 256) * (i - 256); squareTbl[i] = (i - 256) * (i - 256);
} }
#ifdef SIMPLE_IDCT
ff_idct = NULL;
#else
ff_idct = j_rev_dct;
#endif
get_pixels = get_pixels_c; get_pixels = get_pixels_c;
diff_pixels = diff_pixels_c; diff_pixels = diff_pixels_c;
put_pixels_clamped = put_pixels_clamped_c; put_pixels_clamped = put_pixels_clamped_c;
@ -1633,8 +1542,6 @@ void dsputil_init(void)
pix_abs8x8_y2 = pix_abs8x8_y2_c; pix_abs8x8_y2 = pix_abs8x8_y2_c;
pix_abs8x8_xy2 = pix_abs8x8_xy2_c; pix_abs8x8_xy2 = pix_abs8x8_xy2_c;
use_permuted_idct = 1;
#ifdef HAVE_MMX #ifdef HAVE_MMX
dsputil_init_mmx(); dsputil_init_mmx();
#endif #endif
@ -1643,61 +1550,18 @@ void dsputil_init(void)
#endif #endif
#ifdef HAVE_MLIB #ifdef HAVE_MLIB
dsputil_init_mlib(); dsputil_init_mlib();
use_permuted_idct = 0;
#endif #endif
#ifdef ARCH_ALPHA #ifdef ARCH_ALPHA
dsputil_init_alpha(); dsputil_init_alpha();
use_permuted_idct = 0;
#endif #endif
#ifdef ARCH_POWERPC #ifdef ARCH_POWERPC
dsputil_init_ppc(); dsputil_init_ppc();
#endif #endif
#ifdef HAVE_MMI #ifdef HAVE_MMI
dsputil_init_mmi(); dsputil_init_mmi();
use_permuted_idct = 0;
#endif #endif
#ifdef SIMPLE_IDCT for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
if (ff_idct == NULL) {
ff_idct_put = simple_idct_put;
ff_idct_add = simple_idct_add;
use_permuted_idct=0;
}
#endif
if(ff_idct != NULL) {
ff_idct_put = gen_idct_put;
ff_idct_add = gen_idct_add;
}
if(use_permuted_idct)
#ifdef SIMPLE_IDCT
for(i=0; i<64; i++) permutation[i]= simple_mmx_permutation[i];
#else
for(i=0; i<64; i++) permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
#endif
else
for(i=0; i<64; i++) permutation[i]=i;
for(i=0; i<64; i++) inv_zigzag_direct16[zigzag_direct[i]]= i+1;
for(i=0; i<64; i++) zigzag_direct_noperm[i]= zigzag_direct[i];
if (use_permuted_idct) {
/* permute for IDCT */
for(i=0;i<64;i++) {
j = zigzag_direct[i];
zigzag_direct[i] = block_permute_op(j);
j = ff_alternate_horizontal_scan[i];
ff_alternate_horizontal_scan[i] = block_permute_op(j);
j = ff_alternate_vertical_scan[i];
ff_alternate_vertical_scan[i] = block_permute_op(j);
}
block_permute(ff_mpeg1_default_intra_matrix);
block_permute(ff_mpeg1_default_non_intra_matrix);
block_permute(ff_mpeg4_default_intra_matrix);
block_permute(ff_mpeg4_default_non_intra_matrix);
}
build_zigzag_end();
} }
/* remove any non bit exact operation (testing purpose) */ /* remove any non bit exact operation (testing purpose) */

View File

@ -34,12 +34,9 @@ void j_rev_dct (DCTELEM *data);
void ff_fdct_mmx(DCTELEM *block); void ff_fdct_mmx(DCTELEM *block);
/* encoding scans */ /* encoding scans */
extern UINT8 ff_alternate_horizontal_scan[64]; extern const UINT8 ff_alternate_horizontal_scan[64];
extern UINT8 ff_alternate_vertical_scan[64]; extern const UINT8 ff_alternate_vertical_scan[64];
extern UINT8 zigzag_direct[64]; extern const UINT8 ff_zigzag_direct[64];
/* permutation table */
extern UINT8 permutation[64];
/* pixel operations */ /* pixel operations */
#define MAX_NEG_CROP 384 #define MAX_NEG_CROP 384
@ -61,9 +58,6 @@ i (michael) didnt check them, these are just the alignents which i think could b
*/ */
/* pixel ops : interface with DCT */ /* pixel ops : interface with DCT */
extern void (*ff_idct)(DCTELEM *block/*align 16*/);
extern void (*ff_idct_put)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
extern void (*ff_idct_add)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
extern void (*get_pixels)(DCTELEM *block/*align 16*/, const UINT8 *pixels/*align 8*/, int line_size); extern void (*get_pixels)(DCTELEM *block/*align 16*/, const UINT8 *pixels/*align 8*/, int line_size);
extern void (*diff_pixels)(DCTELEM *block/*align 16*/, const UINT8 *s1/*align 8*/, const UINT8 *s2/*align 8*/, int stride); extern void (*diff_pixels)(DCTELEM *block/*align 16*/, const UINT8 *s1/*align 8*/, const UINT8 *s2/*align 8*/, int stride);
extern void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size); extern void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size);
@ -119,12 +113,7 @@ int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx); int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx); int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx);
static inline int block_permute_op(int j) void block_permute(INT16 *block, UINT8 *permutation);
{
return permutation[j];
}
void block_permute(INT16 *block);
#if defined(HAVE_MMX) #if defined(HAVE_MMX)

View File

@ -287,19 +287,19 @@ static inline int decide_ac_pred(MpegEncContext * s, DCTELEM block[6][64], int d
if(s->mb_y==0 || s->qscale == s->qscale_table[xy] || n==2 || n==3){ if(s->mb_y==0 || s->qscale == s->qscale_table[xy] || n==2 || n==3){
/* same qscale */ /* same qscale */
for(i=1; i<8; i++){ for(i=1; i<8; i++){
const int level= block[n][block_permute_op(i )]; const int level= block[n][s->idct_permutation[i ]];
score0+= ABS(level); score0+= ABS(level);
score1+= ABS(level - ac_val[i+8]); score1+= ABS(level - ac_val[i+8]);
ac_val1[i ]= block[n][block_permute_op(i<<3)]; ac_val1[i ]= block[n][s->idct_permutation[i<<3]];
ac_val1[i+8]= level; ac_val1[i+8]= level;
} }
}else{ }else{
/* different qscale, we must rescale */ /* different qscale, we must rescale */
for(i=1; i<8; i++){ for(i=1; i<8; i++){
const int level= block[n][block_permute_op(i )]; const int level= block[n][s->idct_permutation[i ]];
score0+= ABS(level); score0+= ABS(level);
score1+= ABS(level - ROUNDED_DIV(ac_val[i + 8]*s->qscale_table[xy], s->qscale)); score1+= ABS(level - ROUNDED_DIV(ac_val[i + 8]*s->qscale_table[xy], s->qscale));
ac_val1[i ]= block[n][block_permute_op(i<<3)]; ac_val1[i ]= block[n][s->idct_permutation[i<<3]];
ac_val1[i+8]= level; ac_val1[i+8]= level;
} }
} }
@ -310,20 +310,20 @@ static inline int decide_ac_pred(MpegEncContext * s, DCTELEM block[6][64], int d
if(s->mb_x==0 || s->qscale == s->qscale_table[xy] || n==1 || n==3){ if(s->mb_x==0 || s->qscale == s->qscale_table[xy] || n==1 || n==3){
/* same qscale */ /* same qscale */
for(i=1; i<8; i++){ for(i=1; i<8; i++){
const int level= block[n][block_permute_op(i<<3)]; const int level= block[n][s->idct_permutation[i<<3]];
score0+= ABS(level); score0+= ABS(level);
score1+= ABS(level - ac_val[i]); score1+= ABS(level - ac_val[i]);
ac_val1[i ]= level; ac_val1[i ]= level;
ac_val1[i+8]= block[n][block_permute_op(i )]; ac_val1[i+8]= block[n][s->idct_permutation[i ]];
} }
}else{ }else{
/* different qscale, we must rescale */ /* different qscale, we must rescale */
for(i=1; i<8; i++){ for(i=1; i<8; i++){
const int level= block[n][block_permute_op(i<<3)]; const int level= block[n][s->idct_permutation[i<<3]];
score0+= ABS(level); score0+= ABS(level);
score1+= ABS(level - ROUNDED_DIV(ac_val[i]*s->qscale_table[xy], s->qscale)); score1+= ABS(level - ROUNDED_DIV(ac_val[i]*s->qscale_table[xy], s->qscale));
ac_val1[i ]= level; ac_val1[i ]= level;
ac_val1[i+8]= block[n][block_permute_op(i )]; ac_val1[i+8]= block[n][s->idct_permutation[i ]];
} }
} }
} }
@ -519,7 +519,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
/* encode each block */ /* encode each block */
for (i = 0; i < 6; i++) { for (i = 0; i < 6; i++) {
mpeg4_encode_block(s, block[i], i, 0, zigzag_direct, NULL, &s->pb); mpeg4_encode_block(s, block[i], i, 0, s->intra_scantable.permutated, NULL, &s->pb);
} }
if(interleaved_stats){ if(interleaved_stats){
@ -637,7 +637,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
/* encode each block */ /* encode each block */
for (i = 0; i < 6; i++) { for (i = 0; i < 6; i++) {
mpeg4_encode_block(s, block[i], i, 0, zigzag_direct, NULL, tex_pb); mpeg4_encode_block(s, block[i], i, 0, s->intra_scantable.permutated, NULL, tex_pb);
} }
if(interleaved_stats){ if(interleaved_stats){
@ -674,8 +674,8 @@ void mpeg4_encode_mb(MpegEncContext * s,
int last_index; int last_index;
mpeg4_inv_pred_ac(s, block[i], i, dir[i]); mpeg4_inv_pred_ac(s, block[i], i, dir[i]);
if (dir[i]==0) st = ff_alternate_vertical_scan; /* left */ if (dir[i]==0) st = s->intra_v_scantable.permutated; /* left */
else st = ff_alternate_horizontal_scan; /* top */ else st = s->intra_h_scantable.permutated; /* top */
for(last_index=63; last_index>=0; last_index--) //FIXME optimize for(last_index=63; last_index>=0; last_index--) //FIXME optimize
if(block[i][st[last_index]]) break; if(block[i][st[last_index]]) break;
@ -685,7 +685,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
} }
}else{ }else{
for(i=0; i<6; i++) for(i=0; i<6; i++)
scan_table[i]= zigzag_direct; scan_table[i]= s->intra_scantable.permutated;
} }
/* compute cbp */ /* compute cbp */
@ -746,10 +746,10 @@ void mpeg4_encode_mb(MpegEncContext * s,
if(dir[i]){ if(dir[i]){
for(j=1; j<8; j++) for(j=1; j<8; j++)
block[i][block_permute_op(j )]= ac_val[j+8]; block[i][s->idct_permutation[j ]]= ac_val[j+8];
}else{ }else{
for(j=1; j<8; j++) for(j=1; j<8; j++)
block[i][block_permute_op(j<<3)]= ac_val[j ]; block[i][s->idct_permutation[j<<3]]= ac_val[j ];
} }
s->block_last_index[i]= zigzag_last_index[i]; s->block_last_index[i]= zigzag_last_index[i];
} }
@ -974,7 +974,7 @@ void h263_pred_acdc(MpegEncContext * s, INT16 *block, int n)
if (a != 1024) { if (a != 1024) {
ac_val -= 16; ac_val -= 16;
for(i=1;i<8;i++) { for(i=1;i<8;i++) {
block[block_permute_op(i*8)] += ac_val[i]; block[s->idct_permutation[i<<3]] += ac_val[i];
} }
pred_dc = a; pred_dc = a;
} }
@ -983,7 +983,7 @@ void h263_pred_acdc(MpegEncContext * s, INT16 *block, int n)
if (c != 1024) { if (c != 1024) {
ac_val -= 16 * wrap; ac_val -= 16 * wrap;
for(i=1;i<8;i++) { for(i=1;i<8;i++) {
block[block_permute_op(i)] += ac_val[i + 8]; block[s->idct_permutation[i ]] += ac_val[i + 8];
} }
pred_dc = c; pred_dc = c;
} }
@ -1011,10 +1011,10 @@ void h263_pred_acdc(MpegEncContext * s, INT16 *block, int n)
/* left copy */ /* left copy */
for(i=1;i<8;i++) for(i=1;i<8;i++)
ac_val1[i] = block[block_permute_op(i * 8)]; ac_val1[i ] = block[s->idct_permutation[i<<3]];
/* top copy */ /* top copy */
for(i=1;i<8;i++) for(i=1;i<8;i++)
ac_val1[8 + i] = block[block_permute_op(i)]; ac_val1[8 + i] = block[s->idct_permutation[i ]];
} }
INT16 *h263_pred_motion(MpegEncContext * s, int block, INT16 *h263_pred_motion(MpegEncContext * s, int block,
@ -1425,7 +1425,7 @@ static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n)
last_index = s->block_last_index[n]; last_index = s->block_last_index[n];
last_non_zero = i - 1; last_non_zero = i - 1;
for (; i <= last_index; i++) { for (; i <= last_index; i++) {
j = zigzag_direct[i]; j = s->intra_scantable.permutated[i];
level = block[j]; level = block[j];
if (level) { if (level) {
run = i - last_non_zero - 1; run = i - last_non_zero - 1;
@ -1710,12 +1710,12 @@ void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n,
if(s->mb_x==0 || s->qscale == s->qscale_table[xy] || n==1 || n==3){ if(s->mb_x==0 || s->qscale == s->qscale_table[xy] || n==1 || n==3){
/* same qscale */ /* same qscale */
for(i=1;i<8;i++) { for(i=1;i<8;i++) {
block[block_permute_op(i*8)] += ac_val[i]; block[s->idct_permutation[i<<3]] += ac_val[i];
} }
}else{ }else{
/* different qscale, we must rescale */ /* different qscale, we must rescale */
for(i=1;i<8;i++) { for(i=1;i<8;i++) {
block[block_permute_op(i*8)] += ROUNDED_DIV(ac_val[i]*s->qscale_table[xy], s->qscale); block[s->idct_permutation[i<<3]] += ROUNDED_DIV(ac_val[i]*s->qscale_table[xy], s->qscale);
} }
} }
} else { } else {
@ -1726,23 +1726,23 @@ void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n,
if(s->mb_y==0 || s->qscale == s->qscale_table[xy] || n==2 || n==3){ if(s->mb_y==0 || s->qscale == s->qscale_table[xy] || n==2 || n==3){
/* same qscale */ /* same qscale */
for(i=1;i<8;i++) { for(i=1;i<8;i++) {
block[block_permute_op(i)] += ac_val[i + 8]; block[s->idct_permutation[i]] += ac_val[i + 8];
} }
}else{ }else{
/* different qscale, we must rescale */ /* different qscale, we must rescale */
for(i=1;i<8;i++) { for(i=1;i<8;i++) {
block[block_permute_op(i)] += ROUNDED_DIV(ac_val[i + 8]*s->qscale_table[xy], s->qscale); block[s->idct_permutation[i]] += ROUNDED_DIV(ac_val[i + 8]*s->qscale_table[xy], s->qscale);
} }
} }
} }
} }
/* left copy */ /* left copy */
for(i=1;i<8;i++) for(i=1;i<8;i++)
ac_val1[i] = block[block_permute_op(i * 8)]; ac_val1[i ] = block[s->idct_permutation[i<<3]];
/* top copy */ /* top copy */
for(i=1;i<8;i++) for(i=1;i<8;i++)
ac_val1[8 + i] = block[block_permute_op(i)]; ac_val1[8 + i] = block[s->idct_permutation[i ]];
} }
@ -1762,12 +1762,12 @@ static void mpeg4_inv_pred_ac(MpegEncContext * s, INT16 *block, int n,
if(s->mb_x==0 || s->qscale == s->qscale_table[xy] || n==1 || n==3){ if(s->mb_x==0 || s->qscale == s->qscale_table[xy] || n==1 || n==3){
/* same qscale */ /* same qscale */
for(i=1;i<8;i++) { for(i=1;i<8;i++) {
block[block_permute_op(i*8)] -= ac_val[i]; block[s->idct_permutation[i<<3]] -= ac_val[i];
} }
}else{ }else{
/* different qscale, we must rescale */ /* different qscale, we must rescale */
for(i=1;i<8;i++) { for(i=1;i<8;i++) {
block[block_permute_op(i*8)] -= ROUNDED_DIV(ac_val[i]*s->qscale_table[xy], s->qscale); block[s->idct_permutation[i<<3]] -= ROUNDED_DIV(ac_val[i]*s->qscale_table[xy], s->qscale);
} }
} }
} else { } else {
@ -1777,12 +1777,12 @@ static void mpeg4_inv_pred_ac(MpegEncContext * s, INT16 *block, int n,
if(s->mb_y==0 || s->qscale == s->qscale_table[xy] || n==2 || n==3){ if(s->mb_y==0 || s->qscale == s->qscale_table[xy] || n==2 || n==3){
/* same qscale */ /* same qscale */
for(i=1;i<8;i++) { for(i=1;i<8;i++) {
block[block_permute_op(i)] -= ac_val[i + 8]; block[s->idct_permutation[i]] -= ac_val[i + 8];
} }
}else{ }else{
/* different qscale, we must rescale */ /* different qscale, we must rescale */
for(i=1;i<8;i++) { for(i=1;i<8;i++) {
block[block_permute_op(i)] -= ROUNDED_DIV(ac_val[i + 8]*s->qscale_table[xy], s->qscale); block[s->idct_permutation[i]] -= ROUNDED_DIV(ac_val[i + 8]*s->qscale_table[xy], s->qscale);
} }
} }
} }
@ -3192,13 +3192,13 @@ intra:
static int h263_decode_motion(MpegEncContext * s, int pred, int f_code) static int h263_decode_motion(MpegEncContext * s, int pred, int f_code)
{ {
int code, val, sign, shift, l; int code, val, sign, shift, l;
code = get_vlc2(&s->gb, mv_vlc.table, MV_VLC_BITS, 2); code = get_vlc2(&s->gb, mv_vlc.table, MV_VLC_BITS, 2);
if (code < 0) if (code < 0)
return 0xffff; return 0xffff;
if (code == 0) if (code == 0)
return pred; return pred;
sign = get_bits1(&s->gb); sign = get_bits1(&s->gb);
shift = f_code - 1; shift = f_code - 1;
val = (code - 1) << shift; val = (code - 1) << shift;
@ -3211,7 +3211,7 @@ static int h263_decode_motion(MpegEncContext * s, int pred, int f_code)
/* modulo decoding */ /* modulo decoding */
if (!s->h263_long_vectors) { if (!s->h263_long_vectors) {
l = (1 << (f_code - 1)) * 32; l = 1 << (f_code + 4);
if (val < -l) { if (val < -l) {
val += l<<1; val += l<<1;
} else if (val >= l) { } else if (val >= l) {
@ -3261,15 +3261,15 @@ static int h263_decode_block(MpegEncContext * s, DCTELEM * block,
RLTable *rl = &rl_inter; RLTable *rl = &rl_inter;
const UINT8 *scan_table; const UINT8 *scan_table;
scan_table = zigzag_direct; scan_table = s->intra_scantable.permutated;
if (s->h263_aic && s->mb_intra) { if (s->h263_aic && s->mb_intra) {
rl = &rl_intra_aic; rl = &rl_intra_aic;
i = 0; i = 0;
if (s->ac_pred) { if (s->ac_pred) {
if (s->h263_aic_dir) if (s->h263_aic_dir)
scan_table = ff_alternate_vertical_scan; /* left */ scan_table = s->intra_v_scantable.permutated; /* left */
else else
scan_table = ff_alternate_horizontal_scan; /* top */ scan_table = s->intra_h_scantable.permutated; /* top */
} }
} else if (s->mb_intra) { } else if (s->mb_intra) {
/* DC coef */ /* DC coef */
@ -3417,14 +3417,14 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
rl = &rl_intra; rl = &rl_intra;
rl_vlc = rl_intra.rl_vlc[0]; rl_vlc = rl_intra.rl_vlc[0];
if(s->alternate_scan) if(s->alternate_scan)
scan_table = ff_alternate_vertical_scan; /* left */ scan_table = s->intra_v_scantable.permutated; /* left */
else if (s->ac_pred) { else if (s->ac_pred) {
if (dc_pred_dir == 0) if (dc_pred_dir == 0)
scan_table = ff_alternate_vertical_scan; /* left */ scan_table = s->intra_v_scantable.permutated; /* left */
else else
scan_table = ff_alternate_horizontal_scan; /* top */ scan_table = s->intra_h_scantable.permutated; /* top */
} else { } else {
scan_table = zigzag_direct; scan_table = s->intra_scantable.permutated;
} }
qmul=1; qmul=1;
qadd=0; qadd=0;
@ -3437,9 +3437,9 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
rl = &rl_inter; rl = &rl_inter;
if(s->alternate_scan) if(s->alternate_scan)
scan_table = ff_alternate_vertical_scan; /* left */ scan_table = s->intra_v_scantable.permutated; /* left */
else else
scan_table = zigzag_direct; scan_table = s->intra_scantable.permutated;
if(s->mpeg_quant){ if(s->mpeg_quant){
qmul=1; qmul=1;
@ -4081,13 +4081,14 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
/* load default matrixes */ /* load default matrixes */
for(i=0; i<64; i++){ for(i=0; i<64; i++){
int j= s->idct_permutation[i];
v= ff_mpeg4_default_intra_matrix[i]; v= ff_mpeg4_default_intra_matrix[i];
s->intra_matrix[i]= v; s->intra_matrix[j]= v;
s->chroma_intra_matrix[i]= v; s->chroma_intra_matrix[j]= v;
v= ff_mpeg4_default_non_intra_matrix[i]; v= ff_mpeg4_default_non_intra_matrix[i];
s->inter_matrix[i]= v; s->inter_matrix[j]= v;
s->chroma_inter_matrix[i]= v; s->chroma_inter_matrix[j]= v;
} }
/* load custom intra matrix */ /* load custom intra matrix */
@ -4096,7 +4097,7 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
v= get_bits(&s->gb, 8); v= get_bits(&s->gb, 8);
if(v==0) break; if(v==0) break;
j= zigzag_direct[i]; j= s->intra_scantable.permutated[i];
s->intra_matrix[j]= v; s->intra_matrix[j]= v;
s->chroma_intra_matrix[j]= v; s->chroma_intra_matrix[j]= v;
} }
@ -4108,14 +4109,14 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
v= get_bits(&s->gb, 8); v= get_bits(&s->gb, 8);
if(v==0) break; if(v==0) break;
j= zigzag_direct[i]; j= s->intra_scantable.permutated[i];
s->inter_matrix[j]= v; s->inter_matrix[j]= v;
s->chroma_inter_matrix[j]= v; s->chroma_inter_matrix[j]= v;
} }
/* replicate last value */ /* replicate last value */
for(; i<64; i++){ for(; i<64; i++){
j= zigzag_direct[i]; j= s->intra_scantable.permutated[i];
s->inter_matrix[j]= v; s->inter_matrix[j]= v;
s->chroma_inter_matrix[j]= v; s->chroma_inter_matrix[j]= v;
} }

View File

@ -20,7 +20,6 @@
*/ */
#include "../dsputil.h" #include "../dsputil.h"
#include "../simple_idct.h"
int mm_flags; /* multimedia extension flags */ int mm_flags; /* multimedia extension flags */
@ -44,10 +43,6 @@ int pix_abs8x8_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs8x8_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); int pix_abs8x8_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs8x8_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); int pix_abs8x8_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
/* external functions, from idct_mmx.c */
void ff_mmx_idct(DCTELEM *block);
void ff_mmxext_idct(DCTELEM *block);
/* pixel operations */ /* pixel operations */
static const uint64_t mm_bone __attribute__ ((aligned(8))) = 0x0101010101010101ULL; static const uint64_t mm_bone __attribute__ ((aligned(8))) = 0x0101010101010101ULL;
static const uint64_t mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL; static const uint64_t mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL;
@ -588,17 +583,6 @@ void dsputil_init_mmx(void)
avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow; avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow;
avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow; avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow;
} }
/* idct */
if (mm_flags & MM_MMXEXT) {
ff_idct = ff_mmxext_idct;
} else {
ff_idct = ff_mmx_idct;
}
#ifdef SIMPLE_IDCT
// ff_idct = simple_idct;
ff_idct = simple_idct_mmx;
#endif
} }
#if 0 #if 0
@ -637,28 +621,6 @@ void dsputil_init_mmx(void)
#endif #endif
} }
void gen_idct_put(UINT8 *dest, int line_size, DCTELEM *block);
/**
 * Bit-exact wrapper around the IDCT "put" step.
 *
 * Coefficient matrices that would give different results with the
 * 16383-type MMX IDCT than with the C IDCT are sent to the C IDCT instead.
 * Such a block is recognized by block[0] > 1022 together with zeros at
 * positions 1, 4, 5, 8, 9, 12 and 13; it is re-indexed through
 * block_permute_op() and handed to simple_idct_put(). Every other block is
 * passed to gen_idct_put() untouched.
 */
void bit_exact_idct_put(UINT8 *dest, int line_size, INT16 *block){
    static const int zero_pos[7] = { 1, 4, 5, 8, 9, 12, 13 };
    int16_t snapshot[64];
    int special = block[0] > 1022;
    int k;

    for (k = 0; special && k < 7; k++) {
        if (block[zero_pos[k]] != 0)
            special = 0;
    }

    if (!special) {
        gen_idct_put(dest, line_size, block);
        return;
    }

    /* block[k] takes the value previously held at index block_permute_op(k) */
    for (k = 0; k < 64; k++)
        snapshot[k] = block[k];
    for (k = 0; k < 64; k++)
        block[k] = snapshot[block_permute_op(k)];

    simple_idct_put(dest, line_size, block);
}
/* remove any non bit exact operation (testing purpose). NOTE that /* remove any non bit exact operation (testing purpose). NOTE that
this function should be kept as small as possible because it is this function should be kept as small as possible because it is
always difficult to test automatically non bit exact cases. */ always difficult to test automatically non bit exact cases. */
@ -682,9 +644,5 @@ void dsputil_set_bit_exact_mmx(void)
pix_abs8x8_y2 = pix_abs8x8_y2_mmx; pix_abs8x8_y2 = pix_abs8x8_y2_mmx;
pix_abs8x8_xy2= pix_abs8x8_xy2_mmx; pix_abs8x8_xy2= pix_abs8x8_xy2_mmx;
} }
#ifdef SIMPLE_IDCT
if(ff_idct_put==gen_idct_put && ff_idct == simple_idct_mmx)
ff_idct_put= bit_exact_idct_put;
#endif
} }
} }

View File

@ -23,53 +23,24 @@
#include "../dsputil.h" #include "../dsputil.h"
#include "../mpegvideo.h" #include "../mpegvideo.h"
#include "../avcodec.h" #include "../avcodec.h"
#include "../simple_idct.h"
extern UINT8 zigzag_end[64]; /* Input permutation for the simple_idct_mmx */
static UINT8 simple_mmx_permutation[64]={
0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
extern UINT8 zigzag_direct_noperm[64]; extern UINT8 zigzag_direct_noperm[64];
extern UINT16 inv_zigzag_direct16[64]; extern UINT16 inv_zigzag_direct16[64];
extern UINT32 inverse[256]; extern UINT32 inverse[256];
#if 0
/* XXX: GL: I don't understand why this function needs optimization
(it is called only once per frame!), so I disabled it */
/*
 * Hand-written x86 version of the per-frame picture bookkeeping
 * (currently compiled out by the surrounding #if 0).
 *
 * Each picture array is accessed as three 32-bit words at byte offsets
 * 0, 4 and 8 -- presumably the three plane pointers on 32-bit x86;
 * confirm against the MpegEncContext definition.
 *
 *  - B_TYPE picture: the three words of s->aux_picture are copied into
 *    s->current_picture.
 *  - otherwise: s->last_picture and s->next_picture exchange their three
 *    words, and s->current_picture receives the values that end up in
 *    s->next_picture (i.e. the old contents of s->last_picture).
 */
void MPV_frame_start(MpegEncContext *s)
{
if (s->pict_type == B_TYPE) {
/* %0 = s->current_picture, %1 = s->aux_picture: load three words from
   %1 into eax/edx/ecx, then store them to %0. */
__asm __volatile(
"movl (%1), %%eax\n\t"
"movl 4(%1), %%edx\n\t"
"movl 8(%1), %%ecx\n\t"
"movl %%eax, (%0)\n\t"
"movl %%edx, 4(%0)\n\t"
"movl %%ecx, 8(%0)\n\t"
:
:"r"(s->current_picture), "r"(s->aux_picture)
:"eax","edx","ecx","memory");
} else {
/* swap next and last, then copy the old "last" words into current */
/* %0 = s->last_picture, %1 = s->next_picture, %2 = s->current_picture.
   The registers are loaded from %1; xchgl stores them into %0 while
   picking up the old %0 words, which are then written to %1 and %2. */
__asm __volatile(
"movl (%1), %%eax\n\t"
"movl 4(%1), %%edx\n\t"
"movl 8(%1), %%ecx\n\t"
"xchgl (%0), %%eax\n\t"
"xchgl 4(%0), %%edx\n\t"
"xchgl 8(%0), %%ecx\n\t"
"movl %%eax, (%1)\n\t"
"movl %%edx, 4(%1)\n\t"
"movl %%ecx, 8(%1)\n\t"
"movl %%eax, (%2)\n\t"
"movl %%edx, 4(%2)\n\t"
"movl %%ecx, 8(%2)\n\t"
:
:"r"(s->last_picture), "r"(s->next_picture), "r"(s->current_picture)
:"eax","edx","ecx","memory");
}
}
#endif
static const unsigned long long int mm_wabs __attribute__ ((aligned(8))) = 0xffffffffffffffffULL; static const unsigned long long int mm_wabs __attribute__ ((aligned(8))) = 0xffffffffffffffffULL;
static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL; static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL;
@ -77,36 +48,26 @@ static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x000
static void dct_unquantize_h263_mmx(MpegEncContext *s, static void dct_unquantize_h263_mmx(MpegEncContext *s,
DCTELEM *block, int n, int qscale) DCTELEM *block, int n, int qscale)
{ {
int i, level, qmul, qadd, nCoeffs; int level, qmul, qadd, nCoeffs;
qmul = s->qscale << 1;
if (s->h263_aic && s->mb_intra)
qadd = 0;
else
qadd = (s->qscale - 1) | 1;
qmul = qscale << 1;
qadd = (qscale - 1) | 1;
assert(s->block_last_index[n]>=0);
if (s->mb_intra) { if (s->mb_intra) {
if (!s->h263_aic) { if (!s->h263_aic) {
if (n < 4) if (n < 4)
block[0] = block[0] * s->y_dc_scale; level = block[0] * s->y_dc_scale;
else else
block[0] = block[0] * s->c_dc_scale; level = block[0] * s->c_dc_scale;
}else{
qadd = 0;
level= block[0];
} }
for(i=1; i<8; i++) { nCoeffs=63;
level = block[i];
if (level) {
if (level < 0) {
level = level * qmul - qadd;
} else {
level = level * qmul + qadd;
}
block[i] = level;
}
}
nCoeffs=64;
} else { } else {
i = 0; nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
nCoeffs= zigzag_end[ s->block_last_index[n] ];
} }
//printf("%d %d ", qmul, qadd); //printf("%d %d ", qmul, qadd);
asm volatile( asm volatile(
@ -152,10 +113,12 @@ asm volatile(
"movq %%mm1, 8(%0, %3) \n\t" "movq %%mm1, 8(%0, %3) \n\t"
"addl $16, %3 \n\t" "addl $16, %3 \n\t"
"js 1b \n\t" "jng 1b \n\t"
::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(i-nCoeffs)) ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
: "memory" : "memory"
); );
if(s->mb_intra)
block[0]= level;
} }
@ -193,9 +156,10 @@ static void dct_unquantize_mpeg1_mmx(MpegEncContext *s,
{ {
int nCoeffs; int nCoeffs;
const UINT16 *quant_matrix; const UINT16 *quant_matrix;
if(s->alternate_scan) nCoeffs= 64; assert(s->block_last_index[n]>=0);
else nCoeffs= zigzag_end[ s->block_last_index[n] ];
nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
if (s->mb_intra) { if (s->mb_intra) {
int block0; int block0;
@ -312,6 +276,7 @@ asm volatile(
: "%eax", "memory" : "%eax", "memory"
); );
} }
} }
static void dct_unquantize_mpeg2_mmx(MpegEncContext *s, static void dct_unquantize_mpeg2_mmx(MpegEncContext *s,
@ -320,8 +285,10 @@ static void dct_unquantize_mpeg2_mmx(MpegEncContext *s,
int nCoeffs; int nCoeffs;
const UINT16 *quant_matrix; const UINT16 *quant_matrix;
if(s->alternate_scan) nCoeffs= 64; assert(s->block_last_index[n]>=0);
else nCoeffs= zigzag_end[ s->block_last_index[n] ];
if(s->alternate_scan) nCoeffs= 63; //FIXME
else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
if (s->mb_intra) { if (s->mb_intra) {
int block0; int block0;
@ -371,7 +338,7 @@ asm volatile(
"movq %%mm5, 8(%0, %%eax) \n\t" "movq %%mm5, 8(%0, %%eax) \n\t"
"addl $16, %%eax \n\t" "addl $16, %%eax \n\t"
"js 1b \n\t" "jng 1b \n\t"
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
: "%eax", "memory" : "%eax", "memory"
); );
@ -427,7 +394,7 @@ asm volatile(
"movq %%mm5, 8(%0, %%eax) \n\t" "movq %%mm5, 8(%0, %%eax) \n\t"
"addl $16, %%eax \n\t" "addl $16, %%eax \n\t"
"js 1b \n\t" "jng 1b \n\t"
"movd 124(%0, %3), %%mm0 \n\t" "movd 124(%0, %3), %%mm0 \n\t"
"movq %%mm7, %%mm6 \n\t" "movq %%mm7, %%mm6 \n\t"
"psrlq $32, %%mm7 \n\t" "psrlq $32, %%mm7 \n\t"
@ -534,12 +501,6 @@ static void draw_edges_mmx(UINT8 *buf, int wrap, int width, int height, int w)
} }
} }
/* Scratch slot in which the dct_quantize inline asm parks %esp
   (it is referenced there through MANGLE(esp_temp)). */
static volatile int esp_temp;

/**
 * Dummy C-level user of esp_temp: bumps it by one so that, as the name
 * says, the compiler has no reason to warn about an unused variable.
 */
void unused_var_warning_killer(){
    esp_temp += 1;
}
#undef HAVE_MMX2 #undef HAVE_MMX2
#define RENAME(a) a ## _MMX #define RENAME(a) a ## _MMX
#include "mpegvideo_mmx_template.c" #include "mpegvideo_mmx_template.c"
@ -549,10 +510,40 @@ void unused_var_warning_killer(){
#define RENAME(a) a ## _MMX2 #define RENAME(a) a ## _MMX2
#include "mpegvideo_mmx_template.c" #include "mpegvideo_mmx_template.c"
/* external functions, from idct_mmx.c */
void ff_mmx_idct(DCTELEM *block);
void ff_mmxext_idct(DCTELEM *block);
/* XXX: those functions should be suppressed ASAP when all IDCTs are
converted */
/**
 * idct_put adapter for the libmpeg2 MMX IDCT: runs ff_mmx_idct() on
 * block (the transform returns nothing, so its result is taken from
 * block itself) and then passes block to put_pixels_clamped().
 *
 * @param dest      destination forwarded to put_pixels_clamped()
 * @param line_size stride forwarded to put_pixels_clamped()
 * @param block     coefficient block, modified by the IDCT
 */
static void ff_libmpeg2mmx_idct_put(UINT8 *dest, int line_size, DCTELEM *block)
{
    ff_mmx_idct(block);
    put_pixels_clamped(block, dest, line_size);
}
/**
 * idct_add adapter for the libmpeg2 MMX IDCT: runs ff_mmx_idct() on
 * block (the transform returns nothing, so its result is taken from
 * block itself) and then passes block to add_pixels_clamped().
 *
 * @param dest      destination forwarded to add_pixels_clamped()
 * @param line_size stride forwarded to add_pixels_clamped()
 * @param block     coefficient block, modified by the IDCT
 */
static void ff_libmpeg2mmx_idct_add(UINT8 *dest, int line_size, DCTELEM *block)
{
    ff_mmx_idct(block);
    add_pixels_clamped(block, dest, line_size);
}
/**
 * idct_put adapter for the libmpeg2 MMXEXT IDCT: runs ff_mmxext_idct()
 * on block (the transform returns nothing, so its result is taken from
 * block itself) and then passes block to put_pixels_clamped().
 *
 * @param dest      destination forwarded to put_pixels_clamped()
 * @param line_size stride forwarded to put_pixels_clamped()
 * @param block     coefficient block, modified by the IDCT
 */
static void ff_libmpeg2mmx2_idct_put(UINT8 *dest, int line_size, DCTELEM *block)
{
    ff_mmxext_idct(block);
    put_pixels_clamped(block, dest, line_size);
}
/**
 * idct_add adapter for the libmpeg2 MMXEXT IDCT: runs ff_mmxext_idct()
 * on block (the transform returns nothing, so its result is taken from
 * block itself) and then passes block to add_pixels_clamped().
 *
 * @param dest      destination forwarded to add_pixels_clamped()
 * @param line_size stride forwarded to add_pixels_clamped()
 * @param block     coefficient block, modified by the IDCT
 */
static void ff_libmpeg2mmx2_idct_add(UINT8 *dest, int line_size, DCTELEM *block)
{
    ff_mmxext_idct(block);
    add_pixels_clamped(block, dest, line_size);
}
void MPV_common_init_mmx(MpegEncContext *s) void MPV_common_init_mmx(MpegEncContext *s)
{ {
if (mm_flags & MM_MMX) { if (mm_flags & MM_MMX) {
const int dct_algo= s->avctx->dct_algo; int i;
const int dct_algo = s->avctx->dct_algo;
const int idct_algo= s->avctx->idct_algo;
s->dct_unquantize_h263 = dct_unquantize_h263_mmx; s->dct_unquantize_h263 = dct_unquantize_h263_mmx;
s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_mmx; s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_mmx;
s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_mmx; s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_mmx;
@ -568,5 +559,22 @@ void MPV_common_init_mmx(MpegEncContext *s)
s->dct_quantize= dct_quantize_MMX; s->dct_quantize= dct_quantize_MMX;
} }
} }
if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){
s->idct_put= ff_simple_idct_put_mmx;
s->idct_add= ff_simple_idct_add_mmx;
for(i=0; i<64; i++)
s->idct_permutation[i]= simple_mmx_permutation[i];
}else if(idct_algo==FF_IDCT_LIBMPEG2MMX){
if(mm_flags & MM_MMXEXT){
s->idct_put= ff_libmpeg2mmx2_idct_put;
s->idct_add= ff_libmpeg2mmx2_idct_add;
}else{
s->idct_put= ff_libmpeg2mmx_idct_put;
s->idct_add= ff_libmpeg2mmx_idct_add;
}
for(i=0; i<64; i++)
s->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
}
} }
} }

View File

@ -189,31 +189,143 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
); );
} }
if(s->mb_intra) temp_block[0]= level; //FIXME move afer permute if(s->mb_intra) block[0]= level;
else block[0]= temp_block[0];
// last_non_zero_p1=64;
/* permute for IDCT */ if(s->idct_permutation[1]==8){
asm volatile( if(last_non_zero_p1 <= 1) goto end;
"movl %0, %%eax \n\t" block[0x08] = temp_block[0x01]; block[0x10] = temp_block[0x08];
"pushl %%ebp \n\t" block[0x20] = temp_block[0x10];
"movl %%esp, " MANGLE(esp_temp) "\n\t" if(last_non_zero_p1 <= 4) goto end;
"1: \n\t" block[0x18] = temp_block[0x09]; block[0x04] = temp_block[0x02];
"movzbl (%1, %%eax), %%ebx \n\t" block[0x09] = temp_block[0x03];
"movzbl 1(%1, %%eax), %%ebp \n\t" if(last_non_zero_p1 <= 7) goto end;
"movw (%2, %%ebx, 2), %%cx \n\t" block[0x14] = temp_block[0x0A]; block[0x28] = temp_block[0x11];
"movw (%2, %%ebp, 2), %%sp \n\t" block[0x12] = temp_block[0x18]; block[0x02] = temp_block[0x20];
"movzbl " MANGLE(permutation) "(%%ebx), %%ebx\n\t" if(last_non_zero_p1 <= 11) goto end;
"movzbl " MANGLE(permutation) "(%%ebp), %%ebp\n\t" block[0x1A] = temp_block[0x19]; block[0x24] = temp_block[0x12];
"movw %%cx, (%3, %%ebx, 2) \n\t" block[0x19] = temp_block[0x0B]; block[0x01] = temp_block[0x04];
"movw %%sp, (%3, %%ebp, 2) \n\t" block[0x0C] = temp_block[0x05];
"addl $2, %%eax \n\t" if(last_non_zero_p1 <= 16) goto end;
" js 1b \n\t" block[0x11] = temp_block[0x0C]; block[0x29] = temp_block[0x13];
"movl " MANGLE(esp_temp) ", %%esp\n\t" block[0x16] = temp_block[0x1A]; block[0x0A] = temp_block[0x21];
"popl %%ebp \n\t" block[0x30] = temp_block[0x28]; block[0x22] = temp_block[0x30];
: block[0x38] = temp_block[0x29]; block[0x06] = temp_block[0x22];
: "g" (-last_non_zero_p1), "d" (zigzag_direct_noperm+last_non_zero_p1), "S" (temp_block), "D" (block) if(last_non_zero_p1 <= 24) goto end;
: "%eax", "%ebx", "%ecx" block[0x1B] = temp_block[0x1B]; block[0x21] = temp_block[0x14];
); block[0x1C] = temp_block[0x0D]; block[0x05] = temp_block[0x06];
block[0x0D] = temp_block[0x07]; block[0x15] = temp_block[0x0E];
block[0x2C] = temp_block[0x15]; block[0x13] = temp_block[0x1C];
if(last_non_zero_p1 <= 32) goto end;
block[0x0B] = temp_block[0x23]; block[0x34] = temp_block[0x2A];
block[0x2A] = temp_block[0x31]; block[0x32] = temp_block[0x38];
block[0x3A] = temp_block[0x39]; block[0x26] = temp_block[0x32];
block[0x39] = temp_block[0x2B]; block[0x03] = temp_block[0x24];
if(last_non_zero_p1 <= 40) goto end;
block[0x1E] = temp_block[0x1D]; block[0x25] = temp_block[0x16];
block[0x1D] = temp_block[0x0F]; block[0x2D] = temp_block[0x17];
block[0x17] = temp_block[0x1E]; block[0x0E] = temp_block[0x25];
block[0x31] = temp_block[0x2C]; block[0x2B] = temp_block[0x33];
if(last_non_zero_p1 <= 48) goto end;
block[0x36] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B];
block[0x23] = temp_block[0x34]; block[0x3C] = temp_block[0x2D];
block[0x07] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
block[0x0F] = temp_block[0x27]; block[0x35] = temp_block[0x2E];
if(last_non_zero_p1 <= 56) goto end;
block[0x2E] = temp_block[0x35]; block[0x33] = temp_block[0x3C];
block[0x3E] = temp_block[0x3D]; block[0x27] = temp_block[0x36];
block[0x3D] = temp_block[0x2F]; block[0x2F] = temp_block[0x37];
block[0x37] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F];
}else if(s->idct_permutation[1]==4){
if(last_non_zero_p1 <= 1) goto end;
block[0x04] = temp_block[0x01];
block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10];
if(last_non_zero_p1 <= 4) goto end;
block[0x0C] = temp_block[0x09]; block[0x01] = temp_block[0x02];
block[0x05] = temp_block[0x03];
if(last_non_zero_p1 <= 7) goto end;
block[0x09] = temp_block[0x0A]; block[0x14] = temp_block[0x11];
block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20];
if(last_non_zero_p1 <= 11) goto end;
block[0x1C] = temp_block[0x19];
block[0x11] = temp_block[0x12]; block[0x0D] = temp_block[0x0B];
block[0x02] = temp_block[0x04]; block[0x06] = temp_block[0x05];
if(last_non_zero_p1 <= 16) goto end;
block[0x0A] = temp_block[0x0C]; block[0x15] = temp_block[0x13];
block[0x19] = temp_block[0x1A]; block[0x24] = temp_block[0x21];
block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30];
block[0x2C] = temp_block[0x29]; block[0x21] = temp_block[0x22];
if(last_non_zero_p1 <= 24) goto end;
block[0x1D] = temp_block[0x1B]; block[0x12] = temp_block[0x14];
block[0x0E] = temp_block[0x0D]; block[0x03] = temp_block[0x06];
block[0x07] = temp_block[0x07]; block[0x0B] = temp_block[0x0E];
block[0x16] = temp_block[0x15]; block[0x1A] = temp_block[0x1C];
if(last_non_zero_p1 <= 32) goto end;
block[0x25] = temp_block[0x23]; block[0x29] = temp_block[0x2A];
block[0x34] = temp_block[0x31]; block[0x38] = temp_block[0x38];
block[0x3C] = temp_block[0x39]; block[0x31] = temp_block[0x32];
block[0x2D] = temp_block[0x2B]; block[0x22] = temp_block[0x24];
if(last_non_zero_p1 <= 40) goto end;
block[0x1E] = temp_block[0x1D]; block[0x13] = temp_block[0x16];
block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17];
block[0x1B] = temp_block[0x1E]; block[0x26] = temp_block[0x25];
block[0x2A] = temp_block[0x2C]; block[0x35] = temp_block[0x33];
if(last_non_zero_p1 <= 48) goto end;
block[0x39] = temp_block[0x3A]; block[0x3D] = temp_block[0x3B];
block[0x32] = temp_block[0x34]; block[0x2E] = temp_block[0x2D];
block[0x23] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
block[0x27] = temp_block[0x27]; block[0x2B] = temp_block[0x2E];
if(last_non_zero_p1 <= 56) goto end;
block[0x36] = temp_block[0x35]; block[0x3A] = temp_block[0x3C];
block[0x3E] = temp_block[0x3D]; block[0x33] = temp_block[0x36];
block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37];
block[0x3B] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F];
}else{
if(last_non_zero_p1 <= 1) goto end;
block[0x01] = temp_block[0x01];
block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10];
if(last_non_zero_p1 <= 4) goto end;
block[0x09] = temp_block[0x09]; block[0x02] = temp_block[0x02];
block[0x03] = temp_block[0x03];
if(last_non_zero_p1 <= 7) goto end;
block[0x0A] = temp_block[0x0A]; block[0x11] = temp_block[0x11];
block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20];
if(last_non_zero_p1 <= 11) goto end;
block[0x19] = temp_block[0x19];
block[0x12] = temp_block[0x12]; block[0x0B] = temp_block[0x0B];
block[0x04] = temp_block[0x04]; block[0x05] = temp_block[0x05];
if(last_non_zero_p1 <= 16) goto end;
block[0x0C] = temp_block[0x0C]; block[0x13] = temp_block[0x13];
block[0x1A] = temp_block[0x1A]; block[0x21] = temp_block[0x21];
block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30];
block[0x29] = temp_block[0x29]; block[0x22] = temp_block[0x22];
if(last_non_zero_p1 <= 24) goto end;
block[0x1B] = temp_block[0x1B]; block[0x14] = temp_block[0x14];
block[0x0D] = temp_block[0x0D]; block[0x06] = temp_block[0x06];
block[0x07] = temp_block[0x07]; block[0x0E] = temp_block[0x0E];
block[0x15] = temp_block[0x15]; block[0x1C] = temp_block[0x1C];
if(last_non_zero_p1 <= 32) goto end;
block[0x23] = temp_block[0x23]; block[0x2A] = temp_block[0x2A];
block[0x31] = temp_block[0x31]; block[0x38] = temp_block[0x38];
block[0x39] = temp_block[0x39]; block[0x32] = temp_block[0x32];
block[0x2B] = temp_block[0x2B]; block[0x24] = temp_block[0x24];
if(last_non_zero_p1 <= 40) goto end;
block[0x1D] = temp_block[0x1D]; block[0x16] = temp_block[0x16];
block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17];
block[0x1E] = temp_block[0x1E]; block[0x25] = temp_block[0x25];
block[0x2C] = temp_block[0x2C]; block[0x33] = temp_block[0x33];
if(last_non_zero_p1 <= 48) goto end;
block[0x3A] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B];
block[0x34] = temp_block[0x34]; block[0x2D] = temp_block[0x2D];
block[0x26] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
block[0x27] = temp_block[0x27]; block[0x2E] = temp_block[0x2E];
if(last_non_zero_p1 <= 56) goto end;
block[0x35] = temp_block[0x35]; block[0x3C] = temp_block[0x3C];
block[0x3D] = temp_block[0x3D]; block[0x36] = temp_block[0x36];
block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37];
block[0x3E] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F];
}
end:
/* /*
for(i=0; i<last_non_zero_p1; i++) for(i=0; i<last_non_zero_p1; i++)
{ {
@ -221,7 +333,6 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
block[block_permute_op(j)]= temp_block[j]; block[block_permute_op(j)]= temp_block[j];
} }
*/ */
//block_permute(block);
return last_non_zero_p1 - 1; return last_non_zero_p1 - 1;
} }

View File

@ -1291,7 +1291,20 @@ Temp
); );
} }
void simple_idct_mmx(int16_t *block) void ff_simple_idct_mmx(int16_t *block)
{ {
idct(block); idct(block);
}
//FIXME merge add/put into the idct
/**
 * "put" entry point for the simple MMX IDCT: runs idct() on block and
 * then passes block to put_pixels_clamped().
 *
 * @param dest      destination forwarded to put_pixels_clamped()
 * @param line_size stride forwarded to put_pixels_clamped()
 * @param block     coefficient block, modified by idct()
 */
void ff_simple_idct_put_mmx(UINT8 *dest, int line_size, DCTELEM *block)
{
    idct(block);
    put_pixels_clamped(block, dest, line_size);
}
void ff_simple_idct_add_mmx(UINT8 *dest, int line_size, DCTELEM *block)
{
idct(block);
add_pixels_clamped(block, dest, line_size);
} }

View File

@ -322,14 +322,14 @@ static void jpeg_table_header(MpegEncContext *s)
put_bits(p, 4, 0); /* 8 bit precision */ put_bits(p, 4, 0); /* 8 bit precision */
put_bits(p, 4, 0); /* table 0 */ put_bits(p, 4, 0); /* table 0 */
for(i=0;i<64;i++) { for(i=0;i<64;i++) {
j = zigzag_direct[i]; j = s->intra_scantable.permutated[i];
put_bits(p, 8, s->intra_matrix[j]); put_bits(p, 8, s->intra_matrix[j]);
} }
#ifdef TWOMATRIXES #ifdef TWOMATRIXES
put_bits(p, 4, 0); /* 8 bit precision */ put_bits(p, 4, 0); /* 8 bit precision */
put_bits(p, 4, 1); /* table 1 */ put_bits(p, 4, 1); /* table 1 */
for(i=0;i<64;i++) { for(i=0;i<64;i++) {
j = zigzag_direct[i]; j = s->intra_scantable.permutated[i];
put_bits(p, 8, s->chroma_intra_matrix[j]); put_bits(p, 8, s->chroma_intra_matrix[j]);
} }
#endif #endif
@ -535,7 +535,7 @@ static void encode_block(MpegEncContext *s, DCTELEM *block, int n)
run = 0; run = 0;
last_index = s->block_last_index[n]; last_index = s->block_last_index[n];
for(i=1;i<=last_index;i++) { for(i=1;i<=last_index;i++) {
j = zigzag_direct[i]; j = s->intra_scantable.permutated[i];
val = block[j]; val = block[j];
if (val == 0) { if (val == 0) {
run++; run++;
@ -620,6 +620,8 @@ typedef struct MJpegDecodeContext {
int restart_interval; int restart_interval;
int restart_count; int restart_count;
int interleaved_rows; int interleaved_rows;
ScanTable scantable;
void (*idct_put)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
} MJpegDecodeContext; } MJpegDecodeContext;
#define SKIP_REMAINING(gb, len) { \ #define SKIP_REMAINING(gb, len) { \
@ -645,9 +647,23 @@ static void build_vlc(VLC *vlc, const UINT8 *bits_table, const UINT8 *val_table,
static int mjpeg_decode_init(AVCodecContext *avctx) static int mjpeg_decode_init(AVCodecContext *avctx)
{ {
MJpegDecodeContext *s = avctx->priv_data; MJpegDecodeContext *s = avctx->priv_data;
MpegEncContext s2;
s->avctx = avctx; s->avctx = avctx;
/* ugly way to get the idct & scantable */
memset(&s2, 0, sizeof(MpegEncContext));
s2.flags= avctx->flags;
s2.avctx= avctx;
// s2->out_format = FMT_MJPEG;
s2.width = 8;
s2.height = 8;
if (MPV_common_init(&s2) < 0)
return -1;
s->scantable= s2.intra_scantable;
s->idct_put= s2.idct_put;
MPV_common_end(&s2);
s->header_state = 0; s->header_state = 0;
s->mpeg_enc_ctx_allocated = 0; s->mpeg_enc_ctx_allocated = 0;
s->buffer_size = PICTURE_BUFFER_SIZE - 1; /* minus 1 to take into s->buffer_size = PICTURE_BUFFER_SIZE - 1; /* minus 1 to take into
@ -657,7 +673,7 @@ static int mjpeg_decode_init(AVCodecContext *avctx)
s->first_picture = 1; s->first_picture = 1;
s->org_width = avctx->width; s->org_width = avctx->width;
s->org_height = avctx->height; s->org_height = avctx->height;
build_vlc(&s->vlcs[0][0], bits_dc_luminance, val_dc_luminance, 12); build_vlc(&s->vlcs[0][0], bits_dc_luminance, val_dc_luminance, 12);
build_vlc(&s->vlcs[0][1], bits_dc_chrominance, val_dc_chrominance, 12); build_vlc(&s->vlcs[0][1], bits_dc_chrominance, val_dc_chrominance, 12);
build_vlc(&s->vlcs[1][0], bits_ac_luminance, val_ac_luminance, 251); build_vlc(&s->vlcs[1][0], bits_ac_luminance, val_ac_luminance, 251);
@ -694,7 +710,7 @@ static int mjpeg_decode_dqt(MJpegDecodeContext *s,
dprintf("index=%d\n", index); dprintf("index=%d\n", index);
/* read quant table */ /* read quant table */
for(i=0;i<64;i++) { for(i=0;i<64;i++) {
j = zigzag_direct[i]; j = s->scantable.permutated[i];
s->quant_matrixes[index][j] = get_bits(&s->gb, 8); s->quant_matrixes[index][j] = get_bits(&s->gb, 8);
} }
len -= 65; len -= 65;
@ -897,7 +913,7 @@ static int decode_block(MJpegDecodeContext *s, DCTELEM *block,
dprintf("error count: %d\n", i); dprintf("error count: %d\n", i);
return -1; return -1;
} }
j = zigzag_direct[i]; j = s->scantable.permutated[i];
block[j] = level * quant_matrix[j]; block[j] = level * quant_matrix[j];
i++; i++;
if (i >= 64) if (i >= 64)
@ -1021,7 +1037,7 @@ static int mjpeg_decode_sos(MJpegDecodeContext *s,
(h * mb_x + x) * 8; (h * mb_x + x) * 8;
if (s->interlaced && s->bottom_field) if (s->interlaced && s->bottom_field)
ptr += s->linesize[c] >> 1; ptr += s->linesize[c] >> 1;
ff_idct_put(ptr, s->linesize[c], s->block); s->idct_put(ptr, s->linesize[c], s->block);
if (++x == h) { if (++x == h) {
x = 0; x = 0;
y++; y++;

View File

@ -542,7 +542,7 @@ static void mpeg1_encode_block(MpegEncContext *s,
last_non_zero = i - 1; last_non_zero = i - 1;
for(;i<=last_index;i++) { for(;i<=last_index;i++) {
j = zigzag_direct[i]; j = s->intra_scantable.permutated[i];
level = block[j]; level = block[j];
next_coef: next_coef:
#if 0 #if 0
@ -552,26 +552,11 @@ static void mpeg1_encode_block(MpegEncContext *s,
/* encode using VLC */ /* encode using VLC */
if (level != 0) { if (level != 0) {
run = i - last_non_zero - 1; run = i - last_non_zero - 1;
#ifdef ARCH_X86
asm volatile( alevel= level;
"movl %2, %1 \n\t" MASK_ABS(sign, alevel)
"movl %1, %0 \n\t" sign&=1;
"addl %1, %1 \n\t"
"sbbl %1, %1 \n\t"
"xorl %1, %0 \n\t"
"subl %1, %0 \n\t"
"andl $1, %1 \n\t"
: "=&r" (alevel), "=&r" (sign)
: "g" (level)
);
#else
sign = 0;
alevel = level;
if (alevel < 0) {
sign = 1;
alevel = -alevel;
}
#endif
// code = get_rl_index(rl, 0, run, alevel); // code = get_rl_index(rl, 0, run, alevel);
if (alevel > mpeg1_max_level[0][run]) if (alevel > mpeg1_max_level[0][run])
code= 111; /*rl->n*/ code= 111; /*rl->n*/
@ -1040,6 +1025,7 @@ static int mpeg1_decode_block(MpegEncContext *s,
int level, dc, diff, i, j, run; int level, dc, diff, i, j, run;
int code, component; int code, component;
RLTable *rl = &rl_mpeg1; RLTable *rl = &rl_mpeg1;
UINT8 * const scantable= s->intra_scantable.permutated;
if (s->mb_intra) { if (s->mb_intra) {
/* DC coef */ /* DC coef */
@ -1099,7 +1085,7 @@ static int mpeg1_decode_block(MpegEncContext *s,
return -1; return -1;
add_coef: add_coef:
dprintf("%d: run=%d level=%d\n", n, run, level); dprintf("%d: run=%d level=%d\n", n, run, level);
j = zigzag_direct[i]; j = scantable[i];
block[j] = level; block[j] = level;
i++; i++;
} }
@ -1121,9 +1107,9 @@ static int mpeg2_decode_block_non_intra(MpegEncContext *s,
int mismatch; int mismatch;
if (s->alternate_scan) if (s->alternate_scan)
scan_table = ff_alternate_vertical_scan; scan_table = s->intra_v_scantable.permutated;
else else
scan_table = zigzag_direct; scan_table = s->intra_scantable.permutated;
mismatch = 1; mismatch = 1;
{ {
@ -1140,7 +1126,7 @@ static int mpeg2_decode_block_non_intra(MpegEncContext *s,
v= SHOW_UBITS(re, &s->gb, 2); v= SHOW_UBITS(re, &s->gb, 2);
if (v & 2) { if (v & 2) {
run = 0; run = 0;
level = 1 - ((v & 1) << 1); level = 5 - (v << 1);
SKIP_BITS(re, &s->gb, 2); SKIP_BITS(re, &s->gb, 2);
CLOSE_READER(re, &s->gb); CLOSE_READER(re, &s->gb);
goto add_coef; goto add_coef;
@ -1191,6 +1177,7 @@ static int mpeg2_decode_block_non_intra(MpegEncContext *s,
} }
block[63] ^= (mismatch & 1); block[63] ^= (mismatch & 1);
s->block_last_index[n] = i; s->block_last_index[n] = i;
return 0; return 0;
} }
@ -1206,9 +1193,9 @@ static int mpeg2_decode_block_intra(MpegEncContext *s,
int mismatch; int mismatch;
if (s->alternate_scan) if (s->alternate_scan)
scan_table = ff_alternate_vertical_scan; scan_table = s->intra_v_scantable.permutated;
else else
scan_table = zigzag_direct; scan_table = s->intra_scantable.permutated;
/* DC coef */ /* DC coef */
component = (n <= 3 ? 0 : n - 4 + 1); component = (n <= 3 ? 0 : n - 4 + 1);
@ -1402,7 +1389,7 @@ static void mpeg_decode_quant_matrix_extension(MpegEncContext *s)
if (get_bits1(&s->gb)) { if (get_bits1(&s->gb)) {
for(i=0;i<64;i++) { for(i=0;i<64;i++) {
v = get_bits(&s->gb, 8); v = get_bits(&s->gb, 8);
j = zigzag_direct[i]; j = s->intra_scantable.permutated[i];
s->intra_matrix[j] = v; s->intra_matrix[j] = v;
s->chroma_intra_matrix[j] = v; s->chroma_intra_matrix[j] = v;
} }
@ -1410,7 +1397,7 @@ static void mpeg_decode_quant_matrix_extension(MpegEncContext *s)
if (get_bits1(&s->gb)) { if (get_bits1(&s->gb)) {
for(i=0;i<64;i++) { for(i=0;i<64;i++) {
v = get_bits(&s->gb, 8); v = get_bits(&s->gb, 8);
j = zigzag_direct[i]; j = s->intra_scantable.permutated[i];
s->inter_matrix[j] = v; s->inter_matrix[j] = v;
s->chroma_inter_matrix[j] = v; s->chroma_inter_matrix[j] = v;
} }
@ -1418,14 +1405,14 @@ static void mpeg_decode_quant_matrix_extension(MpegEncContext *s)
if (get_bits1(&s->gb)) { if (get_bits1(&s->gb)) {
for(i=0;i<64;i++) { for(i=0;i<64;i++) {
v = get_bits(&s->gb, 8); v = get_bits(&s->gb, 8);
j = zigzag_direct[i]; j = s->intra_scantable.permutated[i];
s->chroma_intra_matrix[j] = v; s->chroma_intra_matrix[j] = v;
} }
} }
if (get_bits1(&s->gb)) { if (get_bits1(&s->gb)) {
for(i=0;i<64;i++) { for(i=0;i<64;i++) {
v = get_bits(&s->gb, 8); v = get_bits(&s->gb, 8);
j = zigzag_direct[i]; j = s->intra_scantable.permutated[i];
s->chroma_inter_matrix[j] = v; s->chroma_inter_matrix[j] = v;
} }
} }
@ -1636,7 +1623,7 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx,
if (get_bits1(&s->gb)) { if (get_bits1(&s->gb)) {
for(i=0;i<64;i++) { for(i=0;i<64;i++) {
v = get_bits(&s->gb, 8); v = get_bits(&s->gb, 8);
j = zigzag_direct[i]; j = s->intra_scantable.permutated[i];
s->intra_matrix[j] = v; s->intra_matrix[j] = v;
s->chroma_intra_matrix[j] = v; s->chroma_intra_matrix[j] = v;
} }
@ -1648,15 +1635,16 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx,
#endif #endif
} else { } else {
for(i=0;i<64;i++) { for(i=0;i<64;i++) {
int j= s->idct_permutation[i];
v = ff_mpeg1_default_intra_matrix[i]; v = ff_mpeg1_default_intra_matrix[i];
s->intra_matrix[i] = v; s->intra_matrix[j] = v;
s->chroma_intra_matrix[i] = v; s->chroma_intra_matrix[j] = v;
} }
} }
if (get_bits1(&s->gb)) { if (get_bits1(&s->gb)) {
for(i=0;i<64;i++) { for(i=0;i<64;i++) {
v = get_bits(&s->gb, 8); v = get_bits(&s->gb, 8);
j = zigzag_direct[i]; j = s->intra_scantable.permutated[i];
s->inter_matrix[j] = v; s->inter_matrix[j] = v;
s->chroma_inter_matrix[j] = v; s->chroma_inter_matrix[j] = v;
} }
@ -1668,9 +1656,10 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx,
#endif #endif
} else { } else {
for(i=0;i<64;i++) { for(i=0;i<64;i++) {
int j= s->idct_permutation[i];
v = ff_mpeg1_default_non_intra_matrix[i]; v = ff_mpeg1_default_non_intra_matrix[i];
s->inter_matrix[i] = v; s->inter_matrix[j] = v;
s->chroma_inter_matrix[i] = v; s->chroma_inter_matrix[j] = v;
} }
} }

View File

@ -2,7 +2,7 @@
* MPEG1/2 tables * MPEG1/2 tables
*/ */
INT16 ff_mpeg1_default_intra_matrix[64] = { const INT16 ff_mpeg1_default_intra_matrix[64] = {
8, 16, 19, 22, 26, 27, 29, 34, 8, 16, 19, 22, 26, 27, 29, 34,
16, 16, 22, 24, 27, 29, 34, 37, 16, 16, 22, 24, 27, 29, 34, 37,
19, 22, 26, 27, 29, 34, 34, 38, 19, 22, 26, 27, 29, 34, 34, 38,
@ -13,7 +13,7 @@ INT16 ff_mpeg1_default_intra_matrix[64] = {
27, 29, 35, 38, 46, 56, 69, 83 27, 29, 35, 38, 46, 56, 69, 83
}; };
INT16 ff_mpeg1_default_non_intra_matrix[64] = { const INT16 ff_mpeg1_default_non_intra_matrix[64] = {
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,

View File

@ -135,7 +135,7 @@ static const UINT16 pixel_aspect[16][2]={
}; };
/* these matrixes will be permuted for the idct */ /* these matrixes will be permuted for the idct */
INT16 ff_mpeg4_default_intra_matrix[64] = { const INT16 ff_mpeg4_default_intra_matrix[64] = {
8, 17, 18, 19, 21, 23, 25, 27, 8, 17, 18, 19, 21, 23, 25, 27,
17, 18, 19, 21, 23, 25, 27, 28, 17, 18, 19, 21, 23, 25, 27, 28,
20, 21, 22, 23, 24, 26, 28, 30, 20, 21, 22, 23, 24, 26, 28, 30,
@ -146,7 +146,7 @@ INT16 ff_mpeg4_default_intra_matrix[64] = {
27, 28, 30, 32, 35, 38, 41, 45, 27, 28, 30, 32, 35, 38, 41, 45,
}; };
INT16 ff_mpeg4_default_non_intra_matrix[64] = { const INT16 ff_mpeg4_default_non_intra_matrix[64] = {
16, 17, 18, 19, 20, 21, 22, 23, 16, 17, 18, 19, 20, 21, 22, 23,
17, 18, 19, 20, 21, 22, 23, 24, 17, 18, 19, 20, 21, 22, 23, 24,
18, 19, 20, 21, 22, 23, 24, 25, 18, 19, 20, 21, 22, 23, 24, 25,

View File

@ -23,11 +23,15 @@
#include "avcodec.h" #include "avcodec.h"
#include "dsputil.h" #include "dsputil.h"
#include "mpegvideo.h" #include "mpegvideo.h"
#include "simple_idct.h"
#ifdef USE_FASTMEMCPY #ifdef USE_FASTMEMCPY
#include "fastmemcpy.h" #include "fastmemcpy.h"
#endif #endif
//#undef NDEBUG
//#include <assert.h>
static void encode_picture(MpegEncContext *s, int picture_number); static void encode_picture(MpegEncContext *s, int picture_number);
static void dct_unquantize_mpeg1_c(MpegEncContext *s, static void dct_unquantize_mpeg1_c(MpegEncContext *s,
DCTELEM *block, int n, int qscale); DCTELEM *block, int n, int qscale);
@ -72,8 +76,6 @@ static UINT8 h263_chroma_roundtab[16] = {
static UINT16 default_mv_penalty[MAX_FCODE+1][MAX_MV*2+1]; static UINT16 default_mv_penalty[MAX_FCODE+1][MAX_MV*2+1];
static UINT8 default_fcode_tab[MAX_MV*2+1]; static UINT8 default_fcode_tab[MAX_MV*2+1];
extern UINT8 zigzag_end[64];
/* default motion estimation */ /* default motion estimation */
int motion_estimation_method = ME_EPZS; int motion_estimation_method = ME_EPZS;
@ -86,7 +88,7 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16
int i; int i;
if (s->fdct == ff_jpeg_fdct_islow) { if (s->fdct == ff_jpeg_fdct_islow) {
for(i=0;i<64;i++) { for(i=0;i<64;i++) {
const int j= block_permute_op(i); const int j= s->idct_permutation[i];
/* 16 <= qscale * quant_matrix[i] <= 7905 */ /* 16 <= qscale * quant_matrix[i] <= 7905 */
/* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */ /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */
/* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */ /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
@ -97,7 +99,7 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16
} }
} else if (s->fdct == fdct_ifast) { } else if (s->fdct == fdct_ifast) {
for(i=0;i<64;i++) { for(i=0;i<64;i++) {
const int j= block_permute_op(i); const int j= s->idct_permutation[i];
/* 16 <= qscale * quant_matrix[i] <= 7905 */ /* 16 <= qscale * quant_matrix[i] <= 7905 */
/* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */ /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */
/* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */ /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
@ -108,13 +110,14 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16
} }
} else { } else {
for(i=0;i<64;i++) { for(i=0;i<64;i++) {
const int j= s->idct_permutation[i];
/* We can safely suppose that 16 <= quant_matrix[i] <= 255 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
So 16 <= qscale * quant_matrix[i] <= 7905 So 16 <= qscale * quant_matrix[i] <= 7905
so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905 so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
so 32768 >= (1<<19) / (qscale * quant_matrix[i]) >= 67 so 32768 >= (1<<19) / (qscale * quant_matrix[i]) >= 67
*/ */
qmat [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]); qmat [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
qmat16[qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[block_permute_op(i)]); qmat16[qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
if(qmat16[qscale][i]==0 || qmat16[qscale][i]==128*256) qmat16[qscale][i]=128*256-1; if(qmat16[qscale][i]==0 || qmat16[qscale][i]==128*256) qmat16[qscale][i]=128*256-1;
qmat16_bias[qscale][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][i]); qmat16_bias[qscale][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][i]);
@ -131,6 +134,50 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16
goto fail;\ goto fail;\
}\ }\
} }
/*
static void build_end(void)
{
int lastIndex;
int lastIndexAfterPerm=0;
for(lastIndex=0; lastIndex<64; lastIndex++)
{
if(ff_zigzag_direct[lastIndex] > lastIndexAfterPerm)
lastIndexAfterPerm= ff_zigzag_direct[lastIndex];
zigzag_end[lastIndex]= lastIndexAfterPerm + 1;
}
}
*/
/**
 * Fill a ScanTable from a source scan order.
 *
 * @param s             context supplying idct_permutation[]
 * @param st            table to fill (permutated[] and raster_end[])
 * @param src_scantable 64-entry scan order; each entry is used as an index
 *                      into s->idct_permutation[]
 *
 * After the call:
 *   st->permutated[k] == s->idct_permutation[src_scantable[k]]
 *   st->raster_end[k] == largest st->permutated[] value over positions 0..k
 */
void ff_init_scantable(MpegEncContext *s, ScanTable *st, const UINT8 *src_scantable){
    int pos;
    int highest;

    /* pass 1: route every scan position through the IDCT permutation */
    for (pos = 0; pos < 64; pos++)
        st->permutated[pos] = s->idct_permutation[src_scantable[pos]];

    /* pass 2: running maximum of the permuted indices seen so far;
       kept as a separate pass so that permutated[] is complete before
       raster_end[] is written */
    highest = -1;
    for (pos = 0; pos < 64; pos++) {
        const int idx = st->permutated[pos];

        highest = (idx > highest) ? idx : highest;
        st->raster_end[pos] = highest;
    }
}
/* XXX: those functions should be suppressed ASAP when all IDCTs are
converted */
/* Wrapper with the same parameter list as the s->idct_put callback:
   calls j_rev_dct() on block, then passes block, dest and line_size to
   put_pixels_clamped().
   NOTE(review): j_rev_dct() and put_pixels_clamped() are defined outside
   this view; presumably the first transforms block in place and the second
   stores the clamped result at dest - confirm. */
static void ff_jref_idct_put(UINT8 *dest, int line_size, DCTELEM *block)
{
j_rev_dct (block);
put_pixels_clamped(block, dest, line_size);
}
/* Wrapper with the same parameter list as the s->idct_add callback:
   calls j_rev_dct() on block, then passes block, dest and line_size to
   add_pixels_clamped().
   NOTE(review): j_rev_dct() and add_pixels_clamped() are defined outside
   this view; presumably the first transforms block in place and the second
   adds the result to the pixels at dest with clamping - confirm. */
static void ff_jref_idct_add(UINT8 *dest, int line_size, DCTELEM *block)
{
j_rev_dct (block);
add_pixels_clamped(block, dest, line_size);
}
/* init common structure for both encoder and decoder */ /* init common structure for both encoder and decoder */
int MPV_common_init(MpegEncContext *s) int MPV_common_init(MpegEncContext *s)
@ -146,7 +193,19 @@ int MPV_common_init(MpegEncContext *s)
if(s->avctx->dct_algo==FF_DCT_FASTINT) if(s->avctx->dct_algo==FF_DCT_FASTINT)
s->fdct = fdct_ifast; s->fdct = fdct_ifast;
else else
s->fdct = ff_jpeg_fdct_islow; s->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
if(s->avctx->idct_algo==FF_IDCT_INT){
s->idct_put= ff_jref_idct_put;
s->idct_add= ff_jref_idct_add;
for(i=0; i<64; i++)
s->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
}else{ //accurate/default
s->idct_put= simple_idct_put;
s->idct_add= simple_idct_add;
for(i=0; i<64; i++)
s->idct_permutation[i]= i;
}
#ifdef HAVE_MMX #ifdef HAVE_MMX
MPV_common_init_mmx(s); MPV_common_init_mmx(s);
@ -157,6 +216,15 @@ int MPV_common_init(MpegEncContext *s)
#ifdef HAVE_MLIB #ifdef HAVE_MLIB
MPV_common_init_mlib(s); MPV_common_init_mlib(s);
#endif #endif
/* load & permutate scantables
note: only wmv uses different ones
*/
ff_init_scantable(s, &s->inter_scantable , ff_zigzag_direct);
ff_init_scantable(s, &s->intra_scantable , ff_zigzag_direct);
ff_init_scantable(s, &s->intra_h_scantable, ff_alternate_horizontal_scan);
ff_init_scantable(s, &s->intra_v_scantable, ff_alternate_vertical_scan);
s->mb_width = (s->width + 15) / 16; s->mb_width = (s->width + 15) / 16;
s->mb_height = (s->height + 15) / 16; s->mb_height = (s->height + 15) / 16;
@ -577,13 +645,6 @@ int MPV_encode_init(AVCodecContext *avctx)
s->y_dc_scale_table= s->y_dc_scale_table=
s->c_dc_scale_table= ff_mpeg1_dc_scale_table; s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
if (s->out_format == FMT_H263)
h263_encode_init(s);
else if (s->out_format == FMT_MPEG1)
ff_mpeg1_encode_init(s);
if(s->msmpeg4_version)
ff_msmpeg4_encode_init(s);
/* dont use mv_penalty table for crap MV as it would be confused */ /* dont use mv_penalty table for crap MV as it would be confused */
if (s->me_method < ME_EPZS) s->mv_penalty = default_mv_penalty; if (s->me_method < ME_EPZS) s->mv_penalty = default_mv_penalty;
@ -593,17 +654,25 @@ int MPV_encode_init(AVCodecContext *avctx)
if (MPV_common_init(s) < 0) if (MPV_common_init(s) < 0)
return -1; return -1;
if (s->out_format == FMT_H263)
h263_encode_init(s);
else if (s->out_format == FMT_MPEG1)
ff_mpeg1_encode_init(s);
if(s->msmpeg4_version)
ff_msmpeg4_encode_init(s);
/* init default q matrix */ /* init default q matrix */
for(i=0;i<64;i++) { for(i=0;i<64;i++) {
int j= s->idct_permutation[i];
if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){ if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
s->intra_matrix[i] = ff_mpeg4_default_intra_matrix[i]; s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
s->inter_matrix[i] = ff_mpeg4_default_non_intra_matrix[i]; s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
}else if(s->out_format == FMT_H263){ }else if(s->out_format == FMT_H263){
s->intra_matrix[i] = s->intra_matrix[j] =
s->inter_matrix[i] = ff_mpeg1_default_non_intra_matrix[i]; s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
}else{ /* mpeg1 */ }else{ /* mpeg1 */
s->intra_matrix[i] = ff_mpeg1_default_intra_matrix[i]; s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
s->inter_matrix[i] = ff_mpeg1_default_non_intra_matrix[i]; s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
} }
} }
@ -1450,7 +1519,7 @@ static inline void put_dct(MpegEncContext *s,
{ {
if (!s->mpeg2) if (!s->mpeg2)
s->dct_unquantize(s, block, i, s->qscale); s->dct_unquantize(s, block, i, s->qscale);
ff_idct_put (dest, line_size, block); s->idct_put (dest, line_size, block);
} }
/* add block[] to dest[] */ /* add block[] to dest[] */
@ -1458,7 +1527,7 @@ static inline void add_dct(MpegEncContext *s,
DCTELEM *block, int i, UINT8 *dest, int line_size) DCTELEM *block, int i, UINT8 *dest, int line_size)
{ {
if (s->block_last_index[i] >= 0) { if (s->block_last_index[i] >= 0) {
ff_idct_add (dest, line_size, block); s->idct_add (dest, line_size, block);
} }
} }
@ -1468,7 +1537,7 @@ static inline void add_dequant_dct(MpegEncContext *s,
if (s->block_last_index[i] >= 0) { if (s->block_last_index[i] >= 0) {
s->dct_unquantize(s, block, i, s->qscale); s->dct_unquantize(s, block, i, s->qscale);
ff_idct_add (dest, line_size, block); s->idct_add (dest, line_size, block);
} }
} }
@ -1720,7 +1789,7 @@ static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int th
if(last_index<=skip_dc - 1) return; if(last_index<=skip_dc - 1) return;
for(i=0; i<=last_index; i++){ for(i=0; i<=last_index; i++){
const int j = zigzag_direct[i]; const int j = s->intra_scantable.permutated[i];
const int level = ABS(block[j]); const int level = ABS(block[j]);
if(level==1){ if(level==1){
if(skip_dc && i==0) continue; if(skip_dc && i==0) continue;
@ -1734,7 +1803,7 @@ static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int th
} }
if(score >= threshold) return; if(score >= threshold) return;
for(i=skip_dc; i<=last_index; i++){ for(i=skip_dc; i<=last_index; i++){
const int j = zigzag_direct[i]; const int j = s->intra_scantable.permutated[i];
block[j]=0; block[j]=0;
} }
if(block[0]) s->block_last_index[n]= 0; if(block[0]) s->block_last_index[n]= 0;
@ -1746,9 +1815,14 @@ static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index
int i; int i;
const int maxlevel= s->max_qcoeff; const int maxlevel= s->max_qcoeff;
const int minlevel= s->min_qcoeff; const int minlevel= s->min_qcoeff;
for(i=0;i<=last_index; i++){ if(s->mb_intra){
const int j = zigzag_direct[i]; i=1; //skip clipping of intra dc
}else
i=0;
for(;i<=last_index; i++){
const int j= s->intra_scantable.permutated[i];
int level = block[j]; int level = block[j];
if (level>maxlevel) level=maxlevel; if (level>maxlevel) level=maxlevel;
@ -1760,22 +1834,22 @@ static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index
static inline void requantize_coeffs(MpegEncContext *s, DCTELEM block[64], int oldq, int newq, int n) static inline void requantize_coeffs(MpegEncContext *s, DCTELEM block[64], int oldq, int newq, int n)
{ {
int i; int i;
if(s->mb_intra){ if(s->mb_intra){
//FIXME requantize, note (mpeg1/h263/h263p-aic dont need it,...) i=1; //skip clipping of intra dc
i=1; //FIXME requantize, note (mpeg1/h263/h263p-aic dont need it,...)
}else }else
i=0; i=0;
for(;i<=s->block_last_index[n]; i++){ for(;i<=s->block_last_index[n]; i++){
const int j = zigzag_direct[i]; const int j = s->intra_scantable.permutated[i];
int level = block[j]; int level = block[j];
block[j]= ROUNDED_DIV(level*oldq, newq); block[j]= ROUNDED_DIV(level*oldq, newq);
} }
for(i=s->block_last_index[n]; i>=0; i--){ for(i=s->block_last_index[n]; i>=0; i--){
const int j = zigzag_direct[i]; //FIXME other scantabs const int j = s->intra_scantable.permutated[i];
if(block[j]) break; if(block[j]) break;
} }
s->block_last_index[n]= i; s->block_last_index[n]= i;
@ -1791,11 +1865,14 @@ static inline void auto_requantize_coeffs(MpegEncContext *s, DCTELEM block[6][64
assert(s->adaptive_quant); assert(s->adaptive_quant);
for(n=0; n<6; n++){ for(n=0; n<6; n++){
if(s->mb_intra) i=1; if(s->mb_intra){
else i=0; i=1; //skip clipping of intra dc
//FIXME requantize, note (mpeg1/h263/h263p-aic dont need it,...)
}else
i=0;
for(;i<=s->block_last_index[n]; i++){ for(;i<=s->block_last_index[n]; i++){
const int j = zigzag_direct[i]; //FIXME other scantabs const int j = s->intra_scantable.permutated[i];
int level = block[n][j]; int level = block[n][j];
if(largest < level) largest = level; if(largest < level) largest = level;
if(smallest > level) smallest= level; if(smallest > level) smallest= level;
@ -2379,8 +2456,11 @@ static void encode_picture(MpegEncContext *s, int picture_number)
if (s->out_format == FMT_MJPEG) { if (s->out_format == FMT_MJPEG) {
/* for mjpeg, we do include qscale in the matrix */ /* for mjpeg, we do include qscale in the matrix */
s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0]; s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
for(i=1;i<64;i++) for(i=1;i<64;i++){
s->intra_matrix[i] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3); int j= s->idct_permutation[i];
s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
}
convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
s->q_intra_matrix16_bias, s->intra_matrix, s->intra_quant_bias); s->q_intra_matrix16_bias, s->intra_matrix, s->intra_quant_bias);
} }
@ -2752,7 +2832,7 @@ static int dct_quantize_c(MpegEncContext *s,
#ifndef ARCH_ALPHA /* Alpha uses unpermuted matrix */ #ifndef ARCH_ALPHA /* Alpha uses unpermuted matrix */
/* we need this permutation so that we correct the IDCT /* we need this permutation so that we correct the IDCT
permutation. will be moved into DCT code */ permutation. will be moved into DCT code */
block_permute(block); block_permute(block, s->idct_permutation); //FIXME remove
#endif #endif
if (s->mb_intra) { if (s->mb_intra) {
@ -2782,7 +2862,7 @@ static int dct_quantize_c(MpegEncContext *s,
threshold2= (threshold1<<1); threshold2= (threshold1<<1);
for(;i<64;i++) { for(;i<64;i++) {
j = zigzag_direct[i]; j = s->intra_scantable.permutated[i];
level = block[j]; level = block[j];
level = level * qmat[j]; level = level * qmat[j];
@ -2813,8 +2893,7 @@ static void dct_unquantize_mpeg1_c(MpegEncContext *s,
int i, level, nCoeffs; int i, level, nCoeffs;
const UINT16 *quant_matrix; const UINT16 *quant_matrix;
if(s->alternate_scan) nCoeffs= 64; nCoeffs= s->block_last_index[n];
else nCoeffs= s->block_last_index[n]+1;
if (s->mb_intra) { if (s->mb_intra) {
if (n < 4) if (n < 4)
@ -2823,8 +2902,8 @@ static void dct_unquantize_mpeg1_c(MpegEncContext *s,
block[0] = block[0] * s->c_dc_scale; block[0] = block[0] * s->c_dc_scale;
/* XXX: only mpeg1 */ /* XXX: only mpeg1 */
quant_matrix = s->intra_matrix; quant_matrix = s->intra_matrix;
for(i=1;i<nCoeffs;i++) { for(i=1;i<=nCoeffs;i++) {
int j= zigzag_direct[i]; int j= s->intra_scantable.permutated[i];
level = block[j]; level = block[j];
if (level) { if (level) {
if (level < 0) { if (level < 0) {
@ -2846,8 +2925,8 @@ static void dct_unquantize_mpeg1_c(MpegEncContext *s,
} else { } else {
i = 0; i = 0;
quant_matrix = s->inter_matrix; quant_matrix = s->inter_matrix;
for(;i<nCoeffs;i++) { for(;i<=nCoeffs;i++) {
int j= zigzag_direct[i]; int j= s->intra_scantable.permutated[i];
level = block[j]; level = block[j];
if (level) { if (level) {
if (level < 0) { if (level < 0) {
@ -2877,8 +2956,8 @@ static void dct_unquantize_mpeg2_c(MpegEncContext *s,
int i, level, nCoeffs; int i, level, nCoeffs;
const UINT16 *quant_matrix; const UINT16 *quant_matrix;
if(s->alternate_scan) nCoeffs= 64; if(s->alternate_scan) nCoeffs= 63;
else nCoeffs= s->block_last_index[n]+1; else nCoeffs= s->block_last_index[n];
if (s->mb_intra) { if (s->mb_intra) {
if (n < 4) if (n < 4)
@ -2886,8 +2965,8 @@ static void dct_unquantize_mpeg2_c(MpegEncContext *s,
else else
block[0] = block[0] * s->c_dc_scale; block[0] = block[0] * s->c_dc_scale;
quant_matrix = s->intra_matrix; quant_matrix = s->intra_matrix;
for(i=1;i<nCoeffs;i++) { for(i=1;i<=nCoeffs;i++) {
int j= zigzag_direct[i]; int j= s->intra_scantable.permutated[i];
level = block[j]; level = block[j];
if (level) { if (level) {
if (level < 0) { if (level < 0) {
@ -2908,8 +2987,8 @@ static void dct_unquantize_mpeg2_c(MpegEncContext *s,
int sum=-1; int sum=-1;
i = 0; i = 0;
quant_matrix = s->inter_matrix; quant_matrix = s->inter_matrix;
for(;i<nCoeffs;i++) { for(;i<=nCoeffs;i++) {
int j= zigzag_direct[i]; int j= s->intra_scantable.permutated[i];
level = block[j]; level = block[j];
if (level) { if (level) {
if (level < 0) { if (level < 0) {
@ -2940,27 +3019,27 @@ static void dct_unquantize_h263_c(MpegEncContext *s,
int i, level, qmul, qadd; int i, level, qmul, qadd;
int nCoeffs; int nCoeffs;
assert(s->block_last_index[n]>=0);
qadd = (qscale - 1) | 1;
qmul = qscale << 1;
if (s->mb_intra) { if (s->mb_intra) {
if (!s->h263_aic) { if (!s->h263_aic) {
if (n < 4) if (n < 4)
block[0] = block[0] * s->y_dc_scale; block[0] = block[0] * s->y_dc_scale;
else else
block[0] = block[0] * s->c_dc_scale; block[0] = block[0] * s->c_dc_scale;
} }else
qadd = 0;
i = 1; i = 1;
nCoeffs= 64; //does not allways use zigzag table nCoeffs= 63; //does not allways use zigzag table
} else { } else {
i = 0; i = 0;
nCoeffs= zigzag_end[ s->block_last_index[n] ]; nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
} }
qmul = s->qscale << 1; for(;i<=nCoeffs;i++) {
if (s->h263_aic && s->mb_intra)
qadd = 0;
else
qadd = (s->qscale - 1) | 1;
for(;i<nCoeffs;i++) {
level = block[i]; level = block[i];
if (level) { if (level) {
if (level < 0) { if (level < 0) {

View File

@ -99,6 +99,11 @@ typedef struct ReorderBuffer{
int picture_in_gop_number; int picture_in_gop_number;
} ReorderBuffer; } ReorderBuffer;
/* A coefficient scan order prepared for one IDCT permutation;
   filled in by ff_init_scantable(). */
typedef struct ScanTable{
UINT8 permutated[64]; /* source scan order with each entry mapped through idct_permutation[] */
UINT8 raster_end[64]; /* raster_end[i]: largest permutated[] value among scan positions 0..i */
} ScanTable;
typedef struct MpegEncContext { typedef struct MpegEncContext {
struct AVCodecContext *avctx; struct AVCodecContext *avctx;
/* the following parameters must be initialized before encoding */ /* the following parameters must be initialized before encoding */
@ -286,6 +291,12 @@ typedef struct MpegEncContext {
UINT16 __align8 q_intra_matrix16_bias[32][64]; UINT16 __align8 q_intra_matrix16_bias[32][64];
UINT16 __align8 q_inter_matrix16_bias[32][64]; UINT16 __align8 q_inter_matrix16_bias[32][64];
int block_last_index[6]; /* last non zero coefficient in block */ int block_last_index[6]; /* last non zero coefficient in block */
/* scantables */
ScanTable intra_scantable;
ScanTable intra_h_scantable;
ScanTable intra_v_scantable;
ScanTable inter_scantable; // if inter == intra then intra should be used to reduce tha cache usage
UINT8 idct_permutation[64];
void *opaque; /* private data for the user */ void *opaque; /* private data for the user */
@ -421,10 +432,6 @@ typedef struct MpegEncContext {
int per_mb_rl_table; int per_mb_rl_table;
int esc3_level_length; int esc3_level_length;
int esc3_run_length; int esc3_run_length;
UINT8 *inter_scantable;
UINT8 *intra_scantable;
UINT8 *intra_v_scantable;
UINT8 *intra_h_scantable;
/* [mb_intra][isChroma][level][run][last] */ /* [mb_intra][isChroma][level][run][last] */
int (*ac_stats)[2][MAX_LEVEL+1][MAX_RUN+1][2]; int (*ac_stats)[2][MAX_LEVEL+1][MAX_RUN+1][2];
int inter_intra_pred; int inter_intra_pred;
@ -477,7 +484,9 @@ typedef struct MpegEncContext {
void (*dct_unquantize)(struct MpegEncContext *s, // unquantizer to use (mpeg4 can use both) void (*dct_unquantize)(struct MpegEncContext *s, // unquantizer to use (mpeg4 can use both)
DCTELEM *block, int n, int qscale); DCTELEM *block, int n, int qscale);
int (*dct_quantize)(struct MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow); int (*dct_quantize)(struct MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
void (*fdct)(DCTELEM *block); void (*fdct)(DCTELEM *block/* align 16*/);
void (*idct_put)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
void (*idct_add)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
} MpegEncContext; } MpegEncContext;
int MPV_common_init(MpegEncContext *s); int MPV_common_init(MpegEncContext *s);
@ -498,6 +507,7 @@ extern void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w);
void ff_conceal_past_errors(MpegEncContext *s, int conceal_all); void ff_conceal_past_errors(MpegEncContext *s, int conceal_all);
void ff_copy_bits(PutBitContext *pb, UINT8 *src, int length); void ff_copy_bits(PutBitContext *pb, UINT8 *src, int length);
void ff_clean_intra_table_entries(MpegEncContext *s); void ff_clean_intra_table_entries(MpegEncContext *s);
void ff_init_scantable(MpegEncContext *s, ScanTable *st, const UINT8 *src_scantable);
extern int ff_bit_exact; extern int ff_bit_exact;
@ -511,8 +521,8 @@ void ff_fix_long_p_mvs(MpegEncContext * s);
void ff_fix_long_b_mvs(MpegEncContext * s, int16_t (*mv_table)[2], int f_code, int type); void ff_fix_long_b_mvs(MpegEncContext * s, int16_t (*mv_table)[2], int f_code, int type);
/* mpeg12.c */ /* mpeg12.c */
extern INT16 ff_mpeg1_default_intra_matrix[64]; extern const INT16 ff_mpeg1_default_intra_matrix[64];
extern INT16 ff_mpeg1_default_non_intra_matrix[64]; extern const INT16 ff_mpeg1_default_non_intra_matrix[64];
extern UINT8 ff_mpeg1_dc_scale_table[128]; extern UINT8 ff_mpeg1_dc_scale_table[128];
void mpeg1_encode_picture_header(MpegEncContext *s, int picture_number); void mpeg1_encode_picture_header(MpegEncContext *s, int picture_number);
@ -551,8 +561,8 @@ static inline int get_rl_index(const RLTable *rl, int last, int run, int level)
extern UINT8 ff_mpeg4_y_dc_scale_table[32]; extern UINT8 ff_mpeg4_y_dc_scale_table[32];
extern UINT8 ff_mpeg4_c_dc_scale_table[32]; extern UINT8 ff_mpeg4_c_dc_scale_table[32];
extern INT16 ff_mpeg4_default_intra_matrix[64]; extern const INT16 ff_mpeg4_default_intra_matrix[64];
extern INT16 ff_mpeg4_default_non_intra_matrix[64]; extern const INT16 ff_mpeg4_default_non_intra_matrix[64];
void h263_encode_mb(MpegEncContext *s, void h263_encode_mb(MpegEncContext *s,
DCTELEM block[6][64], DCTELEM block[6][64],

View File

@ -164,32 +164,21 @@ static void common_init(MpegEncContext * s)
break; break;
} }
if(s->msmpeg4_version==4){ if(s->msmpeg4_version==4){
s->intra_scantable = wmv1_scantable[1]; int i;
s->intra_h_scantable= wmv1_scantable[2]; ff_init_scantable(s, &s->intra_scantable , wmv1_scantable[1]);
s->intra_v_scantable= wmv1_scantable[3]; ff_init_scantable(s, &s->intra_h_scantable, wmv1_scantable[2]);
s->inter_scantable = wmv1_scantable[0]; ff_init_scantable(s, &s->intra_v_scantable, wmv1_scantable[3]);
}else{ ff_init_scantable(s, &s->inter_scantable , wmv1_scantable[0]);
s->intra_scantable = zigzag_direct;
s->intra_h_scantable= ff_alternate_horizontal_scan;
s->intra_v_scantable= ff_alternate_vertical_scan;
s->inter_scantable = zigzag_direct;
} }
//Note the default tables are set in common_init in mpegvideo.c
if(!inited){ if(!inited){
int i; int i;
inited=1; inited=1;
init_h263_dc_for_msmpeg4(); init_h263_dc_for_msmpeg4();
/* permute for IDCT */
for(i=0; i<WMV1_SCANTABLE_COUNT; i++){
int k;
for(k=0;k<64;k++) {
int j = wmv1_scantable[i][k];
wmv1_scantable[i][k]= block_permute_op(j);
}
}
} }
} }
@ -936,7 +925,7 @@ static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int
rl = &rl_table[3 + s->rl_chroma_table_index]; rl = &rl_table[3 + s->rl_chroma_table_index];
} }
run_diff = 0; run_diff = 0;
scantable= s->intra_scantable; scantable= s->intra_scantable.permutated;
set_stat(ST_INTRA_AC); set_stat(ST_INTRA_AC);
} else { } else {
i = 0; i = 0;
@ -945,12 +934,12 @@ static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int
run_diff = 0; run_diff = 0;
else else
run_diff = 1; run_diff = 1;
scantable= s->inter_scantable; scantable= s->inter_scantable.permutated;
set_stat(ST_INTER_AC); set_stat(ST_INTER_AC);
} }
/* recalculate block_last_index for M$ wmv1 */ /* recalculate block_last_index for M$ wmv1 */
if(scantable!=zigzag_direct && s->block_last_index[n]>0){ if(s->msmpeg4_version==4 && s->block_last_index[n]>0){
for(last_index=63; last_index>=0; last_index--){ for(last_index=63; last_index>=0; last_index--){
if(block[scantable[last_index]]) break; if(block[scantable[last_index]]) break;
} }
@ -1704,11 +1693,11 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
} }
if (s->ac_pred) { if (s->ac_pred) {
if (dc_pred_dir == 0) if (dc_pred_dir == 0)
scan_table = s->intra_v_scantable; /* left */ scan_table = s->intra_v_scantable.permutated; /* left */
else else
scan_table = s->intra_h_scantable; /* top */ scan_table = s->intra_h_scantable.permutated; /* top */
} else { } else {
scan_table = s->intra_scantable; scan_table = s->intra_scantable.permutated;
} }
set_stat(ST_INTRA_AC); set_stat(ST_INTRA_AC);
rl_vlc= rl->rl_vlc[0]; rl_vlc= rl->rl_vlc[0];
@ -1727,7 +1716,7 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
s->block_last_index[n] = i; s->block_last_index[n] = i;
return 0; return 0;
} }
scan_table = s->inter_scantable; scan_table = s->inter_scantable.permutated;
set_stat(ST_INTER_AC); set_stat(ST_INTER_AC);
rl_vlc= rl->rl_vlc[s->qscale]; rl_vlc= rl->rl_vlc[s->qscale];
} }

View File

@ -1819,7 +1819,7 @@ static UINT8 old_ff_c_dc_scale_table[32]={
#define WMV1_SCANTABLE_COUNT 4 #define WMV1_SCANTABLE_COUNT 4
static UINT8 wmv1_scantable00[64]= { static const UINT8 wmv1_scantable00[64]= {
0x00, 0x08, 0x01, 0x02, 0x09, 0x10, 0x18, 0x11, 0x00, 0x08, 0x01, 0x02, 0x09, 0x10, 0x18, 0x11,
0x0A, 0x03, 0x04, 0x0B, 0x12, 0x19, 0x20, 0x28, 0x0A, 0x03, 0x04, 0x0B, 0x12, 0x19, 0x20, 0x28,
0x30, 0x38, 0x29, 0x21, 0x1A, 0x13, 0x0C, 0x05, 0x30, 0x38, 0x29, 0x21, 0x1A, 0x13, 0x0C, 0x05,
@ -1829,7 +1829,7 @@ static UINT8 wmv1_scantable00[64]= {
0x2C, 0x25, 0x1E, 0x17, 0x1F, 0x26, 0x2D, 0x35, 0x2C, 0x25, 0x1E, 0x17, 0x1F, 0x26, 0x2D, 0x35,
0x3D, 0x3E, 0x36, 0x2E, 0x27, 0x2F, 0x37, 0x3F, 0x3D, 0x3E, 0x36, 0x2E, 0x27, 0x2F, 0x37, 0x3F,
}; };
static UINT8 wmv1_scantable01[64]= { static const UINT8 wmv1_scantable01[64]= {
0x00, 0x08, 0x01, 0x02, 0x09, 0x10, 0x18, 0x11, 0x00, 0x08, 0x01, 0x02, 0x09, 0x10, 0x18, 0x11,
0x0A, 0x03, 0x04, 0x0B, 0x12, 0x19, 0x20, 0x28, 0x0A, 0x03, 0x04, 0x0B, 0x12, 0x19, 0x20, 0x28,
0x21, 0x30, 0x1A, 0x13, 0x0C, 0x05, 0x06, 0x0D, 0x21, 0x30, 0x1A, 0x13, 0x0C, 0x05, 0x06, 0x0D,
@ -1839,7 +1839,7 @@ static UINT8 wmv1_scantable01[64]= {
0x1E, 0x17, 0x1F, 0x26, 0x2D, 0x34, 0x3C, 0x35, 0x1E, 0x17, 0x1F, 0x26, 0x2D, 0x34, 0x3C, 0x35,
0x3D, 0x2E, 0x27, 0x2F, 0x36, 0x3E, 0x37, 0x3F, 0x3D, 0x2E, 0x27, 0x2F, 0x36, 0x3E, 0x37, 0x3F,
}; };
static UINT8 wmv1_scantable02[64]= { static const UINT8 wmv1_scantable02[64]= {
0x00, 0x01, 0x08, 0x02, 0x03, 0x09, 0x10, 0x18, 0x00, 0x01, 0x08, 0x02, 0x03, 0x09, 0x10, 0x18,
0x11, 0x0A, 0x04, 0x05, 0x0B, 0x12, 0x19, 0x20, 0x11, 0x0A, 0x04, 0x05, 0x0B, 0x12, 0x19, 0x20,
0x28, 0x30, 0x21, 0x1A, 0x13, 0x0C, 0x06, 0x07, 0x28, 0x30, 0x21, 0x1A, 0x13, 0x0C, 0x06, 0x07,
@ -1849,7 +1849,7 @@ static UINT8 wmv1_scantable02[64]= {
0x17, 0x1F, 0x26, 0x2D, 0x34, 0x3B, 0x3C, 0x35, 0x17, 0x1F, 0x26, 0x2D, 0x34, 0x3B, 0x3C, 0x35,
0x2E, 0x27, 0x2F, 0x36, 0x3D, 0x3E, 0x37, 0x3F, 0x2E, 0x27, 0x2F, 0x36, 0x3D, 0x3E, 0x37, 0x3F,
}; };
static UINT8 wmv1_scantable03[64]= { static const UINT8 wmv1_scantable03[64]= {
0x00, 0x08, 0x10, 0x01, 0x18, 0x20, 0x28, 0x09, 0x00, 0x08, 0x10, 0x01, 0x18, 0x20, 0x28, 0x09,
0x02, 0x03, 0x0A, 0x11, 0x19, 0x30, 0x38, 0x29, 0x02, 0x03, 0x0A, 0x11, 0x19, 0x30, 0x38, 0x29,
0x21, 0x1A, 0x12, 0x0B, 0x04, 0x05, 0x0C, 0x13, 0x21, 0x1A, 0x12, 0x0B, 0x04, 0x05, 0x0C, 0x13,
@ -1860,7 +1860,7 @@ static UINT8 wmv1_scantable03[64]= {
0x2E, 0x27, 0x2F, 0x36, 0x3D, 0x3E, 0x37, 0x3F, 0x2E, 0x27, 0x2F, 0x36, 0x3D, 0x3E, 0x37, 0x3F,
}; };
static UINT8 *wmv1_scantable[WMV1_SCANTABLE_COUNT+1]={ static const UINT8 *wmv1_scantable[WMV1_SCANTABLE_COUNT+1]={
wmv1_scantable00, wmv1_scantable00,
wmv1_scantable01, wmv1_scantable01,
wmv1_scantable02, wmv1_scantable02,

View File

@ -20,5 +20,7 @@
void simple_idct_put(UINT8 *dest, int line_size, INT16 *block); void simple_idct_put(UINT8 *dest, int line_size, INT16 *block);
void simple_idct_add(UINT8 *dest, int line_size, INT16 *block); void simple_idct_add(UINT8 *dest, int line_size, INT16 *block);
void simple_idct_mmx(short *block); void ff_simple_idct_mmx(short *block);
void ff_simple_idct_add_mmx(UINT8 *dest, int line_size, INT16 *block);
void ff_simple_idct_put_mmx(UINT8 *dest, int line_size, INT16 *block);
void simple_idct(short *block); void simple_idct(short *block);

View File

@ -51,8 +51,8 @@ do_ffmpeg()
{ {
f="$1" f="$1"
shift shift
echo $ffmpeg -bitexact -dct_algo 1 $* echo $ffmpeg -bitexact -dct_algo 1 -idct_algo 2 $*
$ffmpeg -bitexact -dct_algo 1 -benchmark $* > $datadir/bench.tmp $ffmpeg -bitexact -dct_algo 1 -idct_algo 2 -benchmark $* > $datadir/bench.tmp
md5sum -b $f >> $logfile md5sum -b $f >> $logfile
expr "`cat $datadir/bench.tmp`" : '.*utime=\(.*s\)' > $datadir/bench2.tmp expr "`cat $datadir/bench.tmp`" : '.*utime=\(.*s\)' > $datadir/bench2.tmp
echo `cat $datadir/bench2.tmp` $f >> $benchfile echo `cat $datadir/bench2.tmp` $f >> $benchfile
@ -62,8 +62,8 @@ do_ffmpeg_crc()
{ {
f="$1" f="$1"
shift shift
echo $ffmpeg -y -bitexact -dct_algo 1 $* -f crc $datadir/ffmpeg.crc echo $ffmpeg -y -bitexact -dct_algo 1 -idct_algo 2 $* -f crc $datadir/ffmpeg.crc
$ffmpeg -y -bitexact -dct_algo 1 $* -f crc $datadir/ffmpeg.crc $ffmpeg -y -bitexact -dct_algo 1 -idct_algo 2 $* -f crc $datadir/ffmpeg.crc
echo -n "$f " >> $logfile echo -n "$f " >> $logfile
cat $datadir/ffmpeg.crc >> $logfile cat $datadir/ffmpeg.crc >> $logfile
} }