mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-08 13:22:53 +02:00
idct permutation cleanup, idct can be selected per context now
fixing some thread-unsafe code. Originally committed as revision 980 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
f9bb4bdffc
commit
2ad1516a6c
10
ffmpeg.c
10
ffmpeg.c
@ -117,6 +117,7 @@ static int do_deinterlace = 0;
|
||||
static int workaround_bugs = 0;
|
||||
static int error_resilience = 0;
|
||||
static int dct_algo = 0;
|
||||
static int idct_algo = 0;
|
||||
static int use_part = 0;
|
||||
static int packet_size = 0;
|
||||
|
||||
@ -1392,6 +1393,12 @@ void opt_dct_algo(const char *arg)
|
||||
dct_algo = atoi(arg);
|
||||
}
|
||||
|
||||
/* Option handler for "-idct_algo": converts the argument text with atoi()
 * and stores it in the file-scope idct_algo setting. The input- and
 * output-file setup code copies that setting into each codec context's
 * idct_algo field. atoi() reports no errors, so non-numeric text is not
 * diagnosed here. */
void opt_idct_algo(const char *arg)
|
||||
{
|
||||
idct_algo = atoi(arg);
|
||||
}
|
||||
|
||||
|
||||
void opt_error_resilience(const char *arg)
|
||||
{
|
||||
error_resilience = atoi(arg);
|
||||
@ -1750,6 +1757,7 @@ void opt_input_file(const char *filename)
|
||||
rfps = ic->streams[i]->r_frame_rate;
|
||||
enc->workaround_bugs = workaround_bugs;
|
||||
enc->error_resilience = error_resilience;
|
||||
enc->idct_algo= idct_algo;
|
||||
if (enc->frame_rate != rfps) {
|
||||
fprintf(stderr,"\nSeems that stream %d comes from film source: %2.2f->%2.2f\n",
|
||||
i, (float)enc->frame_rate / FRAME_RATE_BASE,
|
||||
@ -1922,6 +1930,7 @@ void opt_output_file(const char *filename)
|
||||
video_enc->i_quant_offset = video_i_qoffset;
|
||||
video_enc->b_quant_offset = video_b_qoffset;
|
||||
video_enc->dct_algo = dct_algo;
|
||||
video_enc->idct_algo = idct_algo;
|
||||
if(packet_size){
|
||||
video_enc->rtp_mode= 1;
|
||||
video_enc->rtp_payload_size= packet_size;
|
||||
@ -2287,6 +2296,7 @@ const OptionDef options[] = {
|
||||
{ "me", HAS_ARG | OPT_EXPERT, {(void*)opt_motion_estimation}, "set motion estimation method",
|
||||
"method" },
|
||||
{ "dct_algo", HAS_ARG | OPT_EXPERT, {(void*)opt_dct_algo}, "set dct algo", "algo" },
|
||||
{ "idct_algo", HAS_ARG | OPT_EXPERT, {(void*)opt_idct_algo}, "set idct algo", "algo" },
|
||||
{ "er", HAS_ARG | OPT_EXPERT, {(void*)opt_error_resilience}, "set error resilience", "" },
|
||||
{ "bf", HAS_ARG | OPT_EXPERT, {(void*)opt_b_frames}, "use 'frames' B frames (only MPEG-4)", "frames" },
|
||||
{ "hq", OPT_BOOL | OPT_EXPERT, {(void*)&use_hq}, "activate high quality settings" },
|
||||
|
@ -5,8 +5,8 @@
|
||||
|
||||
#define LIBAVCODEC_VERSION_INT 0x000406
|
||||
#define LIBAVCODEC_VERSION "0.4.6"
|
||||
#define LIBAVCODEC_BUILD 4628
|
||||
#define LIBAVCODEC_BUILD_STR "4628"
|
||||
#define LIBAVCODEC_BUILD 4629
|
||||
#define LIBAVCODEC_BUILD_STR "4629"
|
||||
|
||||
enum CodecID {
|
||||
CODEC_ID_NONE,
|
||||
@ -684,6 +684,21 @@ typedef struct AVCodecContext {
|
||||
*/
|
||||
int fourcc;
|
||||
|
||||
/**
|
||||
* idct algorithm, see FF_IDCT_* below
|
||||
* encoding: set by user
|
||||
* decoding: set by user
|
||||
*/
|
||||
int idct_algo;
|
||||
#define FF_IDCT_AUTO 0
|
||||
#define FF_IDCT_INT 1
|
||||
#define FF_IDCT_SIMPLE 2
|
||||
#define FF_IDCT_SIMPLEMMX 3
|
||||
#define FF_IDCT_LIBMPEG2MMX 4
|
||||
#define FF_IDCT_PS2 5
|
||||
#define FF_IDCT_MLIB 6
|
||||
#define FF_IDCT_ARM 7
|
||||
|
||||
//FIXME this should be reordered after kabi's API is finished ...
|
||||
//TODO kill kabi
|
||||
/*
|
||||
|
@ -932,6 +932,22 @@ static inline int ff_get_fourcc(char *s){
|
||||
return (s[0]) + (s[1]<<8) + (s[2]<<16) + (s[3]<<24);
|
||||
}
|
||||
|
||||
|
||||
/*
 * MASK_ABS(mask, level): replaces level with its absolute value and leaves
 * in mask the sign mask that was used to compute it.
 *
 * Portable variant: mask = level>>31, then level = (level^mask)-mask.
 * x86 variant: the "+a"/"=&d" constraints pin level to eax and mask to edx;
 * cdq fills edx from the sign of eax, then the same xor/subtract pair is
 * applied with two instructions.
 *
 * NOTE(review): the portable variant presumably expects a 32-bit signed
 * level and an arithmetic right shift (mask becomes 0 or -1) - confirm.
 * Both variants expand to bare statements with no do/while(0) wrapper, so
 * the macro must not be used as the unbraced body of an if/else.
 */
#ifdef ARCH_X86
|
||||
#define MASK_ABS(mask, level)\
|
||||
asm volatile(\
|
||||
"cdq \n\t"\
|
||||
"xorl %1, %0 \n\t"\
|
||||
"subl %1, %0 \n\t"\
|
||||
: "+a" (level), "=&d" (mask)\
|
||||
);
|
||||
#else
|
||||
#define MASK_ABS(mask, level)\
|
||||
mask= level>>31;\
|
||||
level= (level^mask)-mask;
|
||||
#endif
|
||||
|
||||
|
||||
#if __CPU__ >= 686 && !defined(RUNTIME_CPUDETECT)
|
||||
#define COPY3_IF_LT(x,y,a,b,c,d)\
|
||||
asm volatile (\
|
||||
|
@ -20,11 +20,7 @@
|
||||
*/
|
||||
#include "avcodec.h"
|
||||
#include "dsputil.h"
|
||||
#include "simple_idct.h"
|
||||
|
||||
void (*ff_idct)(DCTELEM *block);
|
||||
void (*ff_idct_put)(UINT8 *dest, int line_size, DCTELEM *block);
|
||||
void (*ff_idct_add)(UINT8 *dest, int line_size, DCTELEM *block);
|
||||
void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
|
||||
void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride);
|
||||
void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
|
||||
@ -49,16 +45,11 @@ int ff_bit_exact=0;
|
||||
UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
|
||||
UINT32 squareTbl[512];
|
||||
|
||||
extern INT16 ff_mpeg1_default_intra_matrix[64];
|
||||
extern INT16 ff_mpeg1_default_non_intra_matrix[64];
|
||||
extern INT16 ff_mpeg4_default_intra_matrix[64];
|
||||
extern INT16 ff_mpeg4_default_non_intra_matrix[64];
|
||||
|
||||
UINT8 zigzag_direct[64] = {
|
||||
0, 1, 8, 16, 9, 2, 3, 10,
|
||||
17, 24, 32, 25, 18, 11, 4, 5,
|
||||
const UINT8 ff_zigzag_direct[64] = {
|
||||
0, 1, 8, 16, 9, 2, 3, 10,
|
||||
17, 24, 32, 25, 18, 11, 4, 5,
|
||||
12, 19, 26, 33, 40, 48, 41, 34,
|
||||
27, 20, 13, 6, 7, 14, 21, 28,
|
||||
27, 20, 13, 6, 7, 14, 21, 28,
|
||||
35, 42, 49, 56, 57, 50, 43, 36,
|
||||
29, 22, 15, 23, 30, 37, 44, 51,
|
||||
58, 59, 52, 45, 38, 31, 39, 46,
|
||||
@ -68,11 +59,8 @@ UINT8 zigzag_direct[64] = {
|
||||
/* non-permuted inverse of zigzag_direct, plus 1, for the MMX quantizer */
|
||||
UINT16 __align8 inv_zigzag_direct16[64];
|
||||
|
||||
/* not permutated zigzag_direct for MMX quantizer */
|
||||
UINT8 zigzag_direct_noperm[64];
|
||||
|
||||
UINT8 ff_alternate_horizontal_scan[64] = {
|
||||
0, 1, 2, 3, 8, 9, 16, 17,
|
||||
const UINT8 ff_alternate_horizontal_scan[64] = {
|
||||
0, 1, 2, 3, 8, 9, 16, 17,
|
||||
10, 11, 4, 5, 6, 7, 15, 14,
|
||||
13, 12, 19, 18, 24, 25, 32, 33,
|
||||
26, 27, 20, 21, 22, 23, 28, 29,
|
||||
@ -82,8 +70,8 @@ UINT8 ff_alternate_horizontal_scan[64] = {
|
||||
52, 53, 54, 55, 60, 61, 62, 63,
|
||||
};
|
||||
|
||||
UINT8 ff_alternate_vertical_scan[64] = {
|
||||
0, 8, 16, 24, 1, 9, 2, 10,
|
||||
const UINT8 ff_alternate_vertical_scan[64] = {
|
||||
0, 8, 16, 24, 1, 9, 2, 10,
|
||||
17, 25, 32, 40, 48, 56, 57, 49,
|
||||
41, 33, 26, 18, 3, 11, 4, 12,
|
||||
19, 27, 34, 42, 50, 58, 35, 43,
|
||||
@ -93,21 +81,6 @@ UINT8 ff_alternate_vertical_scan[64] = {
|
||||
38, 46, 54, 62, 39, 47, 55, 63,
|
||||
};
|
||||
|
||||
#ifdef SIMPLE_IDCT
|
||||
|
||||
/* Input permutation for the simple_idct_mmx */
|
||||
static UINT8 simple_mmx_permutation[64]={
|
||||
0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
|
||||
0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
|
||||
0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
|
||||
0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
|
||||
0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
|
||||
0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
|
||||
0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
|
||||
0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
|
||||
};
|
||||
#endif
|
||||
|
||||
/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
|
||||
UINT32 inverse[256]={
|
||||
0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757,
|
||||
@ -144,24 +117,6 @@ UINT32 inverse[256]={
|
||||
17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010,
|
||||
};
|
||||
|
||||
/* used to skip zeros at the end */
|
||||
UINT8 zigzag_end[64];
|
||||
|
||||
UINT8 permutation[64];
|
||||
//UINT8 invPermutation[64];
|
||||
|
||||
/* Fills the global zigzag_end[] table from zigzag_direct[].
 * For every scan position k (0..63), zigzag_end[k] is one more than the
 * largest zigzag_direct[] value found at scan positions 0..k, i.e. a
 * running maximum plus one. Takes no arguments and returns nothing; it
 * reads zigzag_direct[] as it is at call time. */
static void build_zigzag_end(void)
|
||||
{
|
||||
int lastIndex;
|
||||
/* running maximum of zigzag_direct[0..lastIndex] */
int lastIndexAfterPerm=0;
|
||||
for(lastIndex=0; lastIndex<64; lastIndex++)
|
||||
{
|
||||
if(zigzag_direct[lastIndex] > lastIndexAfterPerm)
|
||||
lastIndexAfterPerm= zigzag_direct[lastIndex];
|
||||
zigzag_end[lastIndex]= lastIndexAfterPerm + 1;
|
||||
}
|
||||
}
|
||||
|
||||
int pix_sum_c(UINT8 * pix, int line_size)
|
||||
{
|
||||
int s, i, j;
|
||||
@ -1540,65 +1495,24 @@ int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
|
||||
|
||||
/* permute block so that it corresponds to the MMX idct
|
||||
order */
|
||||
#ifdef SIMPLE_IDCT
|
||||
/* general permutation, but perhaps slightly slower */
|
||||
void block_permute(INT16 *block)
|
||||
void block_permute(INT16 *block, UINT8 *permutation)
|
||||
{
|
||||
int i;
|
||||
INT16 temp[64];
|
||||
|
||||
for(i=0; i<64; i++) temp[ block_permute_op(i) ] = block[i];
|
||||
for(i=0; i<64; i++) temp[ permutation[i] ] = block[i];
|
||||
|
||||
for(i=0; i<64; i++) block[i] = temp[i];
|
||||
}
|
||||
#else
|
||||
|
||||
void block_permute(INT16 *block)
|
||||
{
|
||||
int tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
|
||||
int i;
|
||||
|
||||
for(i=0;i<8;i++) {
|
||||
tmp1 = block[1];
|
||||
tmp2 = block[2];
|
||||
tmp3 = block[3];
|
||||
tmp4 = block[4];
|
||||
tmp5 = block[5];
|
||||
tmp6 = block[6];
|
||||
block[1] = tmp2;
|
||||
block[2] = tmp4;
|
||||
block[3] = tmp6;
|
||||
block[4] = tmp1;
|
||||
block[5] = tmp3;
|
||||
block[6] = tmp5;
|
||||
block += 8;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void clear_blocks_c(DCTELEM *blocks)
|
||||
{
|
||||
memset(blocks, 0, sizeof(DCTELEM)*6*64);
|
||||
}
|
||||
|
||||
/* XXX: these functions should be removed ASAP once all IDCTs are
|
||||
converted */
|
||||
/* Generic "IDCT then store" wrapper built on the ff_idct function pointer.
 * Calls ff_idct() on block (the transform result is left in block itself),
 * then passes block, dest and line_size to put_pixels_clamped().
 * ff_idct must be non-NULL when this is called; it is not checked here. */
void gen_idct_put(UINT8 *dest, int line_size, DCTELEM *block)
|
||||
{
|
||||
ff_idct (block);
|
||||
put_pixels_clamped(block, dest, line_size);
|
||||
}
|
||||
|
||||
/* Generic "IDCT then add" wrapper built on the ff_idct function pointer.
 * Calls ff_idct() on block (the transform result is left in block itself),
 * then passes block, dest and line_size to add_pixels_clamped().
 * ff_idct must be non-NULL when this is called; it is not checked here. */
void gen_idct_add(UINT8 *dest, int line_size, DCTELEM *block)
|
||||
{
|
||||
ff_idct (block);
|
||||
add_pixels_clamped(block, dest, line_size);
|
||||
}
|
||||
|
||||
void dsputil_init(void)
|
||||
{
|
||||
int i, j;
|
||||
int use_permuted_idct;
|
||||
|
||||
for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
|
||||
for(i=0;i<MAX_NEG_CROP;i++) {
|
||||
@ -1610,11 +1524,6 @@ void dsputil_init(void)
|
||||
squareTbl[i] = (i - 256) * (i - 256);
|
||||
}
|
||||
|
||||
#ifdef SIMPLE_IDCT
|
||||
ff_idct = NULL;
|
||||
#else
|
||||
ff_idct = j_rev_dct;
|
||||
#endif
|
||||
get_pixels = get_pixels_c;
|
||||
diff_pixels = diff_pixels_c;
|
||||
put_pixels_clamped = put_pixels_clamped_c;
|
||||
@ -1633,8 +1542,6 @@ void dsputil_init(void)
|
||||
pix_abs8x8_y2 = pix_abs8x8_y2_c;
|
||||
pix_abs8x8_xy2 = pix_abs8x8_xy2_c;
|
||||
|
||||
use_permuted_idct = 1;
|
||||
|
||||
#ifdef HAVE_MMX
|
||||
dsputil_init_mmx();
|
||||
#endif
|
||||
@ -1643,61 +1550,18 @@ void dsputil_init(void)
|
||||
#endif
|
||||
#ifdef HAVE_MLIB
|
||||
dsputil_init_mlib();
|
||||
use_permuted_idct = 0;
|
||||
#endif
|
||||
#ifdef ARCH_ALPHA
|
||||
dsputil_init_alpha();
|
||||
use_permuted_idct = 0;
|
||||
#endif
|
||||
#ifdef ARCH_POWERPC
|
||||
dsputil_init_ppc();
|
||||
#endif
|
||||
#ifdef HAVE_MMI
|
||||
dsputil_init_mmi();
|
||||
use_permuted_idct = 0;
|
||||
#endif
|
||||
|
||||
#ifdef SIMPLE_IDCT
|
||||
if (ff_idct == NULL) {
|
||||
ff_idct_put = simple_idct_put;
|
||||
ff_idct_add = simple_idct_add;
|
||||
use_permuted_idct=0;
|
||||
}
|
||||
#endif
|
||||
if(ff_idct != NULL) {
|
||||
ff_idct_put = gen_idct_put;
|
||||
ff_idct_add = gen_idct_add;
|
||||
}
|
||||
|
||||
if(use_permuted_idct)
|
||||
#ifdef SIMPLE_IDCT
|
||||
for(i=0; i<64; i++) permutation[i]= simple_mmx_permutation[i];
|
||||
#else
|
||||
for(i=0; i<64; i++) permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
|
||||
#endif
|
||||
else
|
||||
for(i=0; i<64; i++) permutation[i]=i;
|
||||
|
||||
for(i=0; i<64; i++) inv_zigzag_direct16[zigzag_direct[i]]= i+1;
|
||||
for(i=0; i<64; i++) zigzag_direct_noperm[i]= zigzag_direct[i];
|
||||
|
||||
if (use_permuted_idct) {
|
||||
/* permute for IDCT */
|
||||
for(i=0;i<64;i++) {
|
||||
j = zigzag_direct[i];
|
||||
zigzag_direct[i] = block_permute_op(j);
|
||||
j = ff_alternate_horizontal_scan[i];
|
||||
ff_alternate_horizontal_scan[i] = block_permute_op(j);
|
||||
j = ff_alternate_vertical_scan[i];
|
||||
ff_alternate_vertical_scan[i] = block_permute_op(j);
|
||||
}
|
||||
block_permute(ff_mpeg1_default_intra_matrix);
|
||||
block_permute(ff_mpeg1_default_non_intra_matrix);
|
||||
block_permute(ff_mpeg4_default_intra_matrix);
|
||||
block_permute(ff_mpeg4_default_non_intra_matrix);
|
||||
}
|
||||
|
||||
build_zigzag_end();
|
||||
for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
|
||||
}
|
||||
|
||||
/* remove any non bit exact operation (testing purpose) */
|
||||
|
@ -34,12 +34,9 @@ void j_rev_dct (DCTELEM *data);
|
||||
void ff_fdct_mmx(DCTELEM *block);
|
||||
|
||||
/* encoding scans */
|
||||
extern UINT8 ff_alternate_horizontal_scan[64];
|
||||
extern UINT8 ff_alternate_vertical_scan[64];
|
||||
extern UINT8 zigzag_direct[64];
|
||||
|
||||
/* permutation table */
|
||||
extern UINT8 permutation[64];
|
||||
extern const UINT8 ff_alternate_horizontal_scan[64];
|
||||
extern const UINT8 ff_alternate_vertical_scan[64];
|
||||
extern const UINT8 ff_zigzag_direct[64];
|
||||
|
||||
/* pixel operations */
|
||||
#define MAX_NEG_CROP 384
|
||||
@ -61,9 +58,6 @@ i (michael) didnt check them, these are just the alignents which i think could b
|
||||
*/
|
||||
|
||||
/* pixel ops : interface with DCT */
|
||||
extern void (*ff_idct)(DCTELEM *block/*align 16*/);
|
||||
extern void (*ff_idct_put)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
|
||||
extern void (*ff_idct_add)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
|
||||
extern void (*get_pixels)(DCTELEM *block/*align 16*/, const UINT8 *pixels/*align 8*/, int line_size);
|
||||
extern void (*diff_pixels)(DCTELEM *block/*align 16*/, const UINT8 *s1/*align 8*/, const UINT8 *s2/*align 8*/, int stride);
|
||||
extern void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size);
|
||||
@ -119,12 +113,7 @@ int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx);
|
||||
int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx);
|
||||
int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx);
|
||||
|
||||
/* Maps coefficient index j through the global permutation[] table and
 * returns the table entry. No bounds check is performed; permutation[] is
 * declared with 64 entries, so j must be in 0..63. */
static inline int block_permute_op(int j)
|
||||
{
|
||||
return permutation[j];
|
||||
}
|
||||
|
||||
void block_permute(INT16 *block);
|
||||
void block_permute(INT16 *block, UINT8 *permutation);
|
||||
|
||||
#if defined(HAVE_MMX)
|
||||
|
||||
|
@ -287,19 +287,19 @@ static inline int decide_ac_pred(MpegEncContext * s, DCTELEM block[6][64], int d
|
||||
if(s->mb_y==0 || s->qscale == s->qscale_table[xy] || n==2 || n==3){
|
||||
/* same qscale */
|
||||
for(i=1; i<8; i++){
|
||||
const int level= block[n][block_permute_op(i )];
|
||||
const int level= block[n][s->idct_permutation[i ]];
|
||||
score0+= ABS(level);
|
||||
score1+= ABS(level - ac_val[i+8]);
|
||||
ac_val1[i ]= block[n][block_permute_op(i<<3)];
|
||||
ac_val1[i ]= block[n][s->idct_permutation[i<<3]];
|
||||
ac_val1[i+8]= level;
|
||||
}
|
||||
}else{
|
||||
/* different qscale, we must rescale */
|
||||
for(i=1; i<8; i++){
|
||||
const int level= block[n][block_permute_op(i )];
|
||||
const int level= block[n][s->idct_permutation[i ]];
|
||||
score0+= ABS(level);
|
||||
score1+= ABS(level - ROUNDED_DIV(ac_val[i + 8]*s->qscale_table[xy], s->qscale));
|
||||
ac_val1[i ]= block[n][block_permute_op(i<<3)];
|
||||
ac_val1[i ]= block[n][s->idct_permutation[i<<3]];
|
||||
ac_val1[i+8]= level;
|
||||
}
|
||||
}
|
||||
@ -310,20 +310,20 @@ static inline int decide_ac_pred(MpegEncContext * s, DCTELEM block[6][64], int d
|
||||
if(s->mb_x==0 || s->qscale == s->qscale_table[xy] || n==1 || n==3){
|
||||
/* same qscale */
|
||||
for(i=1; i<8; i++){
|
||||
const int level= block[n][block_permute_op(i<<3)];
|
||||
const int level= block[n][s->idct_permutation[i<<3]];
|
||||
score0+= ABS(level);
|
||||
score1+= ABS(level - ac_val[i]);
|
||||
ac_val1[i ]= level;
|
||||
ac_val1[i+8]= block[n][block_permute_op(i )];
|
||||
ac_val1[i+8]= block[n][s->idct_permutation[i ]];
|
||||
}
|
||||
}else{
|
||||
/* different qscale, we must rescale */
|
||||
for(i=1; i<8; i++){
|
||||
const int level= block[n][block_permute_op(i<<3)];
|
||||
const int level= block[n][s->idct_permutation[i<<3]];
|
||||
score0+= ABS(level);
|
||||
score1+= ABS(level - ROUNDED_DIV(ac_val[i]*s->qscale_table[xy], s->qscale));
|
||||
ac_val1[i ]= level;
|
||||
ac_val1[i+8]= block[n][block_permute_op(i )];
|
||||
ac_val1[i+8]= block[n][s->idct_permutation[i ]];
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -519,7 +519,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
|
||||
|
||||
/* encode each block */
|
||||
for (i = 0; i < 6; i++) {
|
||||
mpeg4_encode_block(s, block[i], i, 0, zigzag_direct, NULL, &s->pb);
|
||||
mpeg4_encode_block(s, block[i], i, 0, s->intra_scantable.permutated, NULL, &s->pb);
|
||||
}
|
||||
|
||||
if(interleaved_stats){
|
||||
@ -637,7 +637,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
|
||||
|
||||
/* encode each block */
|
||||
for (i = 0; i < 6; i++) {
|
||||
mpeg4_encode_block(s, block[i], i, 0, zigzag_direct, NULL, tex_pb);
|
||||
mpeg4_encode_block(s, block[i], i, 0, s->intra_scantable.permutated, NULL, tex_pb);
|
||||
}
|
||||
|
||||
if(interleaved_stats){
|
||||
@ -674,8 +674,8 @@ void mpeg4_encode_mb(MpegEncContext * s,
|
||||
int last_index;
|
||||
|
||||
mpeg4_inv_pred_ac(s, block[i], i, dir[i]);
|
||||
if (dir[i]==0) st = ff_alternate_vertical_scan; /* left */
|
||||
else st = ff_alternate_horizontal_scan; /* top */
|
||||
if (dir[i]==0) st = s->intra_v_scantable.permutated; /* left */
|
||||
else st = s->intra_h_scantable.permutated; /* top */
|
||||
|
||||
for(last_index=63; last_index>=0; last_index--) //FIXME optimize
|
||||
if(block[i][st[last_index]]) break;
|
||||
@ -685,7 +685,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
|
||||
}
|
||||
}else{
|
||||
for(i=0; i<6; i++)
|
||||
scan_table[i]= zigzag_direct;
|
||||
scan_table[i]= s->intra_scantable.permutated;
|
||||
}
|
||||
|
||||
/* compute cbp */
|
||||
@ -746,10 +746,10 @@ void mpeg4_encode_mb(MpegEncContext * s,
|
||||
|
||||
if(dir[i]){
|
||||
for(j=1; j<8; j++)
|
||||
block[i][block_permute_op(j )]= ac_val[j+8];
|
||||
block[i][s->idct_permutation[j ]]= ac_val[j+8];
|
||||
}else{
|
||||
for(j=1; j<8; j++)
|
||||
block[i][block_permute_op(j<<3)]= ac_val[j ];
|
||||
block[i][s->idct_permutation[j<<3]]= ac_val[j ];
|
||||
}
|
||||
s->block_last_index[i]= zigzag_last_index[i];
|
||||
}
|
||||
@ -974,7 +974,7 @@ void h263_pred_acdc(MpegEncContext * s, INT16 *block, int n)
|
||||
if (a != 1024) {
|
||||
ac_val -= 16;
|
||||
for(i=1;i<8;i++) {
|
||||
block[block_permute_op(i*8)] += ac_val[i];
|
||||
block[s->idct_permutation[i<<3]] += ac_val[i];
|
||||
}
|
||||
pred_dc = a;
|
||||
}
|
||||
@ -983,7 +983,7 @@ void h263_pred_acdc(MpegEncContext * s, INT16 *block, int n)
|
||||
if (c != 1024) {
|
||||
ac_val -= 16 * wrap;
|
||||
for(i=1;i<8;i++) {
|
||||
block[block_permute_op(i)] += ac_val[i + 8];
|
||||
block[s->idct_permutation[i ]] += ac_val[i + 8];
|
||||
}
|
||||
pred_dc = c;
|
||||
}
|
||||
@ -1011,10 +1011,10 @@ void h263_pred_acdc(MpegEncContext * s, INT16 *block, int n)
|
||||
|
||||
/* left copy */
|
||||
for(i=1;i<8;i++)
|
||||
ac_val1[i] = block[block_permute_op(i * 8)];
|
||||
ac_val1[i ] = block[s->idct_permutation[i<<3]];
|
||||
/* top copy */
|
||||
for(i=1;i<8;i++)
|
||||
ac_val1[8 + i] = block[block_permute_op(i)];
|
||||
ac_val1[8 + i] = block[s->idct_permutation[i ]];
|
||||
}
|
||||
|
||||
INT16 *h263_pred_motion(MpegEncContext * s, int block,
|
||||
@ -1425,7 +1425,7 @@ static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n)
|
||||
last_index = s->block_last_index[n];
|
||||
last_non_zero = i - 1;
|
||||
for (; i <= last_index; i++) {
|
||||
j = zigzag_direct[i];
|
||||
j = s->intra_scantable.permutated[i];
|
||||
level = block[j];
|
||||
if (level) {
|
||||
run = i - last_non_zero - 1;
|
||||
@ -1710,12 +1710,12 @@ void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n,
|
||||
if(s->mb_x==0 || s->qscale == s->qscale_table[xy] || n==1 || n==3){
|
||||
/* same qscale */
|
||||
for(i=1;i<8;i++) {
|
||||
block[block_permute_op(i*8)] += ac_val[i];
|
||||
block[s->idct_permutation[i<<3]] += ac_val[i];
|
||||
}
|
||||
}else{
|
||||
/* different qscale, we must rescale */
|
||||
for(i=1;i<8;i++) {
|
||||
block[block_permute_op(i*8)] += ROUNDED_DIV(ac_val[i]*s->qscale_table[xy], s->qscale);
|
||||
block[s->idct_permutation[i<<3]] += ROUNDED_DIV(ac_val[i]*s->qscale_table[xy], s->qscale);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@ -1726,23 +1726,23 @@ void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n,
|
||||
if(s->mb_y==0 || s->qscale == s->qscale_table[xy] || n==2 || n==3){
|
||||
/* same qscale */
|
||||
for(i=1;i<8;i++) {
|
||||
block[block_permute_op(i)] += ac_val[i + 8];
|
||||
block[s->idct_permutation[i]] += ac_val[i + 8];
|
||||
}
|
||||
}else{
|
||||
/* different qscale, we must rescale */
|
||||
for(i=1;i<8;i++) {
|
||||
block[block_permute_op(i)] += ROUNDED_DIV(ac_val[i + 8]*s->qscale_table[xy], s->qscale);
|
||||
block[s->idct_permutation[i]] += ROUNDED_DIV(ac_val[i + 8]*s->qscale_table[xy], s->qscale);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/* left copy */
|
||||
for(i=1;i<8;i++)
|
||||
ac_val1[i] = block[block_permute_op(i * 8)];
|
||||
ac_val1[i ] = block[s->idct_permutation[i<<3]];
|
||||
|
||||
/* top copy */
|
||||
for(i=1;i<8;i++)
|
||||
ac_val1[8 + i] = block[block_permute_op(i)];
|
||||
ac_val1[8 + i] = block[s->idct_permutation[i ]];
|
||||
|
||||
}
|
||||
|
||||
@ -1762,12 +1762,12 @@ static void mpeg4_inv_pred_ac(MpegEncContext * s, INT16 *block, int n,
|
||||
if(s->mb_x==0 || s->qscale == s->qscale_table[xy] || n==1 || n==3){
|
||||
/* same qscale */
|
||||
for(i=1;i<8;i++) {
|
||||
block[block_permute_op(i*8)] -= ac_val[i];
|
||||
block[s->idct_permutation[i<<3]] -= ac_val[i];
|
||||
}
|
||||
}else{
|
||||
/* different qscale, we must rescale */
|
||||
for(i=1;i<8;i++) {
|
||||
block[block_permute_op(i*8)] -= ROUNDED_DIV(ac_val[i]*s->qscale_table[xy], s->qscale);
|
||||
block[s->idct_permutation[i<<3]] -= ROUNDED_DIV(ac_val[i]*s->qscale_table[xy], s->qscale);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@ -1777,12 +1777,12 @@ static void mpeg4_inv_pred_ac(MpegEncContext * s, INT16 *block, int n,
|
||||
if(s->mb_y==0 || s->qscale == s->qscale_table[xy] || n==2 || n==3){
|
||||
/* same qscale */
|
||||
for(i=1;i<8;i++) {
|
||||
block[block_permute_op(i)] -= ac_val[i + 8];
|
||||
block[s->idct_permutation[i]] -= ac_val[i + 8];
|
||||
}
|
||||
}else{
|
||||
/* different qscale, we must rescale */
|
||||
for(i=1;i<8;i++) {
|
||||
block[block_permute_op(i)] -= ROUNDED_DIV(ac_val[i + 8]*s->qscale_table[xy], s->qscale);
|
||||
block[s->idct_permutation[i]] -= ROUNDED_DIV(ac_val[i + 8]*s->qscale_table[xy], s->qscale);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -3192,13 +3192,13 @@ intra:
|
||||
static int h263_decode_motion(MpegEncContext * s, int pred, int f_code)
|
||||
{
|
||||
int code, val, sign, shift, l;
|
||||
|
||||
code = get_vlc2(&s->gb, mv_vlc.table, MV_VLC_BITS, 2);
|
||||
if (code < 0)
|
||||
return 0xffff;
|
||||
|
||||
if (code == 0)
|
||||
return pred;
|
||||
|
||||
sign = get_bits1(&s->gb);
|
||||
shift = f_code - 1;
|
||||
val = (code - 1) << shift;
|
||||
@ -3211,7 +3211,7 @@ static int h263_decode_motion(MpegEncContext * s, int pred, int f_code)
|
||||
|
||||
/* modulo decoding */
|
||||
if (!s->h263_long_vectors) {
|
||||
l = (1 << (f_code - 1)) * 32;
|
||||
l = 1 << (f_code + 4);
|
||||
if (val < -l) {
|
||||
val += l<<1;
|
||||
} else if (val >= l) {
|
||||
@ -3261,15 +3261,15 @@ static int h263_decode_block(MpegEncContext * s, DCTELEM * block,
|
||||
RLTable *rl = &rl_inter;
|
||||
const UINT8 *scan_table;
|
||||
|
||||
scan_table = zigzag_direct;
|
||||
scan_table = s->intra_scantable.permutated;
|
||||
if (s->h263_aic && s->mb_intra) {
|
||||
rl = &rl_intra_aic;
|
||||
i = 0;
|
||||
if (s->ac_pred) {
|
||||
if (s->h263_aic_dir)
|
||||
scan_table = ff_alternate_vertical_scan; /* left */
|
||||
scan_table = s->intra_v_scantable.permutated; /* left */
|
||||
else
|
||||
scan_table = ff_alternate_horizontal_scan; /* top */
|
||||
scan_table = s->intra_h_scantable.permutated; /* top */
|
||||
}
|
||||
} else if (s->mb_intra) {
|
||||
/* DC coef */
|
||||
@ -3417,14 +3417,14 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
|
||||
rl = &rl_intra;
|
||||
rl_vlc = rl_intra.rl_vlc[0];
|
||||
if(s->alternate_scan)
|
||||
scan_table = ff_alternate_vertical_scan; /* left */
|
||||
scan_table = s->intra_v_scantable.permutated; /* left */
|
||||
else if (s->ac_pred) {
|
||||
if (dc_pred_dir == 0)
|
||||
scan_table = ff_alternate_vertical_scan; /* left */
|
||||
scan_table = s->intra_v_scantable.permutated; /* left */
|
||||
else
|
||||
scan_table = ff_alternate_horizontal_scan; /* top */
|
||||
scan_table = s->intra_h_scantable.permutated; /* top */
|
||||
} else {
|
||||
scan_table = zigzag_direct;
|
||||
scan_table = s->intra_scantable.permutated;
|
||||
}
|
||||
qmul=1;
|
||||
qadd=0;
|
||||
@ -3437,9 +3437,9 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
|
||||
rl = &rl_inter;
|
||||
|
||||
if(s->alternate_scan)
|
||||
scan_table = ff_alternate_vertical_scan; /* left */
|
||||
scan_table = s->intra_v_scantable.permutated; /* left */
|
||||
else
|
||||
scan_table = zigzag_direct;
|
||||
scan_table = s->intra_scantable.permutated;
|
||||
|
||||
if(s->mpeg_quant){
|
||||
qmul=1;
|
||||
@ -4081,13 +4081,14 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
|
||||
|
||||
/* load default matrices */
|
||||
for(i=0; i<64; i++){
|
||||
int j= s->idct_permutation[i];
|
||||
v= ff_mpeg4_default_intra_matrix[i];
|
||||
s->intra_matrix[i]= v;
|
||||
s->chroma_intra_matrix[i]= v;
|
||||
s->intra_matrix[j]= v;
|
||||
s->chroma_intra_matrix[j]= v;
|
||||
|
||||
v= ff_mpeg4_default_non_intra_matrix[i];
|
||||
s->inter_matrix[i]= v;
|
||||
s->chroma_inter_matrix[i]= v;
|
||||
s->inter_matrix[j]= v;
|
||||
s->chroma_inter_matrix[j]= v;
|
||||
}
|
||||
|
||||
/* load custom intra matrix */
|
||||
@ -4096,7 +4097,7 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
|
||||
v= get_bits(&s->gb, 8);
|
||||
if(v==0) break;
|
||||
|
||||
j= zigzag_direct[i];
|
||||
j= s->intra_scantable.permutated[i];
|
||||
s->intra_matrix[j]= v;
|
||||
s->chroma_intra_matrix[j]= v;
|
||||
}
|
||||
@ -4108,14 +4109,14 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
|
||||
v= get_bits(&s->gb, 8);
|
||||
if(v==0) break;
|
||||
|
||||
j= zigzag_direct[i];
|
||||
j= s->intra_scantable.permutated[i];
|
||||
s->inter_matrix[j]= v;
|
||||
s->chroma_inter_matrix[j]= v;
|
||||
}
|
||||
|
||||
/* replicate last value */
|
||||
for(; i<64; i++){
|
||||
j= zigzag_direct[i];
|
||||
j= s->intra_scantable.permutated[i];
|
||||
s->inter_matrix[j]= v;
|
||||
s->chroma_inter_matrix[j]= v;
|
||||
}
|
||||
|
@ -20,7 +20,6 @@
|
||||
*/
|
||||
|
||||
#include "../dsputil.h"
|
||||
#include "../simple_idct.h"
|
||||
|
||||
int mm_flags; /* multimedia extension flags */
|
||||
|
||||
@ -44,10 +43,6 @@ int pix_abs8x8_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
|
||||
int pix_abs8x8_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
|
||||
int pix_abs8x8_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
|
||||
|
||||
/* external functions, from idct_mmx.c */
|
||||
void ff_mmx_idct(DCTELEM *block);
|
||||
void ff_mmxext_idct(DCTELEM *block);
|
||||
|
||||
/* pixel operations */
|
||||
static const uint64_t mm_bone __attribute__ ((aligned(8))) = 0x0101010101010101ULL;
|
||||
static const uint64_t mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL;
|
||||
@ -588,17 +583,6 @@ void dsputil_init_mmx(void)
|
||||
avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow;
|
||||
avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow;
|
||||
}
|
||||
|
||||
/* idct */
|
||||
if (mm_flags & MM_MMXEXT) {
|
||||
ff_idct = ff_mmxext_idct;
|
||||
} else {
|
||||
ff_idct = ff_mmx_idct;
|
||||
}
|
||||
#ifdef SIMPLE_IDCT
|
||||
// ff_idct = simple_idct;
|
||||
ff_idct = simple_idct_mmx;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if 0
|
||||
@ -637,28 +621,6 @@ void dsputil_init_mmx(void)
|
||||
#endif
|
||||
}
|
||||
|
||||
void gen_idct_put(UINT8 *dest, int line_size, DCTELEM *block);
|
||||
|
||||
/**
|
||||
* Sends the coefficient matrices that would give different results with the
* 16383 type MMX IDCT than with the C IDCT to the C IDCT instead.
*
* A block is routed to simple_idct_put() when block[0] > 1022 and the
* entries at indices 1, 4, 5, 8, 9, 12 and 13 are all zero; before that call
* the block is rewritten in place as block[i] = old_block[block_permute_op(i)].
* Every other block is passed unchanged to gen_idct_put().
* NOTE(review): the reordering presumably undoes the MMX input permutation so
* that the C IDCT sees natural order - confirm.
|
||||
*/
|
||||
void bit_exact_idct_put(UINT8 *dest, int line_size, INT16 *block){
|
||||
if( block[0]>1022 && block[1]==0 && block[4 ]==0 && block[5 ]==0
|
||||
&& block[8]==0 && block[9]==0 && block[12]==0 && block[13]==0){
|
||||
int16_t tmp[64];
|
||||
int i;
|
||||
|
||||
/* snapshot the block, then gather it back through the permutation */
for(i=0; i<64; i++)
|
||||
tmp[i]= block[i];
|
||||
for(i=0; i<64; i++)
|
||||
block[i]= tmp[block_permute_op(i)];
|
||||
|
||||
simple_idct_put(dest, line_size, block);
|
||||
}
|
||||
else
|
||||
gen_idct_put(dest, line_size, block);
|
||||
}
|
||||
|
||||
/* remove any non bit exact operation (testing purpose). NOTE that
|
||||
this function should be kept as small as possible because it is
|
||||
always difficult to test automatically non bit exact cases. */
|
||||
@ -682,9 +644,5 @@ void dsputil_set_bit_exact_mmx(void)
|
||||
pix_abs8x8_y2 = pix_abs8x8_y2_mmx;
|
||||
pix_abs8x8_xy2= pix_abs8x8_xy2_mmx;
|
||||
}
|
||||
#ifdef SIMPLE_IDCT
|
||||
if(ff_idct_put==gen_idct_put && ff_idct == simple_idct_mmx)
|
||||
ff_idct_put= bit_exact_idct_put;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
@ -23,53 +23,24 @@
|
||||
#include "../dsputil.h"
|
||||
#include "../mpegvideo.h"
|
||||
#include "../avcodec.h"
|
||||
#include "../simple_idct.h"
|
||||
|
||||
extern UINT8 zigzag_end[64];
|
||||
/* Input permutation for the simple_idct_mmx */
|
||||
static UINT8 simple_mmx_permutation[64]={
|
||||
0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
|
||||
0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
|
||||
0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
|
||||
0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
|
||||
0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
|
||||
0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
|
||||
0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
|
||||
0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
|
||||
};
|
||||
|
||||
extern UINT8 zigzag_direct_noperm[64];
|
||||
extern UINT16 inv_zigzag_direct16[64];
|
||||
extern UINT32 inverse[256];
|
||||
|
||||
#if 0
|
||||
|
||||
/* XXX: GL: I don't understand why this function needs optimization
|
||||
(it is called only once per frame!), so I disabled it */
|
||||
|
||||
/*
 * Updates the picture references in s with inline x86 asm. Each asm
 * statement moves three 32-bit words, at byte offsets 0, 4 and 8 of its
 * pointer operands, through eax/edx/ecx.
 *
 * - pict_type == B_TYPE: the three words of s->aux_picture are copied into
 *   s->current_picture.
 * - otherwise: the words of s->last_picture and s->next_picture are
 *   exchanged, and the words that were previously in s->last_picture are
 *   also stored into s->current_picture.
 *
 * NOTE(review): the three words are presumably the per-plane picture
 * pointers on a 32-bit target - confirm against MpegEncContext.
 */
void MPV_frame_start(MpegEncContext *s)
|
||||
{
|
||||
if (s->pict_type == B_TYPE) {
|
||||
__asm __volatile(
|
||||
"movl (%1), %%eax\n\t"
|
||||
"movl 4(%1), %%edx\n\t"
|
||||
"movl 8(%1), %%ecx\n\t"
|
||||
"movl %%eax, (%0)\n\t"
|
||||
"movl %%edx, 4(%0)\n\t"
|
||||
"movl %%ecx, 8(%0)\n\t"
|
||||
:
|
||||
:"r"(s->current_picture), "r"(s->aux_picture)
|
||||
:"eax","edx","ecx","memory");
|
||||
} else {
|
||||
/* swap next and last */
|
||||
__asm __volatile(
|
||||
"movl (%1), %%eax\n\t"
|
||||
"movl 4(%1), %%edx\n\t"
|
||||
"movl 8(%1), %%ecx\n\t"
|
||||
"xchgl (%0), %%eax\n\t"
|
||||
"xchgl 4(%0), %%edx\n\t"
|
||||
"xchgl 8(%0), %%ecx\n\t"
|
||||
"movl %%eax, (%1)\n\t"
|
||||
"movl %%edx, 4(%1)\n\t"
|
||||
"movl %%ecx, 8(%1)\n\t"
|
||||
"movl %%eax, (%2)\n\t"
|
||||
"movl %%edx, 4(%2)\n\t"
|
||||
"movl %%ecx, 8(%2)\n\t"
|
||||
:
|
||||
:"r"(s->last_picture), "r"(s->next_picture), "r"(s->current_picture)
|
||||
:"eax","edx","ecx","memory");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static const unsigned long long int mm_wabs __attribute__ ((aligned(8))) = 0xffffffffffffffffULL;
|
||||
static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL;
|
||||
|
||||
@ -77,36 +48,26 @@ static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x000
|
||||
static void dct_unquantize_h263_mmx(MpegEncContext *s,
|
||||
DCTELEM *block, int n, int qscale)
|
||||
{
|
||||
int i, level, qmul, qadd, nCoeffs;
|
||||
|
||||
qmul = s->qscale << 1;
|
||||
if (s->h263_aic && s->mb_intra)
|
||||
qadd = 0;
|
||||
else
|
||||
qadd = (s->qscale - 1) | 1;
|
||||
int level, qmul, qadd, nCoeffs;
|
||||
|
||||
qmul = qscale << 1;
|
||||
qadd = (qscale - 1) | 1;
|
||||
|
||||
assert(s->block_last_index[n]>=0);
|
||||
|
||||
if (s->mb_intra) {
|
||||
if (!s->h263_aic) {
|
||||
if (n < 4)
|
||||
block[0] = block[0] * s->y_dc_scale;
|
||||
level = block[0] * s->y_dc_scale;
|
||||
else
|
||||
block[0] = block[0] * s->c_dc_scale;
|
||||
level = block[0] * s->c_dc_scale;
|
||||
}else{
|
||||
qadd = 0;
|
||||
level= block[0];
|
||||
}
|
||||
for(i=1; i<8; i++) {
|
||||
level = block[i];
|
||||
if (level) {
|
||||
if (level < 0) {
|
||||
level = level * qmul - qadd;
|
||||
} else {
|
||||
level = level * qmul + qadd;
|
||||
}
|
||||
block[i] = level;
|
||||
}
|
||||
}
|
||||
nCoeffs=64;
|
||||
nCoeffs=63;
|
||||
} else {
|
||||
i = 0;
|
||||
nCoeffs= zigzag_end[ s->block_last_index[n] ];
|
||||
nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
|
||||
}
|
||||
//printf("%d %d ", qmul, qadd);
|
||||
asm volatile(
|
||||
@ -152,10 +113,12 @@ asm volatile(
|
||||
"movq %%mm1, 8(%0, %3) \n\t"
|
||||
|
||||
"addl $16, %3 \n\t"
|
||||
"js 1b \n\t"
|
||||
::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(i-nCoeffs))
|
||||
"jng 1b \n\t"
|
||||
::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
|
||||
: "memory"
|
||||
);
|
||||
if(s->mb_intra)
|
||||
block[0]= level;
|
||||
}
|
||||
|
||||
|
||||
@ -193,9 +156,10 @@ static void dct_unquantize_mpeg1_mmx(MpegEncContext *s,
|
||||
{
|
||||
int nCoeffs;
|
||||
const UINT16 *quant_matrix;
|
||||
|
||||
if(s->alternate_scan) nCoeffs= 64;
|
||||
else nCoeffs= zigzag_end[ s->block_last_index[n] ];
|
||||
|
||||
assert(s->block_last_index[n]>=0);
|
||||
|
||||
nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
|
||||
|
||||
if (s->mb_intra) {
|
||||
int block0;
|
||||
@ -312,6 +276,7 @@ asm volatile(
|
||||
: "%eax", "memory"
|
||||
);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void dct_unquantize_mpeg2_mmx(MpegEncContext *s,
|
||||
@ -320,8 +285,10 @@ static void dct_unquantize_mpeg2_mmx(MpegEncContext *s,
|
||||
int nCoeffs;
|
||||
const UINT16 *quant_matrix;
|
||||
|
||||
if(s->alternate_scan) nCoeffs= 64;
|
||||
else nCoeffs= zigzag_end[ s->block_last_index[n] ];
|
||||
assert(s->block_last_index[n]>=0);
|
||||
|
||||
if(s->alternate_scan) nCoeffs= 63; //FIXME
|
||||
else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
|
||||
|
||||
if (s->mb_intra) {
|
||||
int block0;
|
||||
@ -371,7 +338,7 @@ asm volatile(
|
||||
"movq %%mm5, 8(%0, %%eax) \n\t"
|
||||
|
||||
"addl $16, %%eax \n\t"
|
||||
"js 1b \n\t"
|
||||
"jng 1b \n\t"
|
||||
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
|
||||
: "%eax", "memory"
|
||||
);
|
||||
@ -427,7 +394,7 @@ asm volatile(
|
||||
"movq %%mm5, 8(%0, %%eax) \n\t"
|
||||
|
||||
"addl $16, %%eax \n\t"
|
||||
"js 1b \n\t"
|
||||
"jng 1b \n\t"
|
||||
"movd 124(%0, %3), %%mm0 \n\t"
|
||||
"movq %%mm7, %%mm6 \n\t"
|
||||
"psrlq $32, %%mm7 \n\t"
|
||||
@ -534,12 +501,6 @@ static void draw_edges_mmx(UINT8 *buf, int wrap, int width, int height, int w)
|
||||
}
|
||||
}
|
||||
|
||||
static volatile int esp_temp;
|
||||
|
||||
void unused_var_warning_killer(){
|
||||
esp_temp++;
|
||||
}
|
||||
|
||||
#undef HAVE_MMX2
|
||||
#define RENAME(a) a ## _MMX
|
||||
#include "mpegvideo_mmx_template.c"
|
||||
@ -549,10 +510,40 @@ void unused_var_warning_killer(){
|
||||
#define RENAME(a) a ## _MMX2
|
||||
#include "mpegvideo_mmx_template.c"
|
||||
|
||||
/* external functions, from idct_mmx.c */
|
||||
void ff_mmx_idct(DCTELEM *block);
|
||||
void ff_mmxext_idct(DCTELEM *block);
|
||||
|
||||
/* XXX: those functions should be suppressed ASAP when all IDCTs are
|
||||
converted */
|
||||
/*
 * idct_put adapter for the libmpeg2 MMX IDCT.
 * Runs ff_mmx_idct() on block (the callee only receives the block pointer),
 * then passes the block to put_pixels_clamped() together with dest and
 * line_size -- presumably storing the clamped samples into dest.
 */
static void ff_libmpeg2mmx_idct_put(UINT8 *dest, int line_size, DCTELEM *block)
{
    ff_mmx_idct(block);
    put_pixels_clamped(block, dest, line_size);
}
|
||||
/*
 * idct_add adapter for the libmpeg2 MMX IDCT.
 * Runs ff_mmx_idct() on block, then passes the block to
 * add_pixels_clamped() together with dest and line_size -- presumably
 * adding the result onto the pixels already in dest, with clamping.
 */
static void ff_libmpeg2mmx_idct_add(UINT8 *dest, int line_size, DCTELEM *block)
{
    ff_mmx_idct(block);
    add_pixels_clamped(block, dest, line_size);
}
|
||||
/*
 * idct_put adapter for the libmpeg2 MMXEXT IDCT.
 * Same shape as the plain MMX adapter, but the transform step is
 * ff_mmxext_idct(); the block is then handed to put_pixels_clamped().
 */
static void ff_libmpeg2mmx2_idct_put(UINT8 *dest, int line_size, DCTELEM *block)
{
    ff_mmxext_idct(block);
    put_pixels_clamped(block, dest, line_size);
}
|
||||
/*
 * idct_add adapter for the libmpeg2 MMXEXT IDCT.
 * Runs ff_mmxext_idct() on block, then hands the block to
 * add_pixels_clamped() with dest and line_size.
 */
static void ff_libmpeg2mmx2_idct_add(UINT8 *dest, int line_size, DCTELEM *block)
{
    ff_mmxext_idct(block);
    add_pixels_clamped(block, dest, line_size);
}
|
||||
|
||||
void MPV_common_init_mmx(MpegEncContext *s)
|
||||
{
|
||||
if (mm_flags & MM_MMX) {
|
||||
const int dct_algo= s->avctx->dct_algo;
|
||||
int i;
|
||||
const int dct_algo = s->avctx->dct_algo;
|
||||
const int idct_algo= s->avctx->idct_algo;
|
||||
|
||||
s->dct_unquantize_h263 = dct_unquantize_h263_mmx;
|
||||
s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_mmx;
|
||||
s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_mmx;
|
||||
@ -568,5 +559,22 @@ void MPV_common_init_mmx(MpegEncContext *s)
|
||||
s->dct_quantize= dct_quantize_MMX;
|
||||
}
|
||||
}
|
||||
|
||||
if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){
|
||||
s->idct_put= ff_simple_idct_put_mmx;
|
||||
s->idct_add= ff_simple_idct_add_mmx;
|
||||
for(i=0; i<64; i++)
|
||||
s->idct_permutation[i]= simple_mmx_permutation[i];
|
||||
}else if(idct_algo==FF_IDCT_LIBMPEG2MMX){
|
||||
if(mm_flags & MM_MMXEXT){
|
||||
s->idct_put= ff_libmpeg2mmx2_idct_put;
|
||||
s->idct_add= ff_libmpeg2mmx2_idct_add;
|
||||
}else{
|
||||
s->idct_put= ff_libmpeg2mmx_idct_put;
|
||||
s->idct_add= ff_libmpeg2mmx_idct_add;
|
||||
}
|
||||
for(i=0; i<64; i++)
|
||||
s->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -189,31 +189,143 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
|
||||
);
|
||||
}
|
||||
|
||||
if(s->mb_intra) temp_block[0]= level; //FIXME move afer permute
|
||||
|
||||
// last_non_zero_p1=64;
|
||||
/* permute for IDCT */
|
||||
asm volatile(
|
||||
"movl %0, %%eax \n\t"
|
||||
"pushl %%ebp \n\t"
|
||||
"movl %%esp, " MANGLE(esp_temp) "\n\t"
|
||||
"1: \n\t"
|
||||
"movzbl (%1, %%eax), %%ebx \n\t"
|
||||
"movzbl 1(%1, %%eax), %%ebp \n\t"
|
||||
"movw (%2, %%ebx, 2), %%cx \n\t"
|
||||
"movw (%2, %%ebp, 2), %%sp \n\t"
|
||||
"movzbl " MANGLE(permutation) "(%%ebx), %%ebx\n\t"
|
||||
"movzbl " MANGLE(permutation) "(%%ebp), %%ebp\n\t"
|
||||
"movw %%cx, (%3, %%ebx, 2) \n\t"
|
||||
"movw %%sp, (%3, %%ebp, 2) \n\t"
|
||||
"addl $2, %%eax \n\t"
|
||||
" js 1b \n\t"
|
||||
"movl " MANGLE(esp_temp) ", %%esp\n\t"
|
||||
"popl %%ebp \n\t"
|
||||
:
|
||||
: "g" (-last_non_zero_p1), "d" (zigzag_direct_noperm+last_non_zero_p1), "S" (temp_block), "D" (block)
|
||||
: "%eax", "%ebx", "%ecx"
|
||||
);
|
||||
if(s->mb_intra) block[0]= level;
|
||||
else block[0]= temp_block[0];
|
||||
|
||||
if(s->idct_permutation[1]==8){
|
||||
if(last_non_zero_p1 <= 1) goto end;
|
||||
block[0x08] = temp_block[0x01]; block[0x10] = temp_block[0x08];
|
||||
block[0x20] = temp_block[0x10];
|
||||
if(last_non_zero_p1 <= 4) goto end;
|
||||
block[0x18] = temp_block[0x09]; block[0x04] = temp_block[0x02];
|
||||
block[0x09] = temp_block[0x03];
|
||||
if(last_non_zero_p1 <= 7) goto end;
|
||||
block[0x14] = temp_block[0x0A]; block[0x28] = temp_block[0x11];
|
||||
block[0x12] = temp_block[0x18]; block[0x02] = temp_block[0x20];
|
||||
if(last_non_zero_p1 <= 11) goto end;
|
||||
block[0x1A] = temp_block[0x19]; block[0x24] = temp_block[0x12];
|
||||
block[0x19] = temp_block[0x0B]; block[0x01] = temp_block[0x04];
|
||||
block[0x0C] = temp_block[0x05];
|
||||
if(last_non_zero_p1 <= 16) goto end;
|
||||
block[0x11] = temp_block[0x0C]; block[0x29] = temp_block[0x13];
|
||||
block[0x16] = temp_block[0x1A]; block[0x0A] = temp_block[0x21];
|
||||
block[0x30] = temp_block[0x28]; block[0x22] = temp_block[0x30];
|
||||
block[0x38] = temp_block[0x29]; block[0x06] = temp_block[0x22];
|
||||
if(last_non_zero_p1 <= 24) goto end;
|
||||
block[0x1B] = temp_block[0x1B]; block[0x21] = temp_block[0x14];
|
||||
block[0x1C] = temp_block[0x0D]; block[0x05] = temp_block[0x06];
|
||||
block[0x0D] = temp_block[0x07]; block[0x15] = temp_block[0x0E];
|
||||
block[0x2C] = temp_block[0x15]; block[0x13] = temp_block[0x1C];
|
||||
if(last_non_zero_p1 <= 32) goto end;
|
||||
block[0x0B] = temp_block[0x23]; block[0x34] = temp_block[0x2A];
|
||||
block[0x2A] = temp_block[0x31]; block[0x32] = temp_block[0x38];
|
||||
block[0x3A] = temp_block[0x39]; block[0x26] = temp_block[0x32];
|
||||
block[0x39] = temp_block[0x2B]; block[0x03] = temp_block[0x24];
|
||||
if(last_non_zero_p1 <= 40) goto end;
|
||||
block[0x1E] = temp_block[0x1D]; block[0x25] = temp_block[0x16];
|
||||
block[0x1D] = temp_block[0x0F]; block[0x2D] = temp_block[0x17];
|
||||
block[0x17] = temp_block[0x1E]; block[0x0E] = temp_block[0x25];
|
||||
block[0x31] = temp_block[0x2C]; block[0x2B] = temp_block[0x33];
|
||||
if(last_non_zero_p1 <= 48) goto end;
|
||||
block[0x36] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B];
|
||||
block[0x23] = temp_block[0x34]; block[0x3C] = temp_block[0x2D];
|
||||
block[0x07] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
|
||||
block[0x0F] = temp_block[0x27]; block[0x35] = temp_block[0x2E];
|
||||
if(last_non_zero_p1 <= 56) goto end;
|
||||
block[0x2E] = temp_block[0x35]; block[0x33] = temp_block[0x3C];
|
||||
block[0x3E] = temp_block[0x3D]; block[0x27] = temp_block[0x36];
|
||||
block[0x3D] = temp_block[0x2F]; block[0x2F] = temp_block[0x37];
|
||||
block[0x37] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F];
|
||||
}else if(s->idct_permutation[1]==4){
|
||||
if(last_non_zero_p1 <= 1) goto end;
|
||||
block[0x04] = temp_block[0x01];
|
||||
block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10];
|
||||
if(last_non_zero_p1 <= 4) goto end;
|
||||
block[0x0C] = temp_block[0x09]; block[0x01] = temp_block[0x02];
|
||||
block[0x05] = temp_block[0x03];
|
||||
if(last_non_zero_p1 <= 7) goto end;
|
||||
block[0x09] = temp_block[0x0A]; block[0x14] = temp_block[0x11];
|
||||
block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20];
|
||||
if(last_non_zero_p1 <= 11) goto end;
|
||||
block[0x1C] = temp_block[0x19];
|
||||
block[0x11] = temp_block[0x12]; block[0x0D] = temp_block[0x0B];
|
||||
block[0x02] = temp_block[0x04]; block[0x06] = temp_block[0x05];
|
||||
if(last_non_zero_p1 <= 16) goto end;
|
||||
block[0x0A] = temp_block[0x0C]; block[0x15] = temp_block[0x13];
|
||||
block[0x19] = temp_block[0x1A]; block[0x24] = temp_block[0x21];
|
||||
block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30];
|
||||
block[0x2C] = temp_block[0x29]; block[0x21] = temp_block[0x22];
|
||||
if(last_non_zero_p1 <= 24) goto end;
|
||||
block[0x1D] = temp_block[0x1B]; block[0x12] = temp_block[0x14];
|
||||
block[0x0E] = temp_block[0x0D]; block[0x03] = temp_block[0x06];
|
||||
block[0x07] = temp_block[0x07]; block[0x0B] = temp_block[0x0E];
|
||||
block[0x16] = temp_block[0x15]; block[0x1A] = temp_block[0x1C];
|
||||
if(last_non_zero_p1 <= 32) goto end;
|
||||
block[0x25] = temp_block[0x23]; block[0x29] = temp_block[0x2A];
|
||||
block[0x34] = temp_block[0x31]; block[0x38] = temp_block[0x38];
|
||||
block[0x3C] = temp_block[0x39]; block[0x31] = temp_block[0x32];
|
||||
block[0x2D] = temp_block[0x2B]; block[0x22] = temp_block[0x24];
|
||||
if(last_non_zero_p1 <= 40) goto end;
|
||||
block[0x1E] = temp_block[0x1D]; block[0x13] = temp_block[0x16];
|
||||
block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17];
|
||||
block[0x1B] = temp_block[0x1E]; block[0x26] = temp_block[0x25];
|
||||
block[0x2A] = temp_block[0x2C]; block[0x35] = temp_block[0x33];
|
||||
if(last_non_zero_p1 <= 48) goto end;
|
||||
block[0x39] = temp_block[0x3A]; block[0x3D] = temp_block[0x3B];
|
||||
block[0x32] = temp_block[0x34]; block[0x2E] = temp_block[0x2D];
|
||||
block[0x23] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
|
||||
block[0x27] = temp_block[0x27]; block[0x2B] = temp_block[0x2E];
|
||||
if(last_non_zero_p1 <= 56) goto end;
|
||||
block[0x36] = temp_block[0x35]; block[0x3A] = temp_block[0x3C];
|
||||
block[0x3E] = temp_block[0x3D]; block[0x33] = temp_block[0x36];
|
||||
block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37];
|
||||
block[0x3B] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F];
|
||||
}else{
|
||||
if(last_non_zero_p1 <= 1) goto end;
|
||||
block[0x01] = temp_block[0x01];
|
||||
block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10];
|
||||
if(last_non_zero_p1 <= 4) goto end;
|
||||
block[0x09] = temp_block[0x09]; block[0x02] = temp_block[0x02];
|
||||
block[0x03] = temp_block[0x03];
|
||||
if(last_non_zero_p1 <= 7) goto end;
|
||||
block[0x0A] = temp_block[0x0A]; block[0x11] = temp_block[0x11];
|
||||
block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20];
|
||||
if(last_non_zero_p1 <= 11) goto end;
|
||||
block[0x19] = temp_block[0x19];
|
||||
block[0x12] = temp_block[0x12]; block[0x0B] = temp_block[0x0B];
|
||||
block[0x04] = temp_block[0x04]; block[0x05] = temp_block[0x05];
|
||||
if(last_non_zero_p1 <= 16) goto end;
|
||||
block[0x0C] = temp_block[0x0C]; block[0x13] = temp_block[0x13];
|
||||
block[0x1A] = temp_block[0x1A]; block[0x21] = temp_block[0x21];
|
||||
block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30];
|
||||
block[0x29] = temp_block[0x29]; block[0x22] = temp_block[0x22];
|
||||
if(last_non_zero_p1 <= 24) goto end;
|
||||
block[0x1B] = temp_block[0x1B]; block[0x14] = temp_block[0x14];
|
||||
block[0x0D] = temp_block[0x0D]; block[0x06] = temp_block[0x06];
|
||||
block[0x07] = temp_block[0x07]; block[0x0E] = temp_block[0x0E];
|
||||
block[0x15] = temp_block[0x15]; block[0x1C] = temp_block[0x1C];
|
||||
if(last_non_zero_p1 <= 32) goto end;
|
||||
block[0x23] = temp_block[0x23]; block[0x2A] = temp_block[0x2A];
|
||||
block[0x31] = temp_block[0x31]; block[0x38] = temp_block[0x38];
|
||||
block[0x39] = temp_block[0x39]; block[0x32] = temp_block[0x32];
|
||||
block[0x2B] = temp_block[0x2B]; block[0x24] = temp_block[0x24];
|
||||
if(last_non_zero_p1 <= 40) goto end;
|
||||
block[0x1D] = temp_block[0x1D]; block[0x16] = temp_block[0x16];
|
||||
block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17];
|
||||
block[0x1E] = temp_block[0x1E]; block[0x25] = temp_block[0x25];
|
||||
block[0x2C] = temp_block[0x2C]; block[0x33] = temp_block[0x33];
|
||||
if(last_non_zero_p1 <= 48) goto end;
|
||||
block[0x3A] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B];
|
||||
block[0x34] = temp_block[0x34]; block[0x2D] = temp_block[0x2D];
|
||||
block[0x26] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
|
||||
block[0x27] = temp_block[0x27]; block[0x2E] = temp_block[0x2E];
|
||||
if(last_non_zero_p1 <= 56) goto end;
|
||||
block[0x35] = temp_block[0x35]; block[0x3C] = temp_block[0x3C];
|
||||
block[0x3D] = temp_block[0x3D]; block[0x36] = temp_block[0x36];
|
||||
block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37];
|
||||
block[0x3E] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F];
|
||||
}
|
||||
end:
|
||||
/*
|
||||
for(i=0; i<last_non_zero_p1; i++)
|
||||
{
|
||||
@ -221,7 +333,6 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
|
||||
block[block_permute_op(j)]= temp_block[j];
|
||||
}
|
||||
*/
|
||||
//block_permute(block);
|
||||
|
||||
return last_non_zero_p1 - 1;
|
||||
}
|
||||
|
@ -1291,7 +1291,20 @@ Temp
|
||||
);
|
||||
}
|
||||
|
||||
void simple_idct_mmx(int16_t *block)
|
||||
void ff_simple_idct_mmx(int16_t *block)
|
||||
{
|
||||
idct(block);
|
||||
idct(block);
|
||||
}
|
||||
|
||||
//FIXME merge add/put into the idct
|
||||
|
||||
/*
 * idct_put entry point for the simple MMX IDCT.
 * Calls idct() on block, then put_pixels_clamped() with the block,
 * dest and line_size (the two steps are not yet fused, see the FIXME
 * about merging add/put into the idct).
 */
void ff_simple_idct_put_mmx(UINT8 *dest, int line_size, DCTELEM *block)
{
    idct(block);
    put_pixels_clamped(block, dest, line_size);
}
|
||||
/*
 * idct_add entry point for the simple MMX IDCT.
 * Calls idct() on block, then add_pixels_clamped() with the block,
 * dest and line_size.
 */
void ff_simple_idct_add_mmx(UINT8 *dest, int line_size, DCTELEM *block)
{
    idct(block);
    add_pixels_clamped(block, dest, line_size);
}
|
||||
|
@ -322,14 +322,14 @@ static void jpeg_table_header(MpegEncContext *s)
|
||||
put_bits(p, 4, 0); /* 8 bit precision */
|
||||
put_bits(p, 4, 0); /* table 0 */
|
||||
for(i=0;i<64;i++) {
|
||||
j = zigzag_direct[i];
|
||||
j = s->intra_scantable.permutated[i];
|
||||
put_bits(p, 8, s->intra_matrix[j]);
|
||||
}
|
||||
#ifdef TWOMATRIXES
|
||||
put_bits(p, 4, 0); /* 8 bit precision */
|
||||
put_bits(p, 4, 1); /* table 1 */
|
||||
for(i=0;i<64;i++) {
|
||||
j = zigzag_direct[i];
|
||||
j = s->intra_scantable.permutated[i];
|
||||
put_bits(p, 8, s->chroma_intra_matrix[j]);
|
||||
}
|
||||
#endif
|
||||
@ -535,7 +535,7 @@ static void encode_block(MpegEncContext *s, DCTELEM *block, int n)
|
||||
run = 0;
|
||||
last_index = s->block_last_index[n];
|
||||
for(i=1;i<=last_index;i++) {
|
||||
j = zigzag_direct[i];
|
||||
j = s->intra_scantable.permutated[i];
|
||||
val = block[j];
|
||||
if (val == 0) {
|
||||
run++;
|
||||
@ -620,6 +620,8 @@ typedef struct MJpegDecodeContext {
|
||||
int restart_interval;
|
||||
int restart_count;
|
||||
int interleaved_rows;
|
||||
ScanTable scantable;
|
||||
void (*idct_put)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
|
||||
} MJpegDecodeContext;
|
||||
|
||||
#define SKIP_REMAINING(gb, len) { \
|
||||
@ -645,9 +647,23 @@ static void build_vlc(VLC *vlc, const UINT8 *bits_table, const UINT8 *val_table,
|
||||
static int mjpeg_decode_init(AVCodecContext *avctx)
|
||||
{
|
||||
MJpegDecodeContext *s = avctx->priv_data;
|
||||
MpegEncContext s2;
|
||||
|
||||
s->avctx = avctx;
|
||||
|
||||
/* ugly way to get the idct & scantable */
|
||||
memset(&s2, 0, sizeof(MpegEncContext));
|
||||
s2.flags= avctx->flags;
|
||||
s2.avctx= avctx;
|
||||
// s2->out_format = FMT_MJPEG;
|
||||
s2.width = 8;
|
||||
s2.height = 8;
|
||||
if (MPV_common_init(&s2) < 0)
|
||||
return -1;
|
||||
s->scantable= s2.intra_scantable;
|
||||
s->idct_put= s2.idct_put;
|
||||
MPV_common_end(&s2);
|
||||
|
||||
s->header_state = 0;
|
||||
s->mpeg_enc_ctx_allocated = 0;
|
||||
s->buffer_size = PICTURE_BUFFER_SIZE - 1; /* minus 1 to take into
|
||||
@ -657,7 +673,7 @@ static int mjpeg_decode_init(AVCodecContext *avctx)
|
||||
s->first_picture = 1;
|
||||
s->org_width = avctx->width;
|
||||
s->org_height = avctx->height;
|
||||
|
||||
|
||||
build_vlc(&s->vlcs[0][0], bits_dc_luminance, val_dc_luminance, 12);
|
||||
build_vlc(&s->vlcs[0][1], bits_dc_chrominance, val_dc_chrominance, 12);
|
||||
build_vlc(&s->vlcs[1][0], bits_ac_luminance, val_ac_luminance, 251);
|
||||
@ -694,7 +710,7 @@ static int mjpeg_decode_dqt(MJpegDecodeContext *s,
|
||||
dprintf("index=%d\n", index);
|
||||
/* read quant table */
|
||||
for(i=0;i<64;i++) {
|
||||
j = zigzag_direct[i];
|
||||
j = s->scantable.permutated[i];
|
||||
s->quant_matrixes[index][j] = get_bits(&s->gb, 8);
|
||||
}
|
||||
len -= 65;
|
||||
@ -897,7 +913,7 @@ static int decode_block(MJpegDecodeContext *s, DCTELEM *block,
|
||||
dprintf("error count: %d\n", i);
|
||||
return -1;
|
||||
}
|
||||
j = zigzag_direct[i];
|
||||
j = s->scantable.permutated[i];
|
||||
block[j] = level * quant_matrix[j];
|
||||
i++;
|
||||
if (i >= 64)
|
||||
@ -1021,7 +1037,7 @@ static int mjpeg_decode_sos(MJpegDecodeContext *s,
|
||||
(h * mb_x + x) * 8;
|
||||
if (s->interlaced && s->bottom_field)
|
||||
ptr += s->linesize[c] >> 1;
|
||||
ff_idct_put(ptr, s->linesize[c], s->block);
|
||||
s->idct_put(ptr, s->linesize[c], s->block);
|
||||
if (++x == h) {
|
||||
x = 0;
|
||||
y++;
|
||||
|
@ -542,7 +542,7 @@ static void mpeg1_encode_block(MpegEncContext *s,
|
||||
last_non_zero = i - 1;
|
||||
|
||||
for(;i<=last_index;i++) {
|
||||
j = zigzag_direct[i];
|
||||
j = s->intra_scantable.permutated[i];
|
||||
level = block[j];
|
||||
next_coef:
|
||||
#if 0
|
||||
@ -552,26 +552,11 @@ static void mpeg1_encode_block(MpegEncContext *s,
|
||||
/* encode using VLC */
|
||||
if (level != 0) {
|
||||
run = i - last_non_zero - 1;
|
||||
#ifdef ARCH_X86
|
||||
asm volatile(
|
||||
"movl %2, %1 \n\t"
|
||||
"movl %1, %0 \n\t"
|
||||
"addl %1, %1 \n\t"
|
||||
"sbbl %1, %1 \n\t"
|
||||
"xorl %1, %0 \n\t"
|
||||
"subl %1, %0 \n\t"
|
||||
"andl $1, %1 \n\t"
|
||||
: "=&r" (alevel), "=&r" (sign)
|
||||
: "g" (level)
|
||||
);
|
||||
#else
|
||||
sign = 0;
|
||||
alevel = level;
|
||||
if (alevel < 0) {
|
||||
sign = 1;
|
||||
alevel = -alevel;
|
||||
}
|
||||
#endif
|
||||
|
||||
alevel= level;
|
||||
MASK_ABS(sign, alevel)
|
||||
sign&=1;
|
||||
|
||||
// code = get_rl_index(rl, 0, run, alevel);
|
||||
if (alevel > mpeg1_max_level[0][run])
|
||||
code= 111; /*rl->n*/
|
||||
@ -1040,6 +1025,7 @@ static int mpeg1_decode_block(MpegEncContext *s,
|
||||
int level, dc, diff, i, j, run;
|
||||
int code, component;
|
||||
RLTable *rl = &rl_mpeg1;
|
||||
UINT8 * const scantable= s->intra_scantable.permutated;
|
||||
|
||||
if (s->mb_intra) {
|
||||
/* DC coef */
|
||||
@ -1099,7 +1085,7 @@ static int mpeg1_decode_block(MpegEncContext *s,
|
||||
return -1;
|
||||
add_coef:
|
||||
dprintf("%d: run=%d level=%d\n", n, run, level);
|
||||
j = zigzag_direct[i];
|
||||
j = scantable[i];
|
||||
block[j] = level;
|
||||
i++;
|
||||
}
|
||||
@ -1121,9 +1107,9 @@ static int mpeg2_decode_block_non_intra(MpegEncContext *s,
|
||||
int mismatch;
|
||||
|
||||
if (s->alternate_scan)
|
||||
scan_table = ff_alternate_vertical_scan;
|
||||
scan_table = s->intra_v_scantable.permutated;
|
||||
else
|
||||
scan_table = zigzag_direct;
|
||||
scan_table = s->intra_scantable.permutated;
|
||||
mismatch = 1;
|
||||
|
||||
{
|
||||
@ -1140,7 +1126,7 @@ static int mpeg2_decode_block_non_intra(MpegEncContext *s,
|
||||
v= SHOW_UBITS(re, &s->gb, 2);
|
||||
if (v & 2) {
|
||||
run = 0;
|
||||
level = 1 - ((v & 1) << 1);
|
||||
level = 5 - (v << 1);
|
||||
SKIP_BITS(re, &s->gb, 2);
|
||||
CLOSE_READER(re, &s->gb);
|
||||
goto add_coef;
|
||||
@ -1191,6 +1177,7 @@ static int mpeg2_decode_block_non_intra(MpegEncContext *s,
|
||||
}
|
||||
block[63] ^= (mismatch & 1);
|
||||
s->block_last_index[n] = i;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1206,9 +1193,9 @@ static int mpeg2_decode_block_intra(MpegEncContext *s,
|
||||
int mismatch;
|
||||
|
||||
if (s->alternate_scan)
|
||||
scan_table = ff_alternate_vertical_scan;
|
||||
scan_table = s->intra_v_scantable.permutated;
|
||||
else
|
||||
scan_table = zigzag_direct;
|
||||
scan_table = s->intra_scantable.permutated;
|
||||
|
||||
/* DC coef */
|
||||
component = (n <= 3 ? 0 : n - 4 + 1);
|
||||
@ -1402,7 +1389,7 @@ static void mpeg_decode_quant_matrix_extension(MpegEncContext *s)
|
||||
if (get_bits1(&s->gb)) {
|
||||
for(i=0;i<64;i++) {
|
||||
v = get_bits(&s->gb, 8);
|
||||
j = zigzag_direct[i];
|
||||
j = s->intra_scantable.permutated[i];
|
||||
s->intra_matrix[j] = v;
|
||||
s->chroma_intra_matrix[j] = v;
|
||||
}
|
||||
@ -1410,7 +1397,7 @@ static void mpeg_decode_quant_matrix_extension(MpegEncContext *s)
|
||||
if (get_bits1(&s->gb)) {
|
||||
for(i=0;i<64;i++) {
|
||||
v = get_bits(&s->gb, 8);
|
||||
j = zigzag_direct[i];
|
||||
j = s->intra_scantable.permutated[i];
|
||||
s->inter_matrix[j] = v;
|
||||
s->chroma_inter_matrix[j] = v;
|
||||
}
|
||||
@ -1418,14 +1405,14 @@ static void mpeg_decode_quant_matrix_extension(MpegEncContext *s)
|
||||
if (get_bits1(&s->gb)) {
|
||||
for(i=0;i<64;i++) {
|
||||
v = get_bits(&s->gb, 8);
|
||||
j = zigzag_direct[i];
|
||||
j = s->intra_scantable.permutated[i];
|
||||
s->chroma_intra_matrix[j] = v;
|
||||
}
|
||||
}
|
||||
if (get_bits1(&s->gb)) {
|
||||
for(i=0;i<64;i++) {
|
||||
v = get_bits(&s->gb, 8);
|
||||
j = zigzag_direct[i];
|
||||
j = s->intra_scantable.permutated[i];
|
||||
s->chroma_inter_matrix[j] = v;
|
||||
}
|
||||
}
|
||||
@ -1636,7 +1623,7 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx,
|
||||
if (get_bits1(&s->gb)) {
|
||||
for(i=0;i<64;i++) {
|
||||
v = get_bits(&s->gb, 8);
|
||||
j = zigzag_direct[i];
|
||||
j = s->intra_scantable.permutated[i];
|
||||
s->intra_matrix[j] = v;
|
||||
s->chroma_intra_matrix[j] = v;
|
||||
}
|
||||
@ -1648,15 +1635,16 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx,
|
||||
#endif
|
||||
} else {
|
||||
for(i=0;i<64;i++) {
|
||||
int j= s->idct_permutation[i];
|
||||
v = ff_mpeg1_default_intra_matrix[i];
|
||||
s->intra_matrix[i] = v;
|
||||
s->chroma_intra_matrix[i] = v;
|
||||
s->intra_matrix[j] = v;
|
||||
s->chroma_intra_matrix[j] = v;
|
||||
}
|
||||
}
|
||||
if (get_bits1(&s->gb)) {
|
||||
for(i=0;i<64;i++) {
|
||||
v = get_bits(&s->gb, 8);
|
||||
j = zigzag_direct[i];
|
||||
j = s->intra_scantable.permutated[i];
|
||||
s->inter_matrix[j] = v;
|
||||
s->chroma_inter_matrix[j] = v;
|
||||
}
|
||||
@ -1668,9 +1656,10 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx,
|
||||
#endif
|
||||
} else {
|
||||
for(i=0;i<64;i++) {
|
||||
int j= s->idct_permutation[i];
|
||||
v = ff_mpeg1_default_non_intra_matrix[i];
|
||||
s->inter_matrix[i] = v;
|
||||
s->chroma_inter_matrix[i] = v;
|
||||
s->inter_matrix[j] = v;
|
||||
s->chroma_inter_matrix[j] = v;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
* MPEG1/2 tables
|
||||
*/
|
||||
|
||||
INT16 ff_mpeg1_default_intra_matrix[64] = {
|
||||
const INT16 ff_mpeg1_default_intra_matrix[64] = {
|
||||
8, 16, 19, 22, 26, 27, 29, 34,
|
||||
16, 16, 22, 24, 27, 29, 34, 37,
|
||||
19, 22, 26, 27, 29, 34, 34, 38,
|
||||
@ -13,7 +13,7 @@ INT16 ff_mpeg1_default_intra_matrix[64] = {
|
||||
27, 29, 35, 38, 46, 56, 69, 83
|
||||
};
|
||||
|
||||
INT16 ff_mpeg1_default_non_intra_matrix[64] = {
|
||||
const INT16 ff_mpeg1_default_non_intra_matrix[64] = {
|
||||
16, 16, 16, 16, 16, 16, 16, 16,
|
||||
16, 16, 16, 16, 16, 16, 16, 16,
|
||||
16, 16, 16, 16, 16, 16, 16, 16,
|
||||
|
@ -135,7 +135,7 @@ static const UINT16 pixel_aspect[16][2]={
|
||||
};
|
||||
|
||||
/* these matrixes will be permuted for the idct */
|
||||
INT16 ff_mpeg4_default_intra_matrix[64] = {
|
||||
const INT16 ff_mpeg4_default_intra_matrix[64] = {
|
||||
8, 17, 18, 19, 21, 23, 25, 27,
|
||||
17, 18, 19, 21, 23, 25, 27, 28,
|
||||
20, 21, 22, 23, 24, 26, 28, 30,
|
||||
@ -146,7 +146,7 @@ INT16 ff_mpeg4_default_intra_matrix[64] = {
|
||||
27, 28, 30, 32, 35, 38, 41, 45,
|
||||
};
|
||||
|
||||
INT16 ff_mpeg4_default_non_intra_matrix[64] = {
|
||||
const INT16 ff_mpeg4_default_non_intra_matrix[64] = {
|
||||
16, 17, 18, 19, 20, 21, 22, 23,
|
||||
17, 18, 19, 20, 21, 22, 23, 24,
|
||||
18, 19, 20, 21, 22, 23, 24, 25,
|
||||
|
@ -23,11 +23,15 @@
|
||||
#include "avcodec.h"
|
||||
#include "dsputil.h"
|
||||
#include "mpegvideo.h"
|
||||
#include "simple_idct.h"
|
||||
|
||||
#ifdef USE_FASTMEMCPY
|
||||
#include "fastmemcpy.h"
|
||||
#endif
|
||||
|
||||
//#undef NDEBUG
|
||||
//#include <assert.h>
|
||||
|
||||
static void encode_picture(MpegEncContext *s, int picture_number);
|
||||
static void dct_unquantize_mpeg1_c(MpegEncContext *s,
|
||||
DCTELEM *block, int n, int qscale);
|
||||
@ -72,8 +76,6 @@ static UINT8 h263_chroma_roundtab[16] = {
|
||||
static UINT16 default_mv_penalty[MAX_FCODE+1][MAX_MV*2+1];
|
||||
static UINT8 default_fcode_tab[MAX_MV*2+1];
|
||||
|
||||
extern UINT8 zigzag_end[64];
|
||||
|
||||
/* default motion estimation */
|
||||
int motion_estimation_method = ME_EPZS;
|
||||
|
||||
@ -86,7 +88,7 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16
|
||||
int i;
|
||||
if (s->fdct == ff_jpeg_fdct_islow) {
|
||||
for(i=0;i<64;i++) {
|
||||
const int j= block_permute_op(i);
|
||||
const int j= s->idct_permutation[i];
|
||||
/* 16 <= qscale * quant_matrix[i] <= 7905 */
|
||||
/* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */
|
||||
/* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
|
||||
@ -97,7 +99,7 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16
|
||||
}
|
||||
} else if (s->fdct == fdct_ifast) {
|
||||
for(i=0;i<64;i++) {
|
||||
const int j= block_permute_op(i);
|
||||
const int j= s->idct_permutation[i];
|
||||
/* 16 <= qscale * quant_matrix[i] <= 7905 */
|
||||
/* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */
|
||||
/* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
|
||||
@ -108,13 +110,14 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16
|
||||
}
|
||||
} else {
|
||||
for(i=0;i<64;i++) {
|
||||
const int j= s->idct_permutation[i];
|
||||
/* We can safely suppose that 16 <= quant_matrix[i] <= 255
|
||||
So 16 <= qscale * quant_matrix[i] <= 7905
|
||||
so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
|
||||
so 32768 >= (1<<19) / (qscale * quant_matrix[i]) >= 67
|
||||
*/
|
||||
qmat [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
|
||||
qmat16[qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[block_permute_op(i)]);
|
||||
qmat16[qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
|
||||
|
||||
if(qmat16[qscale][i]==0 || qmat16[qscale][i]==128*256) qmat16[qscale][i]=128*256-1;
|
||||
qmat16_bias[qscale][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][i]);
|
||||
@ -131,6 +134,50 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16
|
||||
goto fail;\
|
||||
}\
|
||||
}
|
||||
/*
|
||||
static void build_end(void)
|
||||
{
|
||||
int lastIndex;
|
||||
int lastIndexAfterPerm=0;
|
||||
for(lastIndex=0; lastIndex<64; lastIndex++)
|
||||
{
|
||||
if(ff_zigzag_direct[lastIndex] > lastIndexAfterPerm)
|
||||
lastIndexAfterPerm= ff_zigzag_direct[lastIndex];
|
||||
zigzag_end[lastIndex]= lastIndexAfterPerm + 1;
|
||||
}
|
||||
}
|
||||
*/
|
||||
/*
 * Fill a ScanTable from a 64-entry source scan order.
 *
 *   st->permutated[i] = s->idct_permutation[src_scantable[i]]
 *   st->raster_end[i] = largest value among st->permutated[0..i]
 *
 * s             context providing idct_permutation (must already be set up)
 * st            table to fill
 * src_scantable 64 indices used to look up idct_permutation
 */
void ff_init_scantable(MpegEncContext *s, ScanTable *st, const UINT8 *src_scantable)
{
    int pos;
    int highest = -1;   /* running maximum of the permuted positions seen so far */

    for (pos = 0; pos < 64; pos++) {
        st->permutated[pos] = s->idct_permutation[src_scantable[pos]];

        /* read the value back from the table so it is compared exactly as stored */
        if (st->permutated[pos] > highest)
            highest = st->permutated[pos];

        st->raster_end[pos] = highest;
    }
}
|
||||
|
||||
/* XXX: those functions should be suppressed ASAP when all IDCTs are
|
||||
converted */
|
||||
/*
 * idct_put adapter around j_rev_dct().
 * Runs j_rev_dct() on block, then hands the block to put_pixels_clamped()
 * together with dest and line_size.
 */
static void ff_jref_idct_put(UINT8 *dest, int line_size, DCTELEM *block)
{
    j_rev_dct(block);
    put_pixels_clamped(block, dest, line_size);
}
|
||||
/*
 * idct_add adapter around j_rev_dct().
 * Runs j_rev_dct() on block, then hands the block to add_pixels_clamped()
 * together with dest and line_size.
 */
static void ff_jref_idct_add(UINT8 *dest, int line_size, DCTELEM *block)
{
    j_rev_dct(block);
    add_pixels_clamped(block, dest, line_size);
}
|
||||
|
||||
/* init common structure for both encoder and decoder */
|
||||
int MPV_common_init(MpegEncContext *s)
|
||||
@ -146,7 +193,19 @@ int MPV_common_init(MpegEncContext *s)
|
||||
if(s->avctx->dct_algo==FF_DCT_FASTINT)
|
||||
s->fdct = fdct_ifast;
|
||||
else
|
||||
s->fdct = ff_jpeg_fdct_islow;
|
||||
s->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
|
||||
|
||||
if(s->avctx->idct_algo==FF_IDCT_INT){
|
||||
s->idct_put= ff_jref_idct_put;
|
||||
s->idct_add= ff_jref_idct_add;
|
||||
for(i=0; i<64; i++)
|
||||
s->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
|
||||
}else{ //accurate/default
|
||||
s->idct_put= simple_idct_put;
|
||||
s->idct_add= simple_idct_add;
|
||||
for(i=0; i<64; i++)
|
||||
s->idct_permutation[i]= i;
|
||||
}
|
||||
|
||||
#ifdef HAVE_MMX
|
||||
MPV_common_init_mmx(s);
|
||||
@ -157,6 +216,15 @@ int MPV_common_init(MpegEncContext *s)
|
||||
#ifdef HAVE_MLIB
|
||||
MPV_common_init_mlib(s);
|
||||
#endif
|
||||
|
||||
|
||||
/* load & permutate scantables
|
||||
note: only wmv uses different ones
|
||||
*/
|
||||
ff_init_scantable(s, &s->inter_scantable , ff_zigzag_direct);
|
||||
ff_init_scantable(s, &s->intra_scantable , ff_zigzag_direct);
|
||||
ff_init_scantable(s, &s->intra_h_scantable, ff_alternate_horizontal_scan);
|
||||
ff_init_scantable(s, &s->intra_v_scantable, ff_alternate_vertical_scan);
|
||||
|
||||
s->mb_width = (s->width + 15) / 16;
|
||||
s->mb_height = (s->height + 15) / 16;
|
||||
@ -577,13 +645,6 @@ int MPV_encode_init(AVCodecContext *avctx)
|
||||
s->y_dc_scale_table=
|
||||
s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
|
||||
|
||||
if (s->out_format == FMT_H263)
|
||||
h263_encode_init(s);
|
||||
else if (s->out_format == FMT_MPEG1)
|
||||
ff_mpeg1_encode_init(s);
|
||||
if(s->msmpeg4_version)
|
||||
ff_msmpeg4_encode_init(s);
|
||||
|
||||
/* dont use mv_penalty table for crap MV as it would be confused */
|
||||
if (s->me_method < ME_EPZS) s->mv_penalty = default_mv_penalty;
|
||||
|
||||
@ -593,17 +654,25 @@ int MPV_encode_init(AVCodecContext *avctx)
|
||||
if (MPV_common_init(s) < 0)
|
||||
return -1;
|
||||
|
||||
if (s->out_format == FMT_H263)
|
||||
h263_encode_init(s);
|
||||
else if (s->out_format == FMT_MPEG1)
|
||||
ff_mpeg1_encode_init(s);
|
||||
if(s->msmpeg4_version)
|
||||
ff_msmpeg4_encode_init(s);
|
||||
|
||||
/* init default q matrix */
|
||||
for(i=0;i<64;i++) {
|
||||
int j= s->idct_permutation[i];
|
||||
if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
|
||||
s->intra_matrix[i] = ff_mpeg4_default_intra_matrix[i];
|
||||
s->inter_matrix[i] = ff_mpeg4_default_non_intra_matrix[i];
|
||||
s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
|
||||
s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
|
||||
}else if(s->out_format == FMT_H263){
|
||||
s->intra_matrix[i] =
|
||||
s->inter_matrix[i] = ff_mpeg1_default_non_intra_matrix[i];
|
||||
s->intra_matrix[j] =
|
||||
s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
|
||||
}else{ /* mpeg1 */
|
||||
s->intra_matrix[i] = ff_mpeg1_default_intra_matrix[i];
|
||||
s->inter_matrix[i] = ff_mpeg1_default_non_intra_matrix[i];
|
||||
s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
|
||||
s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
|
||||
}
|
||||
}
|
||||
|
||||
@ -1450,7 +1519,7 @@ static inline void put_dct(MpegEncContext *s,
|
||||
{
|
||||
if (!s->mpeg2)
|
||||
s->dct_unquantize(s, block, i, s->qscale);
|
||||
ff_idct_put (dest, line_size, block);
|
||||
s->idct_put (dest, line_size, block);
|
||||
}
|
||||
|
||||
/* add block[] to dest[] */
|
||||
@ -1458,7 +1527,7 @@ static inline void add_dct(MpegEncContext *s,
|
||||
DCTELEM *block, int i, UINT8 *dest, int line_size)
|
||||
{
|
||||
if (s->block_last_index[i] >= 0) {
|
||||
ff_idct_add (dest, line_size, block);
|
||||
s->idct_add (dest, line_size, block);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1468,7 +1537,7 @@ static inline void add_dequant_dct(MpegEncContext *s,
|
||||
if (s->block_last_index[i] >= 0) {
|
||||
s->dct_unquantize(s, block, i, s->qscale);
|
||||
|
||||
ff_idct_add (dest, line_size, block);
|
||||
s->idct_add (dest, line_size, block);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1720,7 +1789,7 @@ static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int th
|
||||
if(last_index<=skip_dc - 1) return;
|
||||
|
||||
for(i=0; i<=last_index; i++){
|
||||
const int j = zigzag_direct[i];
|
||||
const int j = s->intra_scantable.permutated[i];
|
||||
const int level = ABS(block[j]);
|
||||
if(level==1){
|
||||
if(skip_dc && i==0) continue;
|
||||
@ -1734,7 +1803,7 @@ static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int th
|
||||
}
|
||||
if(score >= threshold) return;
|
||||
for(i=skip_dc; i<=last_index; i++){
|
||||
const int j = zigzag_direct[i];
|
||||
const int j = s->intra_scantable.permutated[i];
|
||||
block[j]=0;
|
||||
}
|
||||
if(block[0]) s->block_last_index[n]= 0;
|
||||
@ -1746,9 +1815,14 @@ static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index
|
||||
int i;
|
||||
const int maxlevel= s->max_qcoeff;
|
||||
const int minlevel= s->min_qcoeff;
|
||||
|
||||
for(i=0;i<=last_index; i++){
|
||||
const int j = zigzag_direct[i];
|
||||
|
||||
if(s->mb_intra){
|
||||
i=1; //skip clipping of intra dc
|
||||
}else
|
||||
i=0;
|
||||
|
||||
for(;i<=last_index; i++){
|
||||
const int j= s->intra_scantable.permutated[i];
|
||||
int level = block[j];
|
||||
|
||||
if (level>maxlevel) level=maxlevel;
|
||||
@ -1760,22 +1834,22 @@ static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index
|
||||
static inline void requantize_coeffs(MpegEncContext *s, DCTELEM block[64], int oldq, int newq, int n)
|
||||
{
|
||||
int i;
|
||||
|
||||
if(s->mb_intra){
|
||||
//FIXME requantize, note (mpeg1/h263/h263p-aic dont need it,...)
|
||||
i=1;
|
||||
|
||||
if(s->mb_intra){
|
||||
i=1; //skip clipping of intra dc
|
||||
//FIXME requantize, note (mpeg1/h263/h263p-aic dont need it,...)
|
||||
}else
|
||||
i=0;
|
||||
|
||||
for(;i<=s->block_last_index[n]; i++){
|
||||
const int j = zigzag_direct[i];
|
||||
const int j = s->intra_scantable.permutated[i];
|
||||
int level = block[j];
|
||||
|
||||
block[j]= ROUNDED_DIV(level*oldq, newq);
|
||||
}
|
||||
|
||||
for(i=s->block_last_index[n]; i>=0; i--){
|
||||
const int j = zigzag_direct[i]; //FIXME other scantabs
|
||||
const int j = s->intra_scantable.permutated[i];
|
||||
if(block[j]) break;
|
||||
}
|
||||
s->block_last_index[n]= i;
|
||||
@ -1791,11 +1865,14 @@ static inline void auto_requantize_coeffs(MpegEncContext *s, DCTELEM block[6][64
|
||||
assert(s->adaptive_quant);
|
||||
|
||||
for(n=0; n<6; n++){
|
||||
if(s->mb_intra) i=1;
|
||||
else i=0;
|
||||
if(s->mb_intra){
|
||||
i=1; //skip clipping of intra dc
|
||||
//FIXME requantize, note (mpeg1/h263/h263p-aic dont need it,...)
|
||||
}else
|
||||
i=0;
|
||||
|
||||
for(;i<=s->block_last_index[n]; i++){
|
||||
const int j = zigzag_direct[i]; //FIXME other scantabs
|
||||
const int j = s->intra_scantable.permutated[i];
|
||||
int level = block[n][j];
|
||||
if(largest < level) largest = level;
|
||||
if(smallest > level) smallest= level;
|
||||
@ -2379,8 +2456,11 @@ static void encode_picture(MpegEncContext *s, int picture_number)
|
||||
if (s->out_format == FMT_MJPEG) {
|
||||
/* for mjpeg, we do include qscale in the matrix */
|
||||
s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
|
||||
for(i=1;i<64;i++)
|
||||
s->intra_matrix[i] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
|
||||
for(i=1;i<64;i++){
|
||||
int j= s->idct_permutation[i];
|
||||
|
||||
s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
|
||||
}
|
||||
convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
|
||||
s->q_intra_matrix16_bias, s->intra_matrix, s->intra_quant_bias);
|
||||
}
|
||||
@ -2752,7 +2832,7 @@ static int dct_quantize_c(MpegEncContext *s,
|
||||
#ifndef ARCH_ALPHA /* Alpha uses unpermuted matrix */
|
||||
/* we need this permutation so that we correct the IDCT
|
||||
permutation. will be moved into DCT code */
|
||||
block_permute(block);
|
||||
block_permute(block, s->idct_permutation); //FIXME remove
|
||||
#endif
|
||||
|
||||
if (s->mb_intra) {
|
||||
@ -2782,7 +2862,7 @@ static int dct_quantize_c(MpegEncContext *s,
|
||||
threshold2= (threshold1<<1);
|
||||
|
||||
for(;i<64;i++) {
|
||||
j = zigzag_direct[i];
|
||||
j = s->intra_scantable.permutated[i];
|
||||
level = block[j];
|
||||
level = level * qmat[j];
|
||||
|
||||
@ -2813,8 +2893,7 @@ static void dct_unquantize_mpeg1_c(MpegEncContext *s,
|
||||
int i, level, nCoeffs;
|
||||
const UINT16 *quant_matrix;
|
||||
|
||||
if(s->alternate_scan) nCoeffs= 64;
|
||||
else nCoeffs= s->block_last_index[n]+1;
|
||||
nCoeffs= s->block_last_index[n];
|
||||
|
||||
if (s->mb_intra) {
|
||||
if (n < 4)
|
||||
@ -2823,8 +2902,8 @@ static void dct_unquantize_mpeg1_c(MpegEncContext *s,
|
||||
block[0] = block[0] * s->c_dc_scale;
|
||||
/* XXX: only mpeg1 */
|
||||
quant_matrix = s->intra_matrix;
|
||||
for(i=1;i<nCoeffs;i++) {
|
||||
int j= zigzag_direct[i];
|
||||
for(i=1;i<=nCoeffs;i++) {
|
||||
int j= s->intra_scantable.permutated[i];
|
||||
level = block[j];
|
||||
if (level) {
|
||||
if (level < 0) {
|
||||
@ -2846,8 +2925,8 @@ static void dct_unquantize_mpeg1_c(MpegEncContext *s,
|
||||
} else {
|
||||
i = 0;
|
||||
quant_matrix = s->inter_matrix;
|
||||
for(;i<nCoeffs;i++) {
|
||||
int j= zigzag_direct[i];
|
||||
for(;i<=nCoeffs;i++) {
|
||||
int j= s->intra_scantable.permutated[i];
|
||||
level = block[j];
|
||||
if (level) {
|
||||
if (level < 0) {
|
||||
@ -2877,8 +2956,8 @@ static void dct_unquantize_mpeg2_c(MpegEncContext *s,
|
||||
int i, level, nCoeffs;
|
||||
const UINT16 *quant_matrix;
|
||||
|
||||
if(s->alternate_scan) nCoeffs= 64;
|
||||
else nCoeffs= s->block_last_index[n]+1;
|
||||
if(s->alternate_scan) nCoeffs= 63;
|
||||
else nCoeffs= s->block_last_index[n];
|
||||
|
||||
if (s->mb_intra) {
|
||||
if (n < 4)
|
||||
@ -2886,8 +2965,8 @@ static void dct_unquantize_mpeg2_c(MpegEncContext *s,
|
||||
else
|
||||
block[0] = block[0] * s->c_dc_scale;
|
||||
quant_matrix = s->intra_matrix;
|
||||
for(i=1;i<nCoeffs;i++) {
|
||||
int j= zigzag_direct[i];
|
||||
for(i=1;i<=nCoeffs;i++) {
|
||||
int j= s->intra_scantable.permutated[i];
|
||||
level = block[j];
|
||||
if (level) {
|
||||
if (level < 0) {
|
||||
@ -2908,8 +2987,8 @@ static void dct_unquantize_mpeg2_c(MpegEncContext *s,
|
||||
int sum=-1;
|
||||
i = 0;
|
||||
quant_matrix = s->inter_matrix;
|
||||
for(;i<nCoeffs;i++) {
|
||||
int j= zigzag_direct[i];
|
||||
for(;i<=nCoeffs;i++) {
|
||||
int j= s->intra_scantable.permutated[i];
|
||||
level = block[j];
|
||||
if (level) {
|
||||
if (level < 0) {
|
||||
@ -2940,27 +3019,27 @@ static void dct_unquantize_h263_c(MpegEncContext *s,
|
||||
int i, level, qmul, qadd;
|
||||
int nCoeffs;
|
||||
|
||||
assert(s->block_last_index[n]>=0);
|
||||
|
||||
qadd = (qscale - 1) | 1;
|
||||
qmul = qscale << 1;
|
||||
|
||||
if (s->mb_intra) {
|
||||
if (!s->h263_aic) {
|
||||
if (n < 4)
|
||||
block[0] = block[0] * s->y_dc_scale;
|
||||
else
|
||||
block[0] = block[0] * s->c_dc_scale;
|
||||
}
|
||||
}else
|
||||
qadd = 0;
|
||||
i = 1;
|
||||
nCoeffs= 64; //does not allways use zigzag table
|
||||
nCoeffs= 63; //does not allways use zigzag table
|
||||
} else {
|
||||
i = 0;
|
||||
nCoeffs= zigzag_end[ s->block_last_index[n] ];
|
||||
nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
|
||||
}
|
||||
|
||||
qmul = s->qscale << 1;
|
||||
if (s->h263_aic && s->mb_intra)
|
||||
qadd = 0;
|
||||
else
|
||||
qadd = (s->qscale - 1) | 1;
|
||||
|
||||
for(;i<nCoeffs;i++) {
|
||||
for(;i<=nCoeffs;i++) {
|
||||
level = block[i];
|
||||
if (level) {
|
||||
if (level < 0) {
|
||||
|
@ -99,6 +99,11 @@ typedef struct ReorderBuffer{
|
||||
int picture_in_gop_number;
|
||||
} ReorderBuffer;
|
||||
|
||||
typedef struct ScanTable{
|
||||
UINT8 permutated[64];
|
||||
UINT8 raster_end[64];
|
||||
} ScanTable;
|
||||
|
||||
typedef struct MpegEncContext {
|
||||
struct AVCodecContext *avctx;
|
||||
/* the following parameters must be initialized before encoding */
|
||||
@ -286,6 +291,12 @@ typedef struct MpegEncContext {
|
||||
UINT16 __align8 q_intra_matrix16_bias[32][64];
|
||||
UINT16 __align8 q_inter_matrix16_bias[32][64];
|
||||
int block_last_index[6]; /* last non zero coefficient in block */
|
||||
/* scantables */
|
||||
ScanTable intra_scantable;
|
||||
ScanTable intra_h_scantable;
|
||||
ScanTable intra_v_scantable;
|
||||
ScanTable inter_scantable; // if inter == intra then intra should be used to reduce tha cache usage
|
||||
UINT8 idct_permutation[64];
|
||||
|
||||
void *opaque; /* private data for the user */
|
||||
|
||||
@ -421,10 +432,6 @@ typedef struct MpegEncContext {
|
||||
int per_mb_rl_table;
|
||||
int esc3_level_length;
|
||||
int esc3_run_length;
|
||||
UINT8 *inter_scantable;
|
||||
UINT8 *intra_scantable;
|
||||
UINT8 *intra_v_scantable;
|
||||
UINT8 *intra_h_scantable;
|
||||
/* [mb_intra][isChroma][level][run][last] */
|
||||
int (*ac_stats)[2][MAX_LEVEL+1][MAX_RUN+1][2];
|
||||
int inter_intra_pred;
|
||||
@ -477,7 +484,9 @@ typedef struct MpegEncContext {
|
||||
void (*dct_unquantize)(struct MpegEncContext *s, // unquantizer to use (mpeg4 can use both)
|
||||
DCTELEM *block, int n, int qscale);
|
||||
int (*dct_quantize)(struct MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
|
||||
void (*fdct)(DCTELEM *block);
|
||||
void (*fdct)(DCTELEM *block/* align 16*/);
|
||||
void (*idct_put)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
|
||||
void (*idct_add)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
|
||||
} MpegEncContext;
|
||||
|
||||
int MPV_common_init(MpegEncContext *s);
|
||||
@ -498,6 +507,7 @@ extern void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w);
|
||||
void ff_conceal_past_errors(MpegEncContext *s, int conceal_all);
|
||||
void ff_copy_bits(PutBitContext *pb, UINT8 *src, int length);
|
||||
void ff_clean_intra_table_entries(MpegEncContext *s);
|
||||
void ff_init_scantable(MpegEncContext *s, ScanTable *st, const UINT8 *src_scantable);
|
||||
|
||||
extern int ff_bit_exact;
|
||||
|
||||
@ -511,8 +521,8 @@ void ff_fix_long_p_mvs(MpegEncContext * s);
|
||||
void ff_fix_long_b_mvs(MpegEncContext * s, int16_t (*mv_table)[2], int f_code, int type);
|
||||
|
||||
/* mpeg12.c */
|
||||
extern INT16 ff_mpeg1_default_intra_matrix[64];
|
||||
extern INT16 ff_mpeg1_default_non_intra_matrix[64];
|
||||
extern const INT16 ff_mpeg1_default_intra_matrix[64];
|
||||
extern const INT16 ff_mpeg1_default_non_intra_matrix[64];
|
||||
extern UINT8 ff_mpeg1_dc_scale_table[128];
|
||||
|
||||
void mpeg1_encode_picture_header(MpegEncContext *s, int picture_number);
|
||||
@ -551,8 +561,8 @@ static inline int get_rl_index(const RLTable *rl, int last, int run, int level)
|
||||
|
||||
extern UINT8 ff_mpeg4_y_dc_scale_table[32];
|
||||
extern UINT8 ff_mpeg4_c_dc_scale_table[32];
|
||||
extern INT16 ff_mpeg4_default_intra_matrix[64];
|
||||
extern INT16 ff_mpeg4_default_non_intra_matrix[64];
|
||||
extern const INT16 ff_mpeg4_default_intra_matrix[64];
|
||||
extern const INT16 ff_mpeg4_default_non_intra_matrix[64];
|
||||
|
||||
void h263_encode_mb(MpegEncContext *s,
|
||||
DCTELEM block[6][64],
|
||||
|
@ -164,32 +164,21 @@ static void common_init(MpegEncContext * s)
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
if(s->msmpeg4_version==4){
|
||||
s->intra_scantable = wmv1_scantable[1];
|
||||
s->intra_h_scantable= wmv1_scantable[2];
|
||||
s->intra_v_scantable= wmv1_scantable[3];
|
||||
s->inter_scantable = wmv1_scantable[0];
|
||||
}else{
|
||||
s->intra_scantable = zigzag_direct;
|
||||
s->intra_h_scantable= ff_alternate_horizontal_scan;
|
||||
s->intra_v_scantable= ff_alternate_vertical_scan;
|
||||
s->inter_scantable = zigzag_direct;
|
||||
int i;
|
||||
ff_init_scantable(s, &s->intra_scantable , wmv1_scantable[1]);
|
||||
ff_init_scantable(s, &s->intra_h_scantable, wmv1_scantable[2]);
|
||||
ff_init_scantable(s, &s->intra_v_scantable, wmv1_scantable[3]);
|
||||
ff_init_scantable(s, &s->inter_scantable , wmv1_scantable[0]);
|
||||
}
|
||||
//Note the default tables are set in common_init in mpegvideo.c
|
||||
|
||||
if(!inited){
|
||||
int i;
|
||||
inited=1;
|
||||
|
||||
init_h263_dc_for_msmpeg4();
|
||||
|
||||
/* permute for IDCT */
|
||||
for(i=0; i<WMV1_SCANTABLE_COUNT; i++){
|
||||
int k;
|
||||
for(k=0;k<64;k++) {
|
||||
int j = wmv1_scantable[i][k];
|
||||
wmv1_scantable[i][k]= block_permute_op(j);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -936,7 +925,7 @@ static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int
|
||||
rl = &rl_table[3 + s->rl_chroma_table_index];
|
||||
}
|
||||
run_diff = 0;
|
||||
scantable= s->intra_scantable;
|
||||
scantable= s->intra_scantable.permutated;
|
||||
set_stat(ST_INTRA_AC);
|
||||
} else {
|
||||
i = 0;
|
||||
@ -945,12 +934,12 @@ static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int
|
||||
run_diff = 0;
|
||||
else
|
||||
run_diff = 1;
|
||||
scantable= s->inter_scantable;
|
||||
scantable= s->inter_scantable.permutated;
|
||||
set_stat(ST_INTER_AC);
|
||||
}
|
||||
|
||||
/* recalculate block_last_index for M$ wmv1 */
|
||||
if(scantable!=zigzag_direct && s->block_last_index[n]>0){
|
||||
if(s->msmpeg4_version==4 && s->block_last_index[n]>0){
|
||||
for(last_index=63; last_index>=0; last_index--){
|
||||
if(block[scantable[last_index]]) break;
|
||||
}
|
||||
@ -1704,11 +1693,11 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
|
||||
}
|
||||
if (s->ac_pred) {
|
||||
if (dc_pred_dir == 0)
|
||||
scan_table = s->intra_v_scantable; /* left */
|
||||
scan_table = s->intra_v_scantable.permutated; /* left */
|
||||
else
|
||||
scan_table = s->intra_h_scantable; /* top */
|
||||
scan_table = s->intra_h_scantable.permutated; /* top */
|
||||
} else {
|
||||
scan_table = s->intra_scantable;
|
||||
scan_table = s->intra_scantable.permutated;
|
||||
}
|
||||
set_stat(ST_INTRA_AC);
|
||||
rl_vlc= rl->rl_vlc[0];
|
||||
@ -1727,7 +1716,7 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
|
||||
s->block_last_index[n] = i;
|
||||
return 0;
|
||||
}
|
||||
scan_table = s->inter_scantable;
|
||||
scan_table = s->inter_scantable.permutated;
|
||||
set_stat(ST_INTER_AC);
|
||||
rl_vlc= rl->rl_vlc[s->qscale];
|
||||
}
|
||||
|
@ -1819,7 +1819,7 @@ static UINT8 old_ff_c_dc_scale_table[32]={
|
||||
|
||||
#define WMV1_SCANTABLE_COUNT 4
|
||||
|
||||
static UINT8 wmv1_scantable00[64]= {
|
||||
static const UINT8 wmv1_scantable00[64]= {
|
||||
0x00, 0x08, 0x01, 0x02, 0x09, 0x10, 0x18, 0x11,
|
||||
0x0A, 0x03, 0x04, 0x0B, 0x12, 0x19, 0x20, 0x28,
|
||||
0x30, 0x38, 0x29, 0x21, 0x1A, 0x13, 0x0C, 0x05,
|
||||
@ -1829,7 +1829,7 @@ static UINT8 wmv1_scantable00[64]= {
|
||||
0x2C, 0x25, 0x1E, 0x17, 0x1F, 0x26, 0x2D, 0x35,
|
||||
0x3D, 0x3E, 0x36, 0x2E, 0x27, 0x2F, 0x37, 0x3F,
|
||||
};
|
||||
static UINT8 wmv1_scantable01[64]= {
|
||||
static const UINT8 wmv1_scantable01[64]= {
|
||||
0x00, 0x08, 0x01, 0x02, 0x09, 0x10, 0x18, 0x11,
|
||||
0x0A, 0x03, 0x04, 0x0B, 0x12, 0x19, 0x20, 0x28,
|
||||
0x21, 0x30, 0x1A, 0x13, 0x0C, 0x05, 0x06, 0x0D,
|
||||
@ -1839,7 +1839,7 @@ static UINT8 wmv1_scantable01[64]= {
|
||||
0x1E, 0x17, 0x1F, 0x26, 0x2D, 0x34, 0x3C, 0x35,
|
||||
0x3D, 0x2E, 0x27, 0x2F, 0x36, 0x3E, 0x37, 0x3F,
|
||||
};
|
||||
static UINT8 wmv1_scantable02[64]= {
|
||||
static const UINT8 wmv1_scantable02[64]= {
|
||||
0x00, 0x01, 0x08, 0x02, 0x03, 0x09, 0x10, 0x18,
|
||||
0x11, 0x0A, 0x04, 0x05, 0x0B, 0x12, 0x19, 0x20,
|
||||
0x28, 0x30, 0x21, 0x1A, 0x13, 0x0C, 0x06, 0x07,
|
||||
@ -1849,7 +1849,7 @@ static UINT8 wmv1_scantable02[64]= {
|
||||
0x17, 0x1F, 0x26, 0x2D, 0x34, 0x3B, 0x3C, 0x35,
|
||||
0x2E, 0x27, 0x2F, 0x36, 0x3D, 0x3E, 0x37, 0x3F,
|
||||
};
|
||||
static UINT8 wmv1_scantable03[64]= {
|
||||
static const UINT8 wmv1_scantable03[64]= {
|
||||
0x00, 0x08, 0x10, 0x01, 0x18, 0x20, 0x28, 0x09,
|
||||
0x02, 0x03, 0x0A, 0x11, 0x19, 0x30, 0x38, 0x29,
|
||||
0x21, 0x1A, 0x12, 0x0B, 0x04, 0x05, 0x0C, 0x13,
|
||||
@ -1860,7 +1860,7 @@ static UINT8 wmv1_scantable03[64]= {
|
||||
0x2E, 0x27, 0x2F, 0x36, 0x3D, 0x3E, 0x37, 0x3F,
|
||||
};
|
||||
|
||||
static UINT8 *wmv1_scantable[WMV1_SCANTABLE_COUNT+1]={
|
||||
static const UINT8 *wmv1_scantable[WMV1_SCANTABLE_COUNT+1]={
|
||||
wmv1_scantable00,
|
||||
wmv1_scantable01,
|
||||
wmv1_scantable02,
|
||||
|
@ -20,5 +20,7 @@
|
||||
|
||||
void simple_idct_put(UINT8 *dest, int line_size, INT16 *block);
|
||||
void simple_idct_add(UINT8 *dest, int line_size, INT16 *block);
|
||||
void simple_idct_mmx(short *block);
|
||||
void ff_simple_idct_mmx(short *block);
|
||||
void ff_simple_idct_add_mmx(UINT8 *dest, int line_size, INT16 *block);
|
||||
void ff_simple_idct_put_mmx(UINT8 *dest, int line_size, INT16 *block);
|
||||
void simple_idct(short *block);
|
||||
|
@ -51,8 +51,8 @@ do_ffmpeg()
|
||||
{
|
||||
f="$1"
|
||||
shift
|
||||
echo $ffmpeg -bitexact -dct_algo 1 $*
|
||||
$ffmpeg -bitexact -dct_algo 1 -benchmark $* > $datadir/bench.tmp
|
||||
echo $ffmpeg -bitexact -dct_algo 1 -idct_algo 2 $*
|
||||
$ffmpeg -bitexact -dct_algo 1 -idct_algo 2 -benchmark $* > $datadir/bench.tmp
|
||||
md5sum -b $f >> $logfile
|
||||
expr "`cat $datadir/bench.tmp`" : '.*utime=\(.*s\)' > $datadir/bench2.tmp
|
||||
echo `cat $datadir/bench2.tmp` $f >> $benchfile
|
||||
@ -62,8 +62,8 @@ do_ffmpeg_crc()
|
||||
{
|
||||
f="$1"
|
||||
shift
|
||||
echo $ffmpeg -y -bitexact -dct_algo 1 $* -f crc $datadir/ffmpeg.crc
|
||||
$ffmpeg -y -bitexact -dct_algo 1 $* -f crc $datadir/ffmpeg.crc
|
||||
echo $ffmpeg -y -bitexact -dct_algo 1 -idct_algo 2 $* -f crc $datadir/ffmpeg.crc
|
||||
$ffmpeg -y -bitexact -dct_algo 1 -idct_algo 2 $* -f crc $datadir/ffmpeg.crc
|
||||
echo -n "$f " >> $logfile
|
||||
cat $datadir/ffmpeg.crc >> $logfile
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user