mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
* using DSPContext - so each codec can use its own local (sub)set of CPU extensions
Originally committed as revision 1194 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
fb602cd15e
commit
eb4b3dd3c3
@ -20,7 +20,7 @@
|
||||
*/
|
||||
#include "avcodec.h"
|
||||
#include "dsputil.h"
|
||||
|
||||
/*
|
||||
void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
|
||||
void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride);
|
||||
void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
|
||||
@ -41,7 +41,7 @@ op_pixels_abs_func pix_abs8x8;
|
||||
op_pixels_abs_func pix_abs8x8_x2;
|
||||
op_pixels_abs_func pix_abs8x8_y2;
|
||||
op_pixels_abs_func pix_abs8x8_xy2;
|
||||
|
||||
*/
|
||||
int ff_bit_exact=0;
|
||||
|
||||
UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
|
||||
@ -84,7 +84,7 @@ const UINT8 ff_alternate_vertical_scan[64] = {
|
||||
};
|
||||
|
||||
/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
|
||||
UINT32 inverse[256]={
|
||||
const UINT32 inverse[256]={
|
||||
0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757,
|
||||
536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154,
|
||||
268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709,
|
||||
@ -119,7 +119,7 @@ UINT32 inverse[256]={
|
||||
17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010,
|
||||
};
|
||||
|
||||
int pix_sum_c(UINT8 * pix, int line_size)
|
||||
static int pix_sum_c(UINT8 * pix, int line_size)
|
||||
{
|
||||
int s, i, j;
|
||||
|
||||
@ -141,7 +141,7 @@ int pix_sum_c(UINT8 * pix, int line_size)
|
||||
return s;
|
||||
}
|
||||
|
||||
int pix_norm1_c(UINT8 * pix, int line_size)
|
||||
static int pix_norm1_c(UINT8 * pix, int line_size)
|
||||
{
|
||||
int s, i, j;
|
||||
UINT32 *sq = squareTbl + 256;
|
||||
@ -165,7 +165,7 @@ int pix_norm1_c(UINT8 * pix, int line_size)
|
||||
}
|
||||
|
||||
|
||||
void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size)
|
||||
static void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size)
|
||||
{
|
||||
int i;
|
||||
|
||||
@ -184,8 +184,8 @@ void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size)
|
||||
}
|
||||
}
|
||||
|
||||
void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1, const UINT8 *s2,
|
||||
int stride){
|
||||
static void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1,
|
||||
const UINT8 *s2, int stride){
|
||||
int i;
|
||||
|
||||
/* read the pixels */
|
||||
@ -205,8 +205,8 @@ void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1, const UINT8 *s2,
|
||||
}
|
||||
|
||||
|
||||
void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
|
||||
int line_size)
|
||||
static void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
|
||||
int line_size)
|
||||
{
|
||||
int i;
|
||||
UINT8 *cm = cropTbl + MAX_NEG_CROP;
|
||||
@ -227,7 +227,7 @@ void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
|
||||
}
|
||||
}
|
||||
|
||||
void add_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
|
||||
static void add_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
|
||||
int line_size)
|
||||
{
|
||||
int i;
|
||||
@ -1353,7 +1353,7 @@ QPEL_MC(0, avg_ , _ , op_avg)
|
||||
#undef op_put
|
||||
#undef op_put_no_rnd
|
||||
|
||||
int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size)
|
||||
static int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size)
|
||||
{
|
||||
int s, i;
|
||||
|
||||
@ -1381,7 +1381,7 @@ int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size)
|
||||
return s;
|
||||
}
|
||||
|
||||
int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
|
||||
static int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
|
||||
{
|
||||
int s, i;
|
||||
|
||||
@ -1409,7 +1409,7 @@ int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
|
||||
return s;
|
||||
}
|
||||
|
||||
int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
|
||||
static int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
|
||||
{
|
||||
int s, i;
|
||||
UINT8 *pix3 = pix2 + line_size;
|
||||
@ -1439,7 +1439,7 @@ int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
|
||||
return s;
|
||||
}
|
||||
|
||||
int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
|
||||
static int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
|
||||
{
|
||||
int s, i;
|
||||
UINT8 *pix3 = pix2 + line_size;
|
||||
@ -1469,7 +1469,7 @@ int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
|
||||
return s;
|
||||
}
|
||||
|
||||
int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size)
|
||||
static int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size)
|
||||
{
|
||||
int s, i;
|
||||
|
||||
@ -1489,7 +1489,7 @@ int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size)
|
||||
return s;
|
||||
}
|
||||
|
||||
int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
|
||||
static int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
|
||||
{
|
||||
int s, i;
|
||||
|
||||
@ -1509,7 +1509,7 @@ int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
|
||||
return s;
|
||||
}
|
||||
|
||||
int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
|
||||
static int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
|
||||
{
|
||||
int s, i;
|
||||
UINT8 *pix3 = pix2 + line_size;
|
||||
@ -1531,7 +1531,7 @@ int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
|
||||
return s;
|
||||
}
|
||||
|
||||
int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
|
||||
static int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
|
||||
{
|
||||
int s, i;
|
||||
UINT8 *pix3 = pix2 + line_size;
|
||||
@ -1574,12 +1574,12 @@ void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable,
|
||||
}
|
||||
}
|
||||
|
||||
void clear_blocks_c(DCTELEM *blocks)
|
||||
static void clear_blocks_c(DCTELEM *blocks)
|
||||
{
|
||||
memset(blocks, 0, sizeof(DCTELEM)*6*64);
|
||||
}
|
||||
|
||||
void dsputil_init(void)
|
||||
void dsputil_init(DSPContext* c, unsigned mask)
|
||||
{
|
||||
int i;
|
||||
|
||||
@ -1593,42 +1593,82 @@ void dsputil_init(void)
|
||||
squareTbl[i] = (i - 256) * (i - 256);
|
||||
}
|
||||
|
||||
get_pixels = get_pixels_c;
|
||||
diff_pixels = diff_pixels_c;
|
||||
put_pixels_clamped = put_pixels_clamped_c;
|
||||
add_pixels_clamped = add_pixels_clamped_c;
|
||||
ff_gmc1= gmc1_c;
|
||||
ff_gmc= gmc_c;
|
||||
clear_blocks= clear_blocks_c;
|
||||
pix_sum= pix_sum_c;
|
||||
pix_norm1= pix_norm1_c;
|
||||
c->get_pixels = get_pixels_c;
|
||||
c->diff_pixels = diff_pixels_c;
|
||||
c->put_pixels_clamped = put_pixels_clamped_c;
|
||||
c->add_pixels_clamped = add_pixels_clamped_c;
|
||||
c->gmc1 = gmc1_c;
|
||||
c->gmc = gmc_c;
|
||||
c->clear_blocks = clear_blocks_c;
|
||||
c->pix_sum = pix_sum_c;
|
||||
c->pix_norm1 = pix_norm1_c;
|
||||
|
||||
pix_abs16x16 = pix_abs16x16_c;
|
||||
pix_abs16x16_x2 = pix_abs16x16_x2_c;
|
||||
pix_abs16x16_y2 = pix_abs16x16_y2_c;
|
||||
pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
|
||||
pix_abs8x8 = pix_abs8x8_c;
|
||||
pix_abs8x8_x2 = pix_abs8x8_x2_c;
|
||||
pix_abs8x8_y2 = pix_abs8x8_y2_c;
|
||||
pix_abs8x8_xy2 = pix_abs8x8_xy2_c;
|
||||
c->pix_abs16x16 = pix_abs16x16_c;
|
||||
c->pix_abs16x16_x2 = pix_abs16x16_x2_c;
|
||||
c->pix_abs16x16_y2 = pix_abs16x16_y2_c;
|
||||
c->pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
|
||||
c->pix_abs8x8 = pix_abs8x8_c;
|
||||
c->pix_abs8x8_x2 = pix_abs8x8_x2_c;
|
||||
c->pix_abs8x8_y2 = pix_abs8x8_y2_c;
|
||||
c->pix_abs8x8_xy2 = pix_abs8x8_xy2_c;
|
||||
|
||||
c->put_pixels_tab[0][0] = put_pixels16;
|
||||
c->put_pixels_tab[0][1] = put_pixels16_x2;
|
||||
c->put_pixels_tab[0][2] = put_pixels16_y2;
|
||||
c->put_pixels_tab[0][3] = put_pixels16_xy2;
|
||||
|
||||
c->put_no_rnd_pixels_tab[0][0] = put_pixels16;
|
||||
c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2;
|
||||
c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2;
|
||||
c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2;
|
||||
|
||||
c->avg_pixels_tab[0][0] = avg_pixels16;
|
||||
c->avg_pixels_tab[0][1] = avg_pixels16_x2;
|
||||
c->avg_pixels_tab[0][2] = avg_pixels16_y2;
|
||||
c->avg_pixels_tab[0][3] = avg_pixels16_xy2;
|
||||
|
||||
c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16;
|
||||
c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2;
|
||||
c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2;
|
||||
c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2;
|
||||
|
||||
c->put_pixels_tab[1][0] = put_pixels8;
|
||||
c->put_pixels_tab[1][1] = put_pixels8_x2;
|
||||
c->put_pixels_tab[1][2] = put_pixels8_y2;
|
||||
c->put_pixels_tab[1][3] = put_pixels8_xy2;
|
||||
|
||||
c->put_no_rnd_pixels_tab[1][0] = put_pixels8;
|
||||
c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2;
|
||||
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2;
|
||||
c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2;
|
||||
|
||||
c->avg_pixels_tab[1][0] = avg_pixels8;
|
||||
c->avg_pixels_tab[1][1] = avg_pixels8_x2;
|
||||
c->avg_pixels_tab[1][2] = avg_pixels8_y2;
|
||||
c->avg_pixels_tab[1][3] = avg_pixels8_xy2;
|
||||
|
||||
c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8;
|
||||
c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2;
|
||||
c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2;
|
||||
c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2;
|
||||
|
||||
#ifdef HAVE_MMX
|
||||
dsputil_init_mmx();
|
||||
dsputil_init_mmx(c, mask);
|
||||
#endif
|
||||
#ifdef ARCH_ARMV4L
|
||||
dsputil_init_armv4l();
|
||||
dsputil_init_armv4l(c, mask);
|
||||
#endif
|
||||
#ifdef HAVE_MLIB
|
||||
dsputil_init_mlib();
|
||||
dsputil_init_mlib(c, mask);
|
||||
#endif
|
||||
#ifdef ARCH_ALPHA
|
||||
dsputil_init_alpha();
|
||||
dsputil_init_alpha(c, mask);
|
||||
#endif
|
||||
#ifdef ARCH_POWERPC
|
||||
dsputil_init_ppc();
|
||||
dsputil_init_ppc(c, mask);
|
||||
#endif
|
||||
#ifdef HAVE_MMI
|
||||
dsputil_init_mmi();
|
||||
dsputil_init_mmi(c, mask);
|
||||
#endif
|
||||
|
||||
for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
|
||||
@ -1639,7 +1679,8 @@ void avcodec_set_bit_exact(void)
|
||||
{
|
||||
ff_bit_exact=1;
|
||||
#ifdef HAVE_MMX
|
||||
dsputil_set_bit_exact_mmx();
|
||||
#warning FIXME - set_bit_exact
|
||||
// dsputil_set_bit_exact_mmx();
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -45,7 +45,6 @@ extern const UINT8 ff_zigzag_direct[64];
|
||||
extern UINT32 squareTbl[512];
|
||||
extern UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
|
||||
|
||||
void dsputil_init(void);
|
||||
|
||||
/* minimum alignment rules ;)
|
||||
if you notice errors in the alignment rules, need more alignment for some asm code for some CPU
|
||||
@ -57,39 +56,20 @@ i (michael) didnt check them, these are just the alignents which i think could b
|
||||
!future video codecs might need functions with less strict alignment
|
||||
*/
|
||||
|
||||
/* pixel ops : interface with DCT */
|
||||
extern void (*get_pixels)(DCTELEM *block/*align 16*/, const UINT8 *pixels/*align 8*/, int line_size);
|
||||
extern void (*diff_pixels)(DCTELEM *block/*align 16*/, const UINT8 *s1/*align 8*/, const UINT8 *s2/*align 8*/, int stride);
|
||||
extern void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size);
|
||||
extern void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size);
|
||||
extern void (*ff_gmc1)(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder);
|
||||
extern void (*ff_gmc )(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int stride, int h, int ox, int oy,
|
||||
int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
|
||||
extern void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
|
||||
extern int (*pix_sum)(UINT8 * pix, int line_size);
|
||||
extern int (*pix_norm1)(UINT8 * pix, int line_size);
|
||||
|
||||
|
||||
|
||||
/*
|
||||
void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size);
|
||||
void diff_pixels_c(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride);
|
||||
void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
|
||||
void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
|
||||
void clear_blocks_c(DCTELEM *blocks);
|
||||
*/
|
||||
|
||||
/* add and put pixel (decoding) */
|
||||
// block sizes for op_pixels_func are 8x4, 8x8, 16x8 and 16x16
|
||||
typedef void (*op_pixels_func)(UINT8 *block/*align width (8 or 16)*/, const UINT8 *pixels/*align 1*/, int line_size, int h);
|
||||
typedef void (*qpel_mc_func)(UINT8 *dst/*align width (8 or 16)*/, UINT8 *src/*align 1*/, int stride);
|
||||
|
||||
extern op_pixels_func put_pixels_tab[2][4];
|
||||
extern op_pixels_func avg_pixels_tab[2][4];
|
||||
extern op_pixels_func put_no_rnd_pixels_tab[2][4];
|
||||
extern op_pixels_func avg_no_rnd_pixels_tab[2][4];
|
||||
extern qpel_mc_func put_qpel_pixels_tab[2][16];
|
||||
extern qpel_mc_func avg_qpel_pixels_tab[2][16];
|
||||
extern qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
|
||||
extern qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16];
|
||||
|
||||
|
||||
#define CALL_2X_PIXELS(a, b, n)\
|
||||
static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
||||
@ -100,20 +80,46 @@ static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
||||
/* motion estimation */
|
||||
|
||||
typedef int (*op_pixels_abs_func)(UINT8 *blk1/*align width (8 or 16)*/, UINT8 *blk2/*align 1*/, int line_size);
|
||||
|
||||
extern op_pixels_abs_func pix_abs16x16;
|
||||
extern op_pixels_abs_func pix_abs16x16_x2;
|
||||
extern op_pixels_abs_func pix_abs16x16_y2;
|
||||
extern op_pixels_abs_func pix_abs16x16_xy2;
|
||||
extern op_pixels_abs_func pix_abs8x8;
|
||||
extern op_pixels_abs_func pix_abs8x8_x2;
|
||||
extern op_pixels_abs_func pix_abs8x8_y2;
|
||||
extern op_pixels_abs_func pix_abs8x8_xy2;
|
||||
|
||||
/*
|
||||
int pix_abs16x16_c(UINT8 *blk1, UINT8 *blk2, int lx);
|
||||
int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx);
|
||||
int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx);
|
||||
int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx);
|
||||
*/
|
||||
typedef struct DSPContext {
|
||||
/* pixel ops : interface with DCT */
|
||||
void (*get_pixels)(DCTELEM *block/*align 16*/, const UINT8 *pixels/*align 8*/, int line_size);
|
||||
void (*diff_pixels)(DCTELEM *block/*align 16*/, const UINT8 *s1/*align 8*/, const UINT8 *s2/*align 8*/, int stride);
|
||||
void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size);
|
||||
void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size);
|
||||
void (*gmc1)(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder);
|
||||
void (*gmc )(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int stride, int h, int ox, int oy,
|
||||
int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
|
||||
void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
|
||||
int (*pix_sum)(UINT8 * pix, int line_size);
|
||||
int (*pix_norm1)(UINT8 * pix, int line_size);
|
||||
|
||||
/* maybe create an array for 16/8 functions */
|
||||
op_pixels_func put_pixels_tab[2][4];
|
||||
op_pixels_func avg_pixels_tab[2][4];
|
||||
op_pixels_func put_no_rnd_pixels_tab[2][4];
|
||||
op_pixels_func avg_no_rnd_pixels_tab[2][4];
|
||||
qpel_mc_func put_qpel_pixels_tab[2][16];
|
||||
qpel_mc_func avg_qpel_pixels_tab[2][16];
|
||||
qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
|
||||
qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16];
|
||||
|
||||
op_pixels_abs_func pix_abs16x16;
|
||||
op_pixels_abs_func pix_abs16x16_x2;
|
||||
op_pixels_abs_func pix_abs16x16_y2;
|
||||
op_pixels_abs_func pix_abs16x16_xy2;
|
||||
op_pixels_abs_func pix_abs8x8;
|
||||
op_pixels_abs_func pix_abs8x8_x2;
|
||||
op_pixels_abs_func pix_abs8x8_y2;
|
||||
op_pixels_abs_func pix_abs8x8_xy2;
|
||||
} DSPContext;
|
||||
|
||||
void dsputil_init(DSPContext* p, unsigned mask);
|
||||
|
||||
/**
|
||||
* permute block according to permutation.
|
||||
@ -121,8 +127,12 @@ int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx);
|
||||
*/
|
||||
void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last);
|
||||
|
||||
#define emms_c()
|
||||
|
||||
#if defined(HAVE_MMX)
|
||||
|
||||
#undef emms_c /* drop the empty default; #undef takes a bare macro name, no parameter list */
|
||||
|
||||
#define MM_MMX 0x0001 /* standard MMX */
|
||||
#define MM_3DNOW 0x0004 /* AMD 3DNOW */
|
||||
#define MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */
|
||||
@ -132,6 +142,8 @@ void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable,
|
||||
extern int mm_flags;
|
||||
|
||||
int mm_support(void);
|
||||
void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size);
|
||||
void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size);
|
||||
|
||||
static inline void emms(void)
|
||||
{
|
||||
@ -146,54 +158,44 @@ static inline void emms(void)
|
||||
|
||||
#define __align8 __attribute__ ((aligned (8)))
|
||||
|
||||
void dsputil_init_mmx(void);
|
||||
void dsputil_set_bit_exact_mmx(void);
|
||||
void dsputil_init_mmx(DSPContext* c, unsigned mask);
|
||||
void dsputil_set_bit_exact_mmx(DSPContext* c, unsigned mask);
|
||||
|
||||
#elif defined(ARCH_ARMV4L)
|
||||
|
||||
#define emms_c()
|
||||
|
||||
/* This is to use 4 bytes read to the IDCT pointers for some 'zero'
|
||||
line optimizations */
|
||||
#define __align8 __attribute__ ((aligned (4)))
|
||||
|
||||
void dsputil_init_armv4l(void);
|
||||
void dsputil_init_armv4l(DSPContext* c, unsigned mask);
|
||||
|
||||
#elif defined(HAVE_MLIB)
|
||||
|
||||
#define emms_c()
|
||||
|
||||
/* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */
|
||||
#define __align8 __attribute__ ((aligned (8)))
|
||||
|
||||
void dsputil_init_mlib(void);
|
||||
void dsputil_init_mlib(DSPContext* c, unsigned mask);
|
||||
|
||||
#elif defined(ARCH_ALPHA)
|
||||
|
||||
#define emms_c()
|
||||
#define __align8 __attribute__ ((aligned (8)))
|
||||
|
||||
void dsputil_init_alpha(void);
|
||||
void dsputil_init_alpha(DSPContext* c, unsigned mask);
|
||||
|
||||
#elif defined(ARCH_POWERPC)
|
||||
|
||||
#define emms_c()
|
||||
#define __align8 __attribute__ ((aligned (16)))
|
||||
|
||||
void dsputil_init_ppc(void);
|
||||
void dsputil_init_ppc(DSPContext* c, unsigned mask);
|
||||
|
||||
#elif defined(HAVE_MMI)
|
||||
|
||||
#define emms_c()
|
||||
|
||||
#define __align8 __attribute__ ((aligned (16)))
|
||||
|
||||
void dsputil_init_mmi(void);
|
||||
void dsputil_init_mmi(DSPContext* c, unsigned mask);
|
||||
|
||||
#else
|
||||
|
||||
#define emms_c()
|
||||
|
||||
#define __align8
|
||||
|
||||
#endif
|
||||
|
@ -114,6 +114,7 @@ static int dvvideo_decode_init(AVCodecContext *avctx)
|
||||
/* XXX: fix it */
|
||||
memset(&s2, 0, sizeof(MpegEncContext));
|
||||
s2.avctx = avctx;
|
||||
dsputil_init(&s2.dsp, avctx->dsp_mask);
|
||||
if (DCT_common_init(&s2) < 0)
|
||||
return -1;
|
||||
|
||||
|
@ -331,7 +331,7 @@ static void guess_mv(MpegEncContext *s){
|
||||
s->mv_type = MV_TYPE_16X16;
|
||||
s->mb_skiped=0;
|
||||
|
||||
clear_blocks(s->block[0]);
|
||||
s->dsp.clear_blocks(s->block[0]);
|
||||
|
||||
s->mb_x= mb_x;
|
||||
s->mb_y= mb_y;
|
||||
@ -458,7 +458,7 @@ int score_sum=0;
|
||||
s->mv_type = MV_TYPE_16X16;
|
||||
s->mb_skiped=0;
|
||||
|
||||
clear_blocks(s->block[0]);
|
||||
s->dsp.clear_blocks(s->block[0]);
|
||||
|
||||
s->mb_x= mb_x;
|
||||
s->mb_y= mb_y;
|
||||
@ -559,8 +559,8 @@ static int is_intra_more_likely(MpegEncContext *s){
|
||||
UINT8 *mb_ptr = s->current_picture[0] + mb_x*16 + mb_y*16*s->linesize;
|
||||
UINT8 *last_mb_ptr= s->last_picture [0] + mb_x*16 + mb_y*16*s->linesize;
|
||||
|
||||
is_intra_likely += pix_abs16x16(last_mb_ptr, mb_ptr , s->linesize);
|
||||
is_intra_likely -= pix_abs16x16(last_mb_ptr, last_mb_ptr+s->linesize*16, s->linesize);
|
||||
is_intra_likely += s->dsp.pix_abs16x16(last_mb_ptr, mb_ptr , s->linesize);
|
||||
is_intra_likely -= s->dsp.pix_abs16x16(last_mb_ptr, last_mb_ptr+s->linesize*16, s->linesize);
|
||||
}else{
|
||||
if(s->mbintra_table[i]) //HACK (this is allways inited but we should use mb_type[])
|
||||
is_intra_likely++;
|
||||
@ -738,7 +738,7 @@ void ff_error_resilience(MpegEncContext *s){
|
||||
s->mv[0][0][1] = s->motion_val[ mb_x*2+1 + (mb_y*2+1)*s->block_wrap[0] ][1];
|
||||
}
|
||||
|
||||
clear_blocks(s->block[0]);
|
||||
s->dsp.clear_blocks(s->block[0]);
|
||||
|
||||
s->mb_x= mb_x;
|
||||
s->mb_y= mb_y;
|
||||
@ -779,7 +779,7 @@ void ff_error_resilience(MpegEncContext *s){
|
||||
s->mv[1][0][1]= 0;
|
||||
}
|
||||
|
||||
clear_blocks(s->block[0]);
|
||||
s->dsp.clear_blocks(s->block[0]);
|
||||
s->mb_x= mb_x;
|
||||
s->mb_y= mb_y;
|
||||
MPV_decode_mb(s, s->block);
|
||||
|
@ -538,7 +538,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
|
||||
if(s->coded_order[i+1].pict_type!=B_TYPE) break;
|
||||
|
||||
b_pic= s->coded_order[i+1].picture[0] + offset;
|
||||
diff= pix_abs16x16(p_pic, b_pic, s->linesize);
|
||||
diff= s->dsp.pix_abs16x16(p_pic, b_pic, s->linesize);
|
||||
if(diff>s->qscale*70){ //FIXME check that 70 is optimal
|
||||
s->mb_skiped=0;
|
||||
break;
|
||||
|
@ -195,7 +195,7 @@ static int decode_slice(MpegEncContext *s){
|
||||
}
|
||||
|
||||
/* DCT & quantize */
|
||||
clear_blocks(s->block[0]);
|
||||
s->dsp.clear_blocks(s->block[0]);
|
||||
|
||||
s->mv_dir = MV_DIR_FORWARD;
|
||||
s->mv_type = MV_TYPE_16X16;
|
||||
|
@ -22,7 +22,7 @@
|
||||
#include "../dsputil.h"
|
||||
|
||||
int mm_flags; /* multimedia extension flags */
|
||||
|
||||
/* FIXME use them in static form */
|
||||
int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
|
||||
int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
|
||||
int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
|
||||
@ -242,7 +242,7 @@ static void diff_pixels_mmx(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, in
|
||||
);
|
||||
}
|
||||
|
||||
static void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size)
|
||||
void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size)
|
||||
{
|
||||
const DCTELEM *p;
|
||||
UINT8 *pix;
|
||||
@ -297,7 +297,7 @@ static void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line
|
||||
:"memory");
|
||||
}
|
||||
|
||||
static void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size)
|
||||
void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size)
|
||||
{
|
||||
const DCTELEM *p;
|
||||
UINT8 *pix;
|
||||
@ -457,7 +457,7 @@ static int pix_sum16_mmx(UINT8 * pix, int line_size){
|
||||
static void just_return() { return; }
|
||||
#endif
|
||||
|
||||
void dsputil_init_mmx(void)
|
||||
void dsputil_init_mmx(DSPContext* c, unsigned mask)
|
||||
{
|
||||
mm_flags = mm_support();
|
||||
#if 0
|
||||
@ -476,112 +476,112 @@ void dsputil_init_mmx(void)
|
||||
#endif
|
||||
|
||||
if (mm_flags & MM_MMX) {
|
||||
get_pixels = get_pixels_mmx;
|
||||
diff_pixels = diff_pixels_mmx;
|
||||
put_pixels_clamped = put_pixels_clamped_mmx;
|
||||
add_pixels_clamped = add_pixels_clamped_mmx;
|
||||
clear_blocks= clear_blocks_mmx;
|
||||
pix_sum= pix_sum16_mmx;
|
||||
c->get_pixels = get_pixels_mmx;
|
||||
c->diff_pixels = diff_pixels_mmx;
|
||||
c->put_pixels_clamped = put_pixels_clamped_mmx;
|
||||
c->add_pixels_clamped = add_pixels_clamped_mmx;
|
||||
c->clear_blocks = clear_blocks_mmx;
|
||||
c->pix_sum = pix_sum16_mmx;
|
||||
|
||||
pix_abs16x16 = pix_abs16x16_mmx;
|
||||
pix_abs16x16_x2 = pix_abs16x16_x2_mmx;
|
||||
pix_abs16x16_y2 = pix_abs16x16_y2_mmx;
|
||||
pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx;
|
||||
pix_abs8x8 = pix_abs8x8_mmx;
|
||||
pix_abs8x8_x2 = pix_abs8x8_x2_mmx;
|
||||
pix_abs8x8_y2 = pix_abs8x8_y2_mmx;
|
||||
pix_abs8x8_xy2= pix_abs8x8_xy2_mmx;
|
||||
c->pix_abs16x16 = pix_abs16x16_mmx;
|
||||
c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx;
|
||||
c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx;
|
||||
c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx;
|
||||
c->pix_abs8x8 = pix_abs8x8_mmx;
|
||||
c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx;
|
||||
c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx;
|
||||
c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx;
|
||||
|
||||
put_pixels_tab[0][0] = put_pixels16_mmx;
|
||||
put_pixels_tab[0][1] = put_pixels16_x2_mmx;
|
||||
put_pixels_tab[0][2] = put_pixels16_y2_mmx;
|
||||
put_pixels_tab[0][3] = put_pixels16_xy2_mmx;
|
||||
c->put_pixels_tab[0][0] = put_pixels16_mmx;
|
||||
c->put_pixels_tab[0][1] = put_pixels16_x2_mmx;
|
||||
c->put_pixels_tab[0][2] = put_pixels16_y2_mmx;
|
||||
c->put_pixels_tab[0][3] = put_pixels16_xy2_mmx;
|
||||
|
||||
put_no_rnd_pixels_tab[0][0] = put_pixels16_mmx;
|
||||
put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx;
|
||||
put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx;
|
||||
put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_mmx;
|
||||
c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmx;
|
||||
c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx;
|
||||
c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx;
|
||||
c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_mmx;
|
||||
|
||||
avg_pixels_tab[0][0] = avg_pixels16_mmx;
|
||||
avg_pixels_tab[0][1] = avg_pixels16_x2_mmx;
|
||||
avg_pixels_tab[0][2] = avg_pixels16_y2_mmx;
|
||||
avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx;
|
||||
c->avg_pixels_tab[0][0] = avg_pixels16_mmx;
|
||||
c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx;
|
||||
c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx;
|
||||
c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx;
|
||||
|
||||
avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_mmx;
|
||||
avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_mmx;
|
||||
avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_mmx;
|
||||
avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_mmx;
|
||||
c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_mmx;
|
||||
c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_mmx;
|
||||
c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_mmx;
|
||||
c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_mmx;
|
||||
|
||||
put_pixels_tab[1][0] = put_pixels8_mmx;
|
||||
put_pixels_tab[1][1] = put_pixels8_x2_mmx;
|
||||
put_pixels_tab[1][2] = put_pixels8_y2_mmx;
|
||||
put_pixels_tab[1][3] = put_pixels8_xy2_mmx;
|
||||
c->put_pixels_tab[1][0] = put_pixels8_mmx;
|
||||
c->put_pixels_tab[1][1] = put_pixels8_x2_mmx;
|
||||
c->put_pixels_tab[1][2] = put_pixels8_y2_mmx;
|
||||
c->put_pixels_tab[1][3] = put_pixels8_xy2_mmx;
|
||||
|
||||
put_no_rnd_pixels_tab[1][0] = put_pixels8_mmx;
|
||||
put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx;
|
||||
put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx;
|
||||
put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_mmx;
|
||||
c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mmx;
|
||||
c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx;
|
||||
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx;
|
||||
c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_mmx;
|
||||
|
||||
avg_pixels_tab[1][0] = avg_pixels8_mmx;
|
||||
avg_pixels_tab[1][1] = avg_pixels8_x2_mmx;
|
||||
avg_pixels_tab[1][2] = avg_pixels8_y2_mmx;
|
||||
avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx;
|
||||
c->avg_pixels_tab[1][0] = avg_pixels8_mmx;
|
||||
c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx;
|
||||
c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx;
|
||||
c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx;
|
||||
|
||||
avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_mmx;
|
||||
avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_mmx;
|
||||
avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_mmx;
|
||||
avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx;
|
||||
c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_mmx;
|
||||
c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_mmx;
|
||||
c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_mmx;
|
||||
c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx;
|
||||
|
||||
if (mm_flags & MM_MMXEXT) {
|
||||
pix_abs16x16 = pix_abs16x16_mmx2;
|
||||
pix_abs16x16_x2 = pix_abs16x16_x2_mmx2;
|
||||
pix_abs16x16_y2 = pix_abs16x16_y2_mmx2;
|
||||
pix_abs16x16_xy2= pix_abs16x16_xy2_mmx2;
|
||||
c->pix_abs16x16 = pix_abs16x16_mmx2;
|
||||
c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx2;
|
||||
c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx2;
|
||||
c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx2;
|
||||
|
||||
pix_abs8x8 = pix_abs8x8_mmx2;
|
||||
pix_abs8x8_x2 = pix_abs8x8_x2_mmx2;
|
||||
pix_abs8x8_y2 = pix_abs8x8_y2_mmx2;
|
||||
pix_abs8x8_xy2= pix_abs8x8_xy2_mmx2;
|
||||
c->pix_abs8x8 = pix_abs8x8_mmx2;
|
||||
c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx2;
|
||||
c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx2;
|
||||
c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx2;
|
||||
|
||||
put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
|
||||
put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
|
||||
put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
|
||||
put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
|
||||
c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
|
||||
c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
|
||||
c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
|
||||
c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
|
||||
|
||||
avg_pixels_tab[0][0] = avg_pixels16_mmx2;
|
||||
avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2;
|
||||
avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2;
|
||||
avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
|
||||
c->avg_pixels_tab[0][0] = avg_pixels16_mmx2;
|
||||
c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2;
|
||||
c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2;
|
||||
c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
|
||||
|
||||
put_pixels_tab[1][1] = put_pixels8_x2_mmx2;
|
||||
put_pixels_tab[1][2] = put_pixels8_y2_mmx2;
|
||||
put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
|
||||
put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
|
||||
c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2;
|
||||
c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2;
|
||||
c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
|
||||
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
|
||||
|
||||
avg_pixels_tab[1][0] = avg_pixels8_mmx2;
|
||||
avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2;
|
||||
avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;
|
||||
avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
|
||||
c->avg_pixels_tab[1][0] = avg_pixels8_mmx2;
|
||||
c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2;
|
||||
c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;
|
||||
c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
|
||||
} else if (mm_flags & MM_3DNOW) {
|
||||
put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
|
||||
put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
|
||||
put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow;
|
||||
put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow;
|
||||
c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
|
||||
c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
|
||||
c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow;
|
||||
c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow;
|
||||
|
||||
avg_pixels_tab[0][0] = avg_pixels16_3dnow;
|
||||
avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow;
|
||||
avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow;
|
||||
avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
|
||||
c->avg_pixels_tab[0][0] = avg_pixels16_3dnow;
|
||||
c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow;
|
||||
c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow;
|
||||
c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
|
||||
|
||||
put_pixels_tab[1][1] = put_pixels8_x2_3dnow;
|
||||
put_pixels_tab[1][2] = put_pixels8_y2_3dnow;
|
||||
put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow;
|
||||
put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow;
|
||||
c->put_pixels_tab[1][1] = put_pixels8_x2_3dnow;
|
||||
c->put_pixels_tab[1][2] = put_pixels8_y2_3dnow;
|
||||
c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow;
|
||||
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow;
|
||||
|
||||
avg_pixels_tab[1][0] = avg_pixels8_3dnow;
|
||||
avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow;
|
||||
avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow;
|
||||
avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow;
|
||||
c->avg_pixels_tab[1][0] = avg_pixels8_3dnow;
|
||||
c->avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow;
|
||||
c->avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow;
|
||||
c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow;
|
||||
}
|
||||
}
|
||||
|
||||
@ -624,25 +624,24 @@ void dsputil_init_mmx(void)
|
||||
/* remove any non bit exact operation (testing purpose). NOTE that
|
||||
this function should be kept as small as possible because it is
|
||||
always difficult to test automatically non bit exact cases. */
|
||||
void dsputil_set_bit_exact_mmx(void)
|
||||
void dsputil_set_bit_exact_mmx(DSPContext* c, unsigned mask)
|
||||
{
|
||||
if (mm_flags & MM_MMX) {
|
||||
|
||||
/* MMX2 & 3DNOW */
|
||||
put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx;
|
||||
put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx;
|
||||
avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx;
|
||||
put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx;
|
||||
put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx;
|
||||
avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx;
|
||||
c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx;
|
||||
c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx;
|
||||
c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx;
|
||||
c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx;
|
||||
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx;
|
||||
c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx;
|
||||
|
||||
if (mm_flags & MM_MMXEXT) {
|
||||
pix_abs16x16_x2 = pix_abs16x16_x2_mmx;
|
||||
pix_abs16x16_y2 = pix_abs16x16_y2_mmx;
|
||||
pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx;
|
||||
pix_abs8x8_x2 = pix_abs8x8_x2_mmx;
|
||||
pix_abs8x8_y2 = pix_abs8x8_y2_mmx;
|
||||
pix_abs8x8_xy2= pix_abs8x8_xy2_mmx;
|
||||
c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx;
|
||||
c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx;
|
||||
c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx;
|
||||
c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx;
|
||||
c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx;
|
||||
c->pix_abs8x8_xy2= pix_abs8x8_xy2_mmx;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -88,8 +88,8 @@ static int pix_norm(UINT8 * pix1, UINT8 * pix2, int line_size)
|
||||
return s;
|
||||
}
|
||||
|
||||
static void no_motion_search(MpegEncContext * s,
|
||||
int *mx_ptr, int *my_ptr)
|
||||
static inline void no_motion_search(MpegEncContext * s,
|
||||
int *mx_ptr, int *my_ptr)
|
||||
{
|
||||
*mx_ptr = 16 * s->mb_x;
|
||||
*my_ptr = 16 * s->mb_y;
|
||||
@ -123,7 +123,7 @@ static int full_motion_search(MpegEncContext * s,
|
||||
my = 0;
|
||||
for (y = y1; y <= y2; y++) {
|
||||
for (x = x1; x <= x2; x++) {
|
||||
d = pix_abs16x16(pix, ref_picture + (y * s->linesize) + x,
|
||||
d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x,
|
||||
s->linesize);
|
||||
if (d < dmin ||
|
||||
(d == dmin &&
|
||||
@ -188,7 +188,7 @@ static int log_motion_search(MpegEncContext * s,
|
||||
do {
|
||||
for (y = y1; y <= y2; y += range) {
|
||||
for (x = x1; x <= x2; x += range) {
|
||||
d = pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
|
||||
d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
|
||||
if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
|
||||
dmin = d;
|
||||
mx = x;
|
||||
@ -268,7 +268,7 @@ static int phods_motion_search(MpegEncContext * s,
|
||||
|
||||
lastx = x;
|
||||
for (x = x1; x <= x2; x += range) {
|
||||
d = pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
|
||||
d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
|
||||
if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
|
||||
dminx = d;
|
||||
mx = x;
|
||||
@ -277,7 +277,7 @@ static int phods_motion_search(MpegEncContext * s,
|
||||
|
||||
x = lastx;
|
||||
for (y = y1; y <= y2; y += range) {
|
||||
d = pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
|
||||
d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
|
||||
if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
|
||||
dminy = d;
|
||||
my = y;
|
||||
@ -324,7 +324,7 @@ static int phods_motion_search(MpegEncContext * s,
|
||||
const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
|
||||
const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
|
||||
if(map[index]!=key){\
|
||||
d = pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
|
||||
d = s->dsp.pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
|
||||
d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
|
||||
COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
|
||||
map[index]= key;\
|
||||
@ -355,7 +355,7 @@ static int phods_motion_search(MpegEncContext * s,
|
||||
const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
|
||||
const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
|
||||
if(map[index]!=key){\
|
||||
d = pix_abs8x8(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
|
||||
d = s->dsp.pix_abs8x8(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
|
||||
d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
|
||||
COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
|
||||
map[index]= key;\
|
||||
@ -590,7 +590,7 @@ static int epzs_motion_search(MpegEncContext * s,
|
||||
|
||||
map_generation= update_map_generation(s);
|
||||
|
||||
dmin = pix_abs16x16(new_pic, old_pic, pic_stride);
|
||||
dmin = s->dsp.pix_abs16x16(new_pic, old_pic, pic_stride);
|
||||
map[0]= map_generation;
|
||||
score_map[0]= dmin;
|
||||
|
||||
@ -644,11 +644,11 @@ static int epzs_motion_search(MpegEncContext * s,
|
||||
if(s->me_method==ME_EPZS)
|
||||
dmin= small_diamond_search(s, best, dmin, new_pic, old_pic, pic_stride,
|
||||
pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax,
|
||||
shift, map, score_map, map_generation, pix_abs16x16);
|
||||
shift, map, score_map, map_generation, s->dsp.pix_abs16x16);
|
||||
else
|
||||
dmin= cross_search(s, best, dmin, new_pic, old_pic, pic_stride,
|
||||
pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax,
|
||||
shift, map, score_map, map_generation, pix_abs16x16);
|
||||
shift, map, score_map, map_generation, s->dsp.pix_abs16x16);
|
||||
//check(best[0],best[1],0, b1)
|
||||
*mx_ptr= best[0];
|
||||
*my_ptr= best[1];
|
||||
@ -683,7 +683,7 @@ static int epzs_motion_search4(MpegEncContext * s, int block,
|
||||
//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
|
||||
/* first line */
|
||||
if ((s->mb_y == 0 || s->first_slice_line) && block<2) {
|
||||
CHECK_MV4(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
|
||||
CHECK_MV4(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
|
||||
CHECK_MV4(P_LAST[0]>>shift, P_LAST[1]>>shift)
|
||||
CHECK_MV4(P_MV1[0]>>shift, P_MV1[1]>>shift)
|
||||
}else{
|
||||
@ -705,11 +705,11 @@ static int epzs_motion_search4(MpegEncContext * s, int block,
|
||||
if(s->me_method==ME_EPZS)
|
||||
dmin= small_diamond_search(s, best, dmin, new_pic, old_pic, pic_stride,
|
||||
pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax,
|
||||
shift, map, score_map, map_generation, pix_abs8x8);
|
||||
shift, map, score_map, map_generation, s->dsp.pix_abs8x8);
|
||||
else
|
||||
dmin= cross_search(s, best, dmin, new_pic, old_pic, pic_stride,
|
||||
pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax,
|
||||
shift, map, score_map, map_generation, pix_abs8x8);
|
||||
shift, map, score_map, map_generation, s->dsp.pix_abs8x8);
|
||||
|
||||
*mx_ptr= best[0];
|
||||
*my_ptr= best[1];
|
||||
@ -1023,8 +1023,8 @@ static inline int mv4_search(MpegEncContext *s, int xmin, int ymin, int xmax, in
|
||||
dmin4 = epzs_motion_search4(s, block, &mx4, &my4, P, pred_x4, pred_y4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, ref_picture);
|
||||
|
||||
dmin4= fast_halfpel_motion_search(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4,
|
||||
pred_x4, pred_y4, ref_picture, pix_abs8x8_x2,
|
||||
pix_abs8x8_y2, pix_abs8x8_xy2, block);
|
||||
pred_x4, pred_y4, ref_picture, s->dsp.pix_abs8x8_x2,
|
||||
s->dsp.pix_abs8x8_y2, s->dsp.pix_abs8x8_xy2, block);
|
||||
|
||||
s->motion_val[ s->block_index[block] ][0]= mx4;
|
||||
s->motion_val[ s->block_index[block] ][1]= my4;
|
||||
@ -1133,9 +1133,10 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
|
||||
/* At this point (mx,my) are full-pel and the relative displacement */
|
||||
ppix = ref_picture + ((yy+my) * s->linesize) + (xx+mx);
|
||||
|
||||
sum = pix_sum(pix, s->linesize);
|
||||
sum = s->dsp.pix_sum(pix, s->linesize);
|
||||
|
||||
varc = (pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
|
||||
varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
|
||||
// FIXME: MMX OPTIMIZE
|
||||
vard = (pix_norm(pix, ppix, s->linesize)+128)>>8;
|
||||
|
||||
//printf("%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout);
|
||||
@ -1162,12 +1163,12 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
|
||||
mb_type|= MB_TYPE_INTER;
|
||||
if(s->me_method >= ME_EPZS)
|
||||
fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
|
||||
pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2,
|
||||
pix_abs16x16_xy2, 0);
|
||||
pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2,
|
||||
s->dsp.pix_abs16x16_y2, s->dsp.pix_abs16x16_xy2, 0);
|
||||
else
|
||||
halfpel_motion_search( s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
|
||||
pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2,
|
||||
pix_abs16x16_xy2, 0);
|
||||
pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2,
|
||||
s->dsp.pix_abs16x16_y2, s->dsp.pix_abs16x16_xy2, 0);
|
||||
}else{
|
||||
mx <<=1;
|
||||
my <<=1;
|
||||
@ -1186,13 +1187,13 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
|
||||
mb_type|= MB_TYPE_INTER;
|
||||
if (s->me_method != ME_ZERO) {
|
||||
if(s->me_method >= ME_EPZS)
|
||||
dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
|
||||
pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2,
|
||||
pix_abs16x16_xy2, 0);
|
||||
dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
|
||||
pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2, s->dsp.pix_abs16x16_y2,
|
||||
s->dsp.pix_abs16x16_xy2, 0);
|
||||
else
|
||||
dmin= halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
|
||||
pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2,
|
||||
pix_abs16x16_xy2, 0);
|
||||
pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2, s->dsp.pix_abs16x16_y2,
|
||||
s->dsp.pix_abs16x16_xy2, 0);
|
||||
if((s->flags&CODEC_FLAG_4MV)
|
||||
&& !s->skip_me && varc>50 && vard>10){
|
||||
int dmin4= mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift);
|
||||
@ -1304,8 +1305,8 @@ int ff_estimate_motion_b(MpegEncContext * s,
|
||||
}
|
||||
|
||||
dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
|
||||
pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2,
|
||||
pix_abs16x16_xy2, 0);
|
||||
pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2, s->dsp.pix_abs16x16_y2,
|
||||
s->dsp.pix_abs16x16_xy2, 0);
|
||||
//printf("%d %d %d %d//", s->mb_x, s->mb_y, mx, my);
|
||||
// s->mb_type[mb_y*s->mb_width + mb_x]= mb_type;
|
||||
mv_table[mot_xy][0]= mx;
|
||||
@ -1343,7 +1344,7 @@ static inline int check_bidir_mv(MpegEncContext * s,
|
||||
dxy&= 1;
|
||||
|
||||
ptr = s->last_picture[0] + (src_y * s->linesize) + src_x;
|
||||
put_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16);
|
||||
s->dsp.put_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16);
|
||||
|
||||
fbmin += (mv_penalty[motion_bx-pred_bx] + mv_penalty[motion_by-pred_by])*s->qscale;
|
||||
|
||||
@ -1358,9 +1359,9 @@ static inline int check_bidir_mv(MpegEncContext * s,
|
||||
dxy&= 1;
|
||||
|
||||
ptr = s->next_picture[0] + (src_y * s->linesize) + src_x;
|
||||
avg_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16);
|
||||
s->dsp.avg_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16);
|
||||
|
||||
fbmin += pix_abs16x16(s->new_picture[0] + mb_x*16 + mb_y*16*s->linesize, dest_y, s->linesize);
|
||||
fbmin += s->dsp.pix_abs16x16(s->new_picture[0] + mb_x*16 + mb_y*16*s->linesize, dest_y, s->linesize);
|
||||
return fbmin;
|
||||
}
|
||||
|
||||
@ -1443,7 +1444,7 @@ static inline int direct_search(MpegEncContext * s,
|
||||
if (src_y == height) dxy &= ~2;
|
||||
|
||||
ptr = s->last_picture[0] + (src_y * s->linesize) + src_x;
|
||||
put_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16);
|
||||
s->dsp.put_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16);
|
||||
|
||||
dxy = ((motion_by & 1) << 1) | (motion_bx & 1);
|
||||
src_x = (mb_x + bx) * 16 + (motion_bx >> 1);
|
||||
@ -1453,7 +1454,7 @@ static inline int direct_search(MpegEncContext * s,
|
||||
src_y = clip(src_y, -16, height);
|
||||
if (src_y == height) dxy &= ~2;
|
||||
|
||||
avg_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16);
|
||||
s->dsp.avg_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1623,7 +1623,7 @@ static int mpeg_decode_slice(AVCodecContext *avctx,
|
||||
s->mb_incr= 1;
|
||||
|
||||
for(;;) {
|
||||
clear_blocks(s->block[0]);
|
||||
s->dsp.clear_blocks(s->block[0]);
|
||||
|
||||
ret = mpeg_decode_mb(s, s->block);
|
||||
dprintf("ret=%d\n", ret);
|
||||
|
@ -57,7 +57,7 @@ static void emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int bl
|
||||
/* for jpeg fast DCT */
|
||||
#define CONST_BITS 14
|
||||
|
||||
static const unsigned short aanscales[64] = {
|
||||
static const uint16_t aanscales[64] = {
|
||||
/* precomputed values scaled up by 14 bits */
|
||||
16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
|
||||
22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
|
||||
@ -70,7 +70,7 @@ static const unsigned short aanscales[64] = {
|
||||
};
|
||||
|
||||
/* Input permutation for the simple_idct_mmx */
|
||||
static const UINT8 simple_mmx_permutation[64]={
|
||||
static const uint8_t simple_mmx_permutation[64]={
|
||||
0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
|
||||
0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
|
||||
0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
|
||||
@ -81,7 +81,7 @@ static const UINT8 simple_mmx_permutation[64]={
|
||||
0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
|
||||
};
|
||||
|
||||
static UINT8 h263_chroma_roundtab[16] = {
|
||||
static const uint8_t h263_chroma_roundtab[16] = {
|
||||
0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
|
||||
};
|
||||
|
||||
@ -172,16 +172,19 @@ void ff_init_scantable(MpegEncContext *s, ScanTable *st, const UINT8 *src_scanta
|
||||
}
|
||||
|
||||
/* XXX: those functions should be suppressed ASAP when all IDCTs are
|
||||
converted */
|
||||
converted */
|
||||
// *FIXME* this is ugly hack using local static
|
||||
static void (*ff_put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
|
||||
static void (*ff_add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
|
||||
static void ff_jref_idct_put(UINT8 *dest, int line_size, DCTELEM *block)
|
||||
{
|
||||
j_rev_dct (block);
|
||||
put_pixels_clamped(block, dest, line_size);
|
||||
ff_put_pixels_clamped(block, dest, line_size);
|
||||
}
|
||||
static void ff_jref_idct_add(UINT8 *dest, int line_size, DCTELEM *block)
|
||||
{
|
||||
j_rev_dct (block);
|
||||
add_pixels_clamped(block, dest, line_size);
|
||||
ff_add_pixels_clamped(block, dest, line_size);
|
||||
}
|
||||
|
||||
/* init common dct for both encoder and decoder */
|
||||
@ -189,6 +192,9 @@ int DCT_common_init(MpegEncContext *s)
|
||||
{
|
||||
int i;
|
||||
|
||||
ff_put_pixels_clamped = s->dsp.put_pixels_clamped;
|
||||
ff_add_pixels_clamped = s->dsp.add_pixels_clamped;
|
||||
|
||||
s->dct_unquantize_h263 = dct_unquantize_h263_c;
|
||||
s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_c;
|
||||
s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_c;
|
||||
@ -268,6 +274,7 @@ int MPV_common_init(MpegEncContext *s)
|
||||
UINT8 *pict;
|
||||
int y_size, c_size, yc_size, i;
|
||||
|
||||
dsputil_init(&s->dsp, s->avctx->dsp_mask);
|
||||
DCT_common_init(s);
|
||||
|
||||
s->flags= s->avctx->flags;
|
||||
@ -275,22 +282,22 @@ int MPV_common_init(MpegEncContext *s)
|
||||
s->mb_width = (s->width + 15) / 16;
|
||||
s->mb_height = (s->height + 15) / 16;
|
||||
|
||||
y_size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
|
||||
c_size = (s->mb_width + 2) * (s->mb_height + 2);
|
||||
yc_size = y_size + 2 * c_size;
|
||||
|
||||
/* set default edge pos, will be overridden in decode_header if needed */
|
||||
s->h_edge_pos= s->mb_width*16;
|
||||
s->v_edge_pos= s->mb_height*16;
|
||||
|
||||
s->mb_num = s->mb_width * s->mb_height;
|
||||
|
||||
y_size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
|
||||
c_size = (s->mb_width + 2) * (s->mb_height + 2);
|
||||
yc_size = y_size + 2 * c_size;
|
||||
|
||||
/* convert fourcc to upper case */
|
||||
s->avctx->fourcc= toupper( s->avctx->fourcc &0xFF)
|
||||
+ (toupper((s->avctx->fourcc>>8 )&0xFF)<<8 )
|
||||
+ (toupper((s->avctx->fourcc>>16)&0xFF)<<16)
|
||||
+ (toupper((s->avctx->fourcc>>24)&0xFF)<<24);
|
||||
|
||||
s->mb_num = s->mb_width * s->mb_height;
|
||||
|
||||
if(!(s->flags&CODEC_FLAG_DR1)){
|
||||
s->linesize = s->mb_width * 16 + 2 * EDGE_WIDTH;
|
||||
s->uvlinesize = s->mb_width * 8 + EDGE_WIDTH;
|
||||
@ -1133,16 +1140,16 @@ static inline void gmc1_motion(MpegEncContext *s,
|
||||
}
|
||||
|
||||
if((motion_x|motion_y)&7){
|
||||
ff_gmc1(dest_y , ptr , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
|
||||
ff_gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
|
||||
s->dsp.gmc1(dest_y , ptr , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
|
||||
s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
|
||||
}else{
|
||||
int dxy;
|
||||
|
||||
dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
|
||||
if (s->no_rounding){
|
||||
put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
|
||||
s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
|
||||
}else{
|
||||
put_pixels_tab [0][dxy](dest_y, ptr, linesize, 16);
|
||||
s->dsp.put_pixels_tab [0][dxy](dest_y, ptr, linesize, 16);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1167,14 +1174,14 @@ static inline void gmc1_motion(MpegEncContext *s,
|
||||
emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
|
||||
ptr= s->edge_emu_buffer;
|
||||
}
|
||||
ff_gmc1(dest_cb + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
|
||||
s->dsp.gmc1(dest_cb + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
|
||||
|
||||
ptr = ref_picture[2] + offset;
|
||||
if(emu){
|
||||
emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
|
||||
ptr= s->edge_emu_buffer;
|
||||
}
|
||||
ff_gmc1(dest_cr + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
|
||||
s->dsp.gmc1(dest_cr + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
|
||||
|
||||
return;
|
||||
}
|
||||
@ -1199,14 +1206,14 @@ static inline void gmc_motion(MpegEncContext *s,
|
||||
ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
|
||||
oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
|
||||
|
||||
ff_gmc(dest_y, ptr, linesize, 16,
|
||||
s->dsp.gmc(dest_y, ptr, linesize, 16,
|
||||
ox,
|
||||
oy,
|
||||
s->sprite_delta[0][0], s->sprite_delta[0][1],
|
||||
s->sprite_delta[1][0], s->sprite_delta[1][1],
|
||||
a+1, (1<<(2*a+1)) - s->no_rounding,
|
||||
s->h_edge_pos, s->v_edge_pos);
|
||||
ff_gmc(dest_y+8, ptr, linesize, 16,
|
||||
s->dsp.gmc(dest_y+8, ptr, linesize, 16,
|
||||
ox + s->sprite_delta[0][0]*8,
|
||||
oy + s->sprite_delta[1][0]*8,
|
||||
s->sprite_delta[0][0], s->sprite_delta[0][1],
|
||||
@ -1224,7 +1231,7 @@ static inline void gmc_motion(MpegEncContext *s,
|
||||
oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
|
||||
|
||||
ptr = ref_picture[1] + (src_offset>>1);
|
||||
ff_gmc(dest_cb, ptr, uvlinesize, 8,
|
||||
s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
|
||||
ox,
|
||||
oy,
|
||||
s->sprite_delta[0][0], s->sprite_delta[0][1],
|
||||
@ -1233,7 +1240,7 @@ static inline void gmc_motion(MpegEncContext *s,
|
||||
s->h_edge_pos>>1, s->v_edge_pos>>1);
|
||||
|
||||
ptr = ref_picture[2] + (src_offset>>1);
|
||||
ff_gmc(dest_cr, ptr, uvlinesize, 8,
|
||||
s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
|
||||
ox,
|
||||
oy,
|
||||
s->sprite_delta[0][0], s->sprite_delta[0][1],
|
||||
@ -1860,17 +1867,17 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
|
||||
/* decoding or more than one mb_type (MC was already done otherwise) */
|
||||
if((!s->encoding) || (s->mb_type[mb_xy]&(s->mb_type[mb_xy]-1))){
|
||||
if ((!s->no_rounding) || s->pict_type==B_TYPE){
|
||||
op_pix = put_pixels_tab;
|
||||
op_qpix= put_qpel_pixels_tab;
|
||||
op_pix = s->dsp.put_pixels_tab;
|
||||
op_qpix= s->dsp.put_qpel_pixels_tab;
|
||||
}else{
|
||||
op_pix = put_no_rnd_pixels_tab;
|
||||
op_qpix= put_no_rnd_qpel_pixels_tab;
|
||||
op_pix = s->dsp.put_no_rnd_pixels_tab;
|
||||
op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
|
||||
}
|
||||
|
||||
if (s->mv_dir & MV_DIR_FORWARD) {
|
||||
MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture, op_pix, op_qpix);
|
||||
op_pix = avg_pixels_tab;
|
||||
op_qpix= avg_qpel_pixels_tab;
|
||||
op_pix = s->dsp.avg_pixels_tab;
|
||||
op_qpix= s->dsp.avg_qpel_pixels_tab;
|
||||
}
|
||||
if (s->mv_dir & MV_DIR_BACKWARD) {
|
||||
MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture, op_pix, op_qpix);
|
||||
@ -2224,10 +2231,10 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
|
||||
s->interlaced_dct=0;
|
||||
}
|
||||
|
||||
get_pixels(s->block[0], ptr , wrap_y);
|
||||
get_pixels(s->block[1], ptr + 8, wrap_y);
|
||||
get_pixels(s->block[2], ptr + dct_offset , wrap_y);
|
||||
get_pixels(s->block[3], ptr + dct_offset + 8, wrap_y);
|
||||
s->dsp.get_pixels(s->block[0], ptr , wrap_y);
|
||||
s->dsp.get_pixels(s->block[1], ptr + 8, wrap_y);
|
||||
s->dsp.get_pixels(s->block[2], ptr + dct_offset , wrap_y);
|
||||
s->dsp.get_pixels(s->block[3], ptr + dct_offset + 8, wrap_y);
|
||||
|
||||
if(s->flags&CODEC_FLAG_GRAY){
|
||||
skip_dct[4]= 1;
|
||||
@ -2239,14 +2246,14 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
|
||||
emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
|
||||
ptr= s->edge_emu_buffer;
|
||||
}
|
||||
get_pixels(s->block[4], ptr, wrap_c);
|
||||
s->dsp.get_pixels(s->block[4], ptr, wrap_c);
|
||||
|
||||
ptr = s->new_picture[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
|
||||
if(emu){
|
||||
emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
|
||||
ptr= s->edge_emu_buffer;
|
||||
}
|
||||
get_pixels(s->block[5], ptr, wrap_c);
|
||||
s->dsp.get_pixels(s->block[5], ptr, wrap_c);
|
||||
}
|
||||
}else{
|
||||
op_pixels_func (*op_pix)[4];
|
||||
@ -2266,17 +2273,17 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
|
||||
ptr_cr = s->new_picture[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
|
||||
|
||||
if ((!s->no_rounding) || s->pict_type==B_TYPE){
|
||||
op_pix = put_pixels_tab;
|
||||
op_qpix= put_qpel_pixels_tab;
|
||||
op_pix = s->dsp.put_pixels_tab;
|
||||
op_qpix= s->dsp.put_qpel_pixels_tab;
|
||||
}else{
|
||||
op_pix = put_no_rnd_pixels_tab;
|
||||
op_qpix= put_no_rnd_qpel_pixels_tab;
|
||||
op_pix = s->dsp.put_no_rnd_pixels_tab;
|
||||
op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
|
||||
}
|
||||
|
||||
if (s->mv_dir & MV_DIR_FORWARD) {
|
||||
MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture, op_pix, op_qpix);
|
||||
op_pix = avg_pixels_tab;
|
||||
op_qpix= avg_qpel_pixels_tab;
|
||||
op_pix = s->dsp.avg_pixels_tab;
|
||||
op_qpix= s->dsp.avg_qpel_pixels_tab;
|
||||
}
|
||||
if (s->mv_dir & MV_DIR_BACKWARD) {
|
||||
MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture, op_pix, op_qpix);
|
||||
@ -2305,10 +2312,10 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
|
||||
s->interlaced_dct=0;
|
||||
}
|
||||
|
||||
diff_pixels(s->block[0], ptr_y , dest_y , wrap_y);
|
||||
diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
|
||||
diff_pixels(s->block[2], ptr_y + dct_offset , dest_y + dct_offset , wrap_y);
|
||||
diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
|
||||
s->dsp.diff_pixels(s->block[0], ptr_y , dest_y , wrap_y);
|
||||
s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
|
||||
s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset , dest_y + dct_offset , wrap_y);
|
||||
s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
|
||||
|
||||
if(s->flags&CODEC_FLAG_GRAY){
|
||||
skip_dct[4]= 1;
|
||||
@ -2318,23 +2325,23 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
|
||||
emulated_edge_mc(s, ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
|
||||
ptr_cb= s->edge_emu_buffer;
|
||||
}
|
||||
diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
|
||||
s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
|
||||
if(emu){
|
||||
emulated_edge_mc(s, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
|
||||
ptr_cr= s->edge_emu_buffer;
|
||||
}
|
||||
diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
|
||||
s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
|
||||
}
|
||||
|
||||
/* pre quantization */
|
||||
if(s->mc_mb_var[s->mb_width*mb_y+ mb_x]<2*s->qscale*s->qscale){
|
||||
//FIXME optimize
|
||||
if(pix_abs8x8(ptr_y , dest_y , wrap_y) < 20*s->qscale) skip_dct[0]= 1;
|
||||
if(pix_abs8x8(ptr_y + 8, dest_y + 8, wrap_y) < 20*s->qscale) skip_dct[1]= 1;
|
||||
if(pix_abs8x8(ptr_y +dct_offset , dest_y +dct_offset , wrap_y) < 20*s->qscale) skip_dct[2]= 1;
|
||||
if(pix_abs8x8(ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y) < 20*s->qscale) skip_dct[3]= 1;
|
||||
if(pix_abs8x8(ptr_cb , dest_cb , wrap_y) < 20*s->qscale) skip_dct[4]= 1;
|
||||
if(pix_abs8x8(ptr_cr , dest_cr , wrap_y) < 20*s->qscale) skip_dct[5]= 1;
|
||||
if(s->dsp.pix_abs8x8(ptr_y , dest_y , wrap_y) < 20*s->qscale) skip_dct[0]= 1;
|
||||
if(s->dsp.pix_abs8x8(ptr_y + 8, dest_y + 8, wrap_y) < 20*s->qscale) skip_dct[1]= 1;
|
||||
if(s->dsp.pix_abs8x8(ptr_y +dct_offset , dest_y +dct_offset , wrap_y) < 20*s->qscale) skip_dct[2]= 1;
|
||||
if(s->dsp.pix_abs8x8(ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y) < 20*s->qscale) skip_dct[3]= 1;
|
||||
if(s->dsp.pix_abs8x8(ptr_cb , dest_cb , wrap_y) < 20*s->qscale) skip_dct[4]= 1;
|
||||
if(s->dsp.pix_abs8x8(ptr_cr , dest_cr , wrap_y) < 20*s->qscale) skip_dct[5]= 1;
|
||||
#if 0
|
||||
{
|
||||
static int stat[7];
|
||||
@ -2601,9 +2608,9 @@ static void encode_picture(MpegEncContext *s, int picture_number)
|
||||
int yy = mb_y * 16;
|
||||
uint8_t *pix = s->new_picture[0] + (yy * s->linesize) + xx;
|
||||
int varc;
|
||||
int sum = pix_sum(pix, s->linesize);
|
||||
int sum = s->dsp.pix_sum(pix, s->linesize);
|
||||
|
||||
varc = (pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
|
||||
varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
|
||||
|
||||
s->mb_var [s->mb_width * mb_y + mb_x] = varc;
|
||||
s->mb_mean[s->mb_width * mb_y + mb_x] = (sum+128)>>8;
|
||||
|
@ -221,6 +221,7 @@ typedef struct MpegEncContext {
|
||||
int unrestricted_mv;
|
||||
int h263_long_vectors; /* use horrible h263v1 long vector mode */
|
||||
|
||||
DSPContext dsp; /* pointers for accelerated dsp functions */
|
||||
int f_code; /* forward MV resolution */
|
||||
int b_code; /* backward MV resolution for B Frames (mpeg4) */
|
||||
INT16 (*motion_val)[2]; /* used for MV prediction (4MV per MB) */
|
||||
|
@ -447,7 +447,7 @@ static int rv10_decode_packet(AVCodecContext *avctx,
|
||||
printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y);
|
||||
#endif
|
||||
|
||||
clear_blocks(s->block[0]);
|
||||
s->dsp.clear_blocks(s->block[0]);
|
||||
s->mv_dir = MV_DIR_FORWARD;
|
||||
s->mv_type = MV_TYPE_16X16;
|
||||
if (ff_h263_decode_mb(s, s->block) == SLICE_ERROR) {
|
||||
|
@ -804,7 +804,7 @@ static void svq1_skip_block (uint8_t *current, uint8_t *previous, int pitch, int
|
||||
}
|
||||
}
|
||||
|
||||
static int svq1_motion_inter_block (bit_buffer_t *bitbuf,
|
||||
static int svq1_motion_inter_block (MpegEncContext *s, bit_buffer_t *bitbuf,
|
||||
uint8_t *current, uint8_t *previous, int pitch,
|
||||
svq1_pmv_t *motion, int x, int y) {
|
||||
uint8_t *src;
|
||||
@ -839,12 +839,12 @@ static int svq1_motion_inter_block (bit_buffer_t *bitbuf,
|
||||
src = &previous[(x + (mv.x >> 1)) + (y + (mv.y >> 1))*pitch];
|
||||
dst = current;
|
||||
|
||||
put_pixels_tab[0][((mv.y & 1) << 1) | (mv.x & 1)](dst,src,pitch,16);
|
||||
s->dsp.put_pixels_tab[0][((mv.y & 1) << 1) | (mv.x & 1)](dst,src,pitch,16);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int svq1_motion_inter_4v_block (bit_buffer_t *bitbuf,
|
||||
static int svq1_motion_inter_4v_block (MpegEncContext *s, bit_buffer_t *bitbuf,
|
||||
uint8_t *current, uint8_t *previous, int pitch,
|
||||
svq1_pmv_t *motion,int x, int y) {
|
||||
uint8_t *src;
|
||||
@ -906,7 +906,7 @@ static int svq1_motion_inter_4v_block (bit_buffer_t *bitbuf,
|
||||
src = &previous[(x + (pmv[i]->x >> 1)) + (y + (pmv[i]->y >> 1))*pitch];
|
||||
dst = current;
|
||||
|
||||
put_pixels_tab[1][((pmv[i]->y & 1) << 1) | (pmv[i]->x & 1)](dst,src,pitch,8);
|
||||
s->dsp.put_pixels_tab[1][((pmv[i]->y & 1) << 1) | (pmv[i]->x & 1)](dst,src,pitch,8);
|
||||
|
||||
/* select next block */
|
||||
if (i & 1) {
|
||||
@ -921,7 +921,7 @@ static int svq1_motion_inter_4v_block (bit_buffer_t *bitbuf,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int svq1_decode_delta_block (bit_buffer_t *bitbuf,
|
||||
static int svq1_decode_delta_block (MpegEncContext *s, bit_buffer_t *bitbuf,
|
||||
uint8_t *current, uint8_t *previous, int pitch,
|
||||
svq1_pmv_t *motion, int x, int y) {
|
||||
uint32_t bit_cache;
|
||||
@ -951,7 +951,7 @@ static int svq1_decode_delta_block (bit_buffer_t *bitbuf,
|
||||
break;
|
||||
|
||||
case SVQ1_BLOCK_INTER:
|
||||
result = svq1_motion_inter_block (bitbuf, current, previous, pitch, motion, x, y);
|
||||
result = svq1_motion_inter_block (s, bitbuf, current, previous, pitch, motion, x, y);
|
||||
|
||||
if (result != 0)
|
||||
{
|
||||
@ -964,7 +964,7 @@ static int svq1_decode_delta_block (bit_buffer_t *bitbuf,
|
||||
break;
|
||||
|
||||
case SVQ1_BLOCK_INTER_4V:
|
||||
result = svq1_motion_inter_4v_block (bitbuf, current, previous, pitch, motion, x, y);
|
||||
result = svq1_motion_inter_4v_block (s, bitbuf, current, previous, pitch, motion, x, y);
|
||||
|
||||
if (result != 0)
|
||||
{
|
||||
@ -1142,8 +1142,8 @@ static int svq1_decode_frame(AVCodecContext *avctx,
|
||||
|
||||
for (y=0; y < height; y+=16) {
|
||||
for (x=0; x < width; x+=16) {
|
||||
result = svq1_decode_delta_block (&s->gb, ¤t[x], previous,
|
||||
linesize, pmv, x, y);
|
||||
result = svq1_decode_delta_block (s, &s->gb, ¤t[x], previous,
|
||||
linesize, pmv, x, y);
|
||||
if (result != 0)
|
||||
{
|
||||
#ifdef DEBUG_SVQ1
|
||||
|
Loading…
Reference in New Issue
Block a user