mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-13 21:28:01 +02:00
MMX(2) optimized YUY2 input
Avoid duplicate checks for formats by changing them (Y8->Y800, IYUV->I420). Originally committed as revision 4482 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc
This commit is contained in:
parent
1747031459
commit
6ff0ad6bfd
@ -17,7 +17,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
supported Input formats: YV12, I420, IYUV, YUY2, BGR32, BGR24 (grayscale soon too)
|
supported Input formats: YV12, I420, IYUV, YUY2, BGR32, BGR24, Y8, Y800
|
||||||
supported output formats: YV12, I420, IYUV, BGR15, BGR16, BGR24, BGR32 (grayscale soon too)
|
supported output formats: YV12, I420, IYUV, BGR15, BGR16, BGR24, BGR32 (grayscale soon too)
|
||||||
BGR15/16 support dithering
|
BGR15/16 support dithering
|
||||||
*/
|
*/
|
||||||
@ -58,13 +58,19 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
//FIXME replace this with something faster
|
//FIXME replace this with something faster
|
||||||
#define isPlanarYUV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV)
|
#define isPlanarYUV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420)
|
||||||
#define isYUV(x) ((x)==IMGFMT_YUY2 || isPlanarYUV(x))
|
#define isYUV(x) ((x)==IMGFMT_YUY2 || isPlanarYUV(x))
|
||||||
#define isHalfChrV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV)
|
#define isHalfChrV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420)
|
||||||
#define isHalfChrH(x) ((x)==IMGFMT_YUY2 || (x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV)
|
#define isHalfChrH(x) ((x)==IMGFMT_YUY2 || (x)==IMGFMT_YV12 || (x)==IMGFMT_I420)
|
||||||
#define isPacked(x) ((x)==IMGFMT_YUY2 || (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24)
|
#define isPacked(x) ((x)==IMGFMT_YUY2 || (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24)
|
||||||
|
#define isGray(x) ((x)==IMGFMT_Y800)
|
||||||
|
#define isSupportedIn(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_YUY2 \
|
||||||
|
|| (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24\
|
||||||
|
|| (x)==IMGFMT_Y800)
|
||||||
|
#define isSupportedOut(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 \
|
||||||
|
|| (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15)
|
||||||
|
|
||||||
#define RGB2YUV_SHIFT 8
|
#define RGB2YUV_SHIFT 16
|
||||||
#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
|
#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
|
||||||
#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
|
#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
|
||||||
#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
|
#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
|
||||||
@ -90,7 +96,8 @@ change the distance of the u & v buffer
|
|||||||
write special vertical cubic upscale version
|
write special vertical cubic upscale version
|
||||||
Optimize C code (yv12 / minmax)
|
Optimize C code (yv12 / minmax)
|
||||||
add support for packed pixel yuv input & output
|
add support for packed pixel yuv input & output
|
||||||
add support for Y8 input & output
|
add support for Y8 output
|
||||||
|
optimize bgr24 & bgr32
|
||||||
add BGR4 output support
|
add BGR4 output support
|
||||||
write special BGR->BGR scaler
|
write special BGR->BGR scaler
|
||||||
*/
|
*/
|
||||||
@ -118,6 +125,7 @@ static uint64_t __attribute__((aligned(8))) w02= 0x0002000200020002LL;
|
|||||||
static uint64_t __attribute__((aligned(8))) bm00001111=0x00000000FFFFFFFFLL;
|
static uint64_t __attribute__((aligned(8))) bm00001111=0x00000000FFFFFFFFLL;
|
||||||
static uint64_t __attribute__((aligned(8))) bm00000111=0x0000000000FFFFFFLL;
|
static uint64_t __attribute__((aligned(8))) bm00000111=0x0000000000FFFFFFLL;
|
||||||
static uint64_t __attribute__((aligned(8))) bm11111000=0xFFFFFFFFFF000000LL;
|
static uint64_t __attribute__((aligned(8))) bm11111000=0xFFFFFFFFFF000000LL;
|
||||||
|
static uint64_t __attribute__((aligned(8))) bm01010101=0x00FF00FF00FF00FFLL;
|
||||||
|
|
||||||
static volatile uint64_t __attribute__((aligned(8))) b5Dither;
|
static volatile uint64_t __attribute__((aligned(8))) b5Dither;
|
||||||
static volatile uint64_t __attribute__((aligned(8))) g5Dither;
|
static volatile uint64_t __attribute__((aligned(8))) g5Dither;
|
||||||
@ -198,7 +206,7 @@ void in_asm_used_var_warning_killer()
|
|||||||
{
|
{
|
||||||
volatile int i= yCoeff+vrCoeff+ubCoeff+vgCoeff+ugCoeff+bF8+bFC+w400+w80+w10+
|
volatile int i= yCoeff+vrCoeff+ubCoeff+vgCoeff+ugCoeff+bF8+bFC+w400+w80+w10+
|
||||||
bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+asm_yalpha1+ asm_uvalpha1+
|
bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+asm_yalpha1+ asm_uvalpha1+
|
||||||
M24A+M24B+M24C+w02 + b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0];
|
M24A+M24B+M24C+w02 + b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0]+bm01010101;
|
||||||
if(i) i=0;
|
if(i) i=0;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -1114,12 +1122,16 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
|
|||||||
|
|
||||||
if(swScale==NULL) globalInit();
|
if(swScale==NULL) globalInit();
|
||||||
|
|
||||||
|
/* avoid duplicate formats, so we don't need to check too much */
|
||||||
|
if(srcFormat==IMGFMT_IYUV) srcFormat=IMGFMT_I420;
|
||||||
|
if(srcFormat==IMGFMT_Y8) srcFormat=IMGFMT_Y800;
|
||||||
|
|
||||||
|
if(!isSupportedIn(srcFormat)) return NULL;
|
||||||
|
if(!isSupportedOut(dstFormat)) return NULL;
|
||||||
|
|
||||||
/* sanity check */
|
/* sanity check */
|
||||||
if(srcW<4 || srcH<1 || dstW<8 || dstH<1) return NULL; //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code
|
if(srcW<4 || srcH<1 || dstW<8 || dstH<1) return NULL; //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code
|
||||||
|
|
||||||
// if(!isSupportedIn(srcFormat)) return NULL;
|
|
||||||
// if(!isSupportedOut(dstFormat)) return NULL;
|
|
||||||
|
|
||||||
if(!dstFilter) dstFilter= &dummyFilter;
|
if(!dstFilter) dstFilter= &dummyFilter;
|
||||||
if(!srcFilter) srcFilter= &dummyFilter;
|
if(!srcFilter) srcFilter= &dummyFilter;
|
||||||
|
|
||||||
|
@ -1535,9 +1535,26 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t *
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//FIXME yuy2* can read up to 7 samples too many
|
||||||
|
|
||||||
static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, int width)
|
static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, int width)
|
||||||
{
|
{
|
||||||
#ifdef HAVE_MMXFIXME
|
#ifdef HAVE_MMX
|
||||||
|
asm volatile(
|
||||||
|
"movq "MANGLE(bm01010101)", %%mm2\n\t"
|
||||||
|
"movl %0, %%eax \n\t"
|
||||||
|
"1: \n\t"
|
||||||
|
"movq (%1, %%eax,2), %%mm0 \n\t"
|
||||||
|
"movq 8(%1, %%eax,2), %%mm1 \n\t"
|
||||||
|
"pand %%mm2, %%mm0 \n\t"
|
||||||
|
"pand %%mm2, %%mm1 \n\t"
|
||||||
|
"packuswb %%mm1, %%mm0 \n\t"
|
||||||
|
"movq %%mm0, (%2, %%eax) \n\t"
|
||||||
|
"addl $8, %%eax \n\t"
|
||||||
|
" js 1b \n\t"
|
||||||
|
: : "g" (-width), "r" (src+width*2), "r" (dst+width)
|
||||||
|
: "%eax"
|
||||||
|
);
|
||||||
#else
|
#else
|
||||||
int i;
|
int i;
|
||||||
for(i=0; i<width; i++)
|
for(i=0; i<width; i++)
|
||||||
@ -1547,7 +1564,32 @@ static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, int width)
|
|||||||
|
|
||||||
static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
|
static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
|
||||||
{
|
{
|
||||||
#ifdef HAVE_MMXFIXME
|
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
|
||||||
|
asm volatile(
|
||||||
|
"movq "MANGLE(bm01010101)", %%mm4\n\t"
|
||||||
|
"movl %0, %%eax \n\t"
|
||||||
|
"1: \n\t"
|
||||||
|
"movq (%1, %%eax,4), %%mm0 \n\t"
|
||||||
|
"movq 8(%1, %%eax,4), %%mm1 \n\t"
|
||||||
|
"movq (%2, %%eax,4), %%mm2 \n\t"
|
||||||
|
"movq 8(%2, %%eax,4), %%mm3 \n\t"
|
||||||
|
PAVGB(%%mm2, %%mm0)
|
||||||
|
PAVGB(%%mm3, %%mm1)
|
||||||
|
"psrlw $8, %%mm0 \n\t"
|
||||||
|
"psrlw $8, %%mm1 \n\t"
|
||||||
|
"packuswb %%mm1, %%mm0 \n\t"
|
||||||
|
"movq %%mm0, %%mm1 \n\t"
|
||||||
|
"psrlw $8, %%mm0 \n\t"
|
||||||
|
"pand %%mm4, %%mm1 \n\t"
|
||||||
|
"packuswb %%mm0, %%mm0 \n\t"
|
||||||
|
"packuswb %%mm1, %%mm1 \n\t"
|
||||||
|
"movd %%mm0, (%4, %%eax) \n\t"
|
||||||
|
"movd %%mm1, (%3, %%eax) \n\t"
|
||||||
|
"addl $4, %%eax \n\t"
|
||||||
|
" js 1b \n\t"
|
||||||
|
: : "g" (-width), "r" (src1+width*4), "r" (src2+width*4), "r" (dstU+width), "r" (dstV+width)
|
||||||
|
: "%eax"
|
||||||
|
);
|
||||||
#else
|
#else
|
||||||
int i;
|
int i;
|
||||||
for(i=0; i<width; i++)
|
for(i=0; i<width; i++)
|
||||||
@ -1954,6 +1996,10 @@ inline static void RENAME(hcscale)(uint16_t *dst, int dstWidth, uint8_t *src1, u
|
|||||||
src1= formatConvBuffer;
|
src1= formatConvBuffer;
|
||||||
src2= formatConvBuffer+2048;
|
src2= formatConvBuffer+2048;
|
||||||
}
|
}
|
||||||
|
else if(isGray(srcFormat))
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef HAVE_MMX
|
#ifdef HAVE_MMX
|
||||||
// use the new MMX scaler if the MMX2 one can't be used (it's faster than the x86asm one)
|
// use the new MMX scaler if the MMX2 one can't be used (it's faster than the x86asm one)
|
||||||
@ -2170,7 +2216,7 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStridePar
|
|||||||
srcStride[1]=
|
srcStride[1]=
|
||||||
srcStride[2]= srcStrideParam[0]<<1;
|
srcStride[2]= srcStrideParam[0]<<1;
|
||||||
}
|
}
|
||||||
else if(c->srcFormat==IMGFMT_Y8){
|
else if(isGray(c->srcFormat)){
|
||||||
src[0]= srcParam[0];
|
src[0]= srcParam[0];
|
||||||
src[1]=
|
src[1]=
|
||||||
src[2]= NULL;
|
src[2]= NULL;
|
||||||
|
Loading…
Reference in New Issue
Block a user