diff --git a/postproc/swscale.c b/postproc/swscale.c index 8b8d899dd9..0583d1f82b 100644 --- a/postproc/swscale.c +++ b/postproc/swscale.c @@ -17,7 +17,7 @@ */ /* - supported Input formats: YV12, I420, IYUV (grayscale soon too) + supported Input formats: YV12, I420, IYUV, YUY2, BGR32, BGR24 (grayscale soon too) supported output formats: YV12, I420, IYUV, BGR15, BGR16, BGR24, BGR32 (grayscale soon too) BGR15/16 support dithering */ @@ -43,7 +43,7 @@ //#undef ARCH_X86 #define DITHER1XBPP -#define RET 0xC3 //near return opcode +#define RET 0xC3 //near return opcode for X86 #ifdef MP_DEBUG #define ASSERT(x) if(!(x)) { printf("ASSERT " #x " failed\n"); *((int*)0)=0; } @@ -58,10 +58,22 @@ #endif //FIXME replace this with something faster -#define isYUV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV) #define isPlanarYUV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV) +#define isYUV(x) ((x)==IMGFMT_YUY2 || isPlanarYUV(x)) #define isHalfChrV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV) -#define isHalfChrH(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV) +#define isHalfChrH(x) ((x)==IMGFMT_YUY2 || (x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV) +#define isPacked(x) ((x)==IMGFMT_YUY2 || (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24) + +#define RGB2YUV_SHIFT 8 +#define BY ((int)( 0.098*(1<BGR scaler */ #define ABS(a) ((a) > 0 ? (a) : (-(a))) @@ -1105,7 +1117,8 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, /* sanity check */ if(srcW<4 || srcH<1 || dstW<8 || dstH<1) return NULL; //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code - if(srcFormat!=IMGFMT_YV12 && srcFormat!=IMGFMT_I420 && srcFormat!=IMGFMT_IYUV) return NULL; +// if(!isSupportedIn(srcFormat)) return NULL; +// if(!isSupportedOut(dstFormat)) return NULL; if(!dstFilter) dstFilter= &dummyFilter; if(!srcFilter) srcFilter= &dummyFilter; @@ -1135,6 +1148,30 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, else c->canMMX2BeUsed=0; + + /* dont use full vertical UV input/internaly if the source doesnt even have it */ + if(isHalfChrV(srcFormat)) c->flags= flags= flags&(~SWS_FULL_CHR_V); + /* dont use full horizontal UV input if the source doesnt even have it */ + if(isHalfChrH(srcFormat)) c->flags= flags= flags&(~SWS_FULL_CHR_H_INP); + /* dont use full horizontal UV internally if the destination doesnt even have it */ + if(isHalfChrH(dstFormat)) c->flags= flags= flags&(~SWS_FULL_CHR_H_INT); + + if(flags&SWS_FULL_CHR_H_INP) c->chrSrcW= srcW; + else c->chrSrcW= (srcW+1)>>1; + + if(flags&SWS_FULL_CHR_H_INT) c->chrDstW= dstW; + else c->chrDstW= (dstW+1)>>1; + + if(flags&SWS_FULL_CHR_V) c->chrSrcH= srcH; + else c->chrSrcH= (srcH+1)>>1; + + if(isHalfChrV(dstFormat)) c->chrDstH= (dstH+1)>>1; + else c->chrDstH= dstH; + + c->chrXInc= ((c->chrSrcW<<16) + (c->chrDstW>>1))/c->chrDstW; + c->chrYInc= ((c->chrSrcH<<16) + (c->chrDstH>>1))/c->chrDstH; + + // match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst // but only for the FAST_BILINEAR mode otherwise do correct scaling // n-2 is the last chrominance sample available @@ -1143,22 +1180,19 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, // first and last pixel if(flags&SWS_FAST_BILINEAR) { - if(c->canMMX2BeUsed) c->lumXInc+= 20; + if(c->canMMX2BeUsed) + { + c->lumXInc+= 20; + c->chrXInc+= 20; + } //we dont use the x86asm scaler if mmx is available - else if(cpuCaps.hasMMX) c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20; + else if(cpuCaps.hasMMX) + { + c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20; + c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20; + } } - /* set chrXInc & chrDstW */ - if((flags&SWS_FULL_UV_IPOL) && !isHalfChrH(dstFormat)) - c->chrXInc= c->lumXInc>>1, c->chrDstW= dstW; - else - c->chrXInc= c->lumXInc, c->chrDstW= (dstW+1)>>1; - - /* set chrYInc & chrDstH */ - if(isHalfChrV(dstFormat)) - c->chrYInc= c->lumYInc, c->chrDstH= (dstH+1)>>1; - else c->chrYInc= c->lumYInc>>1, c->chrDstH= dstH; - /* precalculate horizontal scaler filter coefficients */ { const int filterAlign= cpuCaps.hasMMX ? 4 : 1; @@ -1246,6 +1280,8 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, fprintf(stderr, "\nSwScaler: BILINEAR scaler "); else if(flags&SWS_BICUBIC) fprintf(stderr, "\nSwScaler: BICUBIC scaler "); + else if(flags&SWS_X) + fprintf(stderr, "\nSwScaler: Experimental scaler "); else if(flags&SWS_POINT) fprintf(stderr, "\nSwScaler: Nearest Neighbor / POINT scaler "); else if(flags&SWS_AREA) @@ -1344,7 +1380,14 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, printf("SwScaler: %dx%d -> %dx%d\n", srcW, srcH, dstW, dstH); } - + if((flags & SWS_PRINT_INFO) && verbose>1) + { + printf("SwScaler:Lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n", + c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc); + printf("SwScaler:Chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n", + c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, c->chrXInc, c->chrYInc); + } + return c; } diff --git a/postproc/swscale.h b/postproc/swscale.h index 755d07e07e..1440745078 100644 --- a/postproc/swscale.h +++ b/postproc/swscale.h @@ -23,7 +23,15 @@ #define SWS_X 8 #define SWS_POINT 0x10 #define SWS_AREA 0x20 -#define SWS_FULL_UV_IPOL 0x100 + +//the following 4 flags are not completly implemented +//internal chrominace subsamling info +#define SWS_FULL_CHR_V 0x100 +#define SWS_FULL_CHR_H_INT 0x200 +//input subsampling info +#define SWS_FULL_CHR_H_INP 0x400 +#define SWS_DIRECT_BGR 0x800 + #define SWS_PRINT_INFO 0x1000 #define SWS_MAX_REDUCE_CUTOFF 0.002 @@ -31,7 +39,7 @@ /* this struct should be aligned on at least 32-byte boundary */ typedef struct{ int srcW, srcH, dstW, dstH; - int chrDstW, chrDstH; + int chrSrcW, chrSrcH, chrDstW, chrDstH; int lumXInc, chrXInc; int lumYInc, chrYInc; int dstFormat, srcFormat; @@ -50,6 +58,7 @@ typedef struct{ // Contain simply the values from v(Lum|Chr)Filter just nicely packed for mmx int16_t *lumMmxFilter; int16_t *chrMmxFilter; + uint8_t formatConvBuffer[4000]; //FIXME dynamic alloc, but we have to change alot of code for this to be usefull int hLumFilterSize; int hChrFilterSize; diff --git a/postproc/swscale_template.c b/postproc/swscale_template.c index 1066495bcf..18f459093a 100644 --- a/postproc/swscale_template.c +++ b/postproc/swscale_template.c @@ -841,7 +841,7 @@ static inline void RENAME(yuv2rgb2)(uint16_t *buf0, uint16_t *buf1, uint16_t *uv int yalpha1=yalpha^4095; int uvalpha1=uvalpha^4095; - if(flags&SWS_FULL_UV_IPOL) + if(flags&SWS_FULL_CHR_H_INT) { #ifdef HAVE_MMX @@ -1267,7 +1267,7 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t * int uvalpha1=uvalpha^4095; const int yalpha1=0; - if(flags&SWS_FULL_UV_IPOL) + if(flags&SWS_FULL_CHR_H_INT) { RENAME(yuv2rgb2)(buf0, buf0, uvbuf0, uvbuf1, dest, dstW, 0, uvalpha, dstFormat, flags); return; @@ -1535,6 +1535,96 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t * #endif } +static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, int width) +{ +#ifdef HAVE_MMXFIXME +#else + int i; + for(i=0; i>1; + dstV[i]= (src1[4*i + 3] + src2[4*i + 3])>>1; + } +#endif +} + +static inline void RENAME(bgr32ToY)(uint8_t *dst, uint8_t *src, int width) +{ +#ifdef HAVE_MMXFIXME +#else + int i; + for(i=0; i>RGB2YUV_SHIFT) + 16; + } +#endif +} + +static inline void RENAME(bgr32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) +{ +#ifdef HAVE_MMXFIXME +#else + int i; + for(i=0; i>(RGB2YUV_SHIFT+2)) + 128; + dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128; + } +#endif +} + +static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, int width) +{ +#ifdef HAVE_MMXFIXME +#else + int i; + for(i=0; i>RGB2YUV_SHIFT) + 16; + } +#endif +} + +static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) +{ +#ifdef HAVE_MMXFIXME +#else + int i; + for(i=0; i>(RGB2YUV_SHIFT+2)) + 128; + dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128; + } +#endif +} + + // Bilinear / Bicubic scaling static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc, int16_t *filter, int16_t *filterPos, int filterSize) @@ -1699,8 +1789,25 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW // *** horizontal scale Y line to temp buffer static inline void RENAME(hyscale)(uint16_t *dst, int dstWidth, uint8_t *src, int srcW, int xInc, int flags, int canMMX2BeUsed, int16_t *hLumFilter, - int16_t *hLumFilterPos, int hLumFilterSize, void *funnyYCode) + int16_t *hLumFilterPos, int hLumFilterSize, void *funnyYCode, + int srcFormat, uint8_t *formatConvBuffer) { + if(srcFormat==IMGFMT_YUY2) + { + RENAME(yuy2ToY)(formatConvBuffer, src, srcW); + src= formatConvBuffer; + } + else if(srcFormat==IMGFMT_BGR32) + { + RENAME(bgr32ToY)(formatConvBuffer, src, srcW); + src= formatConvBuffer; + } + else if(srcFormat==IMGFMT_BGR24) + { + RENAME(bgr24ToY)(formatConvBuffer, src, srcW); + src= formatConvBuffer; + } + #ifdef HAVE_MMX // use the new MMX scaler if th mmx2 cant be used (its faster than the x86asm one) if(!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed)) @@ -1826,8 +1933,28 @@ FUNNY_Y_CODE inline static void RENAME(hcscale)(uint16_t *dst, int dstWidth, uint8_t *src1, uint8_t *src2, int srcW, int xInc, int flags, int canMMX2BeUsed, int16_t *hChrFilter, - int16_t *hChrFilterPos, int hChrFilterSize, void *funnyUVCode) + int16_t *hChrFilterPos, int hChrFilterSize, void *funnyUVCode, + int srcFormat, uint8_t *formatConvBuffer) { + if(srcFormat==IMGFMT_YUY2) + { + RENAME(yuy2ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); + src1= formatConvBuffer; + src2= formatConvBuffer+2048; + } + else if(srcFormat==IMGFMT_BGR32) + { + RENAME(bgr32ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); + src1= formatConvBuffer; + src2= formatConvBuffer+2048; + } + else if(srcFormat==IMGFMT_BGR24) + { + RENAME(bgr24ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); + src1= formatConvBuffer; + src2= formatConvBuffer+2048; + } + #ifdef HAVE_MMX // use the new MMX scaler if th mmx2 cant be used (its faster than the x86asm one) if(!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed)) @@ -1974,7 +2101,7 @@ FUNNYUVCODE } } -static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStride[], int srcSliceY, +static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStrideParam[], int srcSliceY, int srcSliceH, uint8_t* dstParam[], int dstStride[]){ /* load a few things into local vars to make the code more readable? and faster */ @@ -2007,6 +2134,7 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStride[], const int vChrBufSize= c->vChrBufSize; uint8_t *funnyYCode= c->funnyYCode; uint8_t *funnyUVCode= c->funnyUVCode; + uint8_t *formatConvBuffer= c->formatConvBuffer; /* vars whch will change and which we need to storw back in the context */ int dstY= c->dstY; @@ -2014,6 +2142,7 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStride[], int chrBufIndex= c->chrBufIndex; int lastInLumBuf= c->lastInLumBuf; int lastInChrBuf= c->lastInChrBuf; + int srcStride[3]; uint8_t *src[3]; uint8_t *dst[3]; @@ -2021,11 +2150,33 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStride[], src[0]= srcParam[0]; src[1]= srcParam[2]; src[2]= srcParam[1]; - - }else{ + srcStride[0]= srcStrideParam[0]; + srcStride[1]= srcStrideParam[2]; + srcStride[2]= srcStrideParam[1]; + } + else if(c->srcFormat==IMGFMT_YV12){ src[0]= srcParam[0]; src[1]= srcParam[1]; src[2]= srcParam[2]; + srcStride[0]= srcStrideParam[0]; + srcStride[1]= srcStrideParam[1]; + srcStride[2]= srcStrideParam[2]; + } + else if(isPacked(c->srcFormat)){ + src[0]= + src[1]= + src[2]= srcParam[0]; + srcStride[0]= srcStrideParam[0]; + srcStride[1]= + srcStride[2]= srcStrideParam[0]<<1; + } + else if(c->srcFormat==IMGFMT_Y8){ + src[0]= srcParam[0]; + src[1]= + src[2]= NULL; + srcStride[0]= srcStrideParam[0]; + srcStride[1]= + srcStride[2]= 0; } if((c->dstFormat == IMGFMT_IYUV) || (c->dstFormat == IMGFMT_I420)){ @@ -2038,6 +2189,7 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStride[], dst[1]= dstParam[1]; dst[2]= dstParam[2]; } + if(dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0) { @@ -2050,10 +2202,12 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStride[], } } + /* Note the user might start scaling the picture in the middle so this will not get executed + this is not really intended but works currently, so ppl might do it */ if(srcSliceY ==0){ lumBufIndex=0; chrBufIndex=0; - dstY=0; + dstY=0; lastInLumBuf= -1; lastInChrBuf= -1; } @@ -2091,7 +2245,7 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStride[], // printf("%d %d\n", lumBufIndex, vLumBufSize); RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc, flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize, - funnyYCode); + funnyYCode, c->srcFormat, formatConvBuffer); lastInLumBuf++; } while(lastInChrBuf < lastChrSrcY) @@ -2105,7 +2259,7 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStride[], //FIXME replace parameters through context struct (some at least) RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, (srcW+1)>>1, chrXInc, flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize, - funnyUVCode); + funnyUVCode, c->srcFormat, formatConvBuffer); lastInChrBuf++; } //wrap buf index around to stay inside the ring buffer @@ -2129,7 +2283,7 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStride[], ASSERT(lastInLumBuf + 1 - srcSliceY >= 0) RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc, flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize, - funnyYCode); + funnyYCode, c->srcFormat, formatConvBuffer); lastInLumBuf++; } while(lastInChrBuf+1 < ((srcSliceY + srcSliceH)>>1)) @@ -2142,7 +2296,7 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStride[], ASSERT(lastInChrBuf + 1 - (srcSliceY>>1) >= 0) RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, (srcW+1)>>1, chrXInc, flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize, - funnyUVCode); + funnyUVCode, c->srcFormat, formatConvBuffer); lastInChrBuf++; } //wrap buf index around to stay inside the ring buffer