diff --git a/postproc/swscale.c b/postproc/swscale.c index 7acdfbd074..26724ebfd7 100644 --- a/postproc/swscale.c +++ b/postproc/swscale.c @@ -17,8 +17,9 @@ */ /* - supported Input formats: YV12 (grayscale soon too) - supported output formats: YV12, BGR15, BGR16, BGR24, BGR32 (grayscale soon too) + supported Input formats: YV12, I420, IYUV (grayscale soon too) + supported output formats: YV12, I420, IYUV, BGR15, BGR16, BGR24, BGR32 (grayscale soon too) + BGR15/16 support dithering */ #include @@ -56,6 +57,12 @@ #define PI 3.14159265358979323846 #endif +//FIXME replace this with something faster +#define isYUV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV) +#define isPlanarYUV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV) +#define isHalfChrV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV) +#define isHalfChrH(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV) + extern int verbose; // defined in mplayer.c /* NOTES @@ -63,8 +70,6 @@ NOTES known BUGS with known cause (no bugreports please!, but patches are welcome :) ) horizontal fast_bilinear MMX2 scaler reads 1-7 samples too much (might cause a sig11) -Supported output formats BGR15 BGR16 BGR24 BGR32 YV12 -BGR15 & BGR16 MMX verions support dithering Special versions: fast Y 1:1 scaling (no interpolation in y direction) TODO @@ -507,7 +512,7 @@ static inline void yuv2rgbXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilt // minor note: the HAVE_xyz is messed up after that line so dont use it -// old global scaler, dont use for new code, unless it uses only the stuff from the command line +// old global scaler, dont use for new code // will use sws_flags from the command line void SwScale_YV12slice(unsigned char* src[], int srcStride[], int srcSliceY , int srcSliceH, uint8_t* dst[], int dstStride, int dstbpp, @@ -515,77 +520,8 @@ void SwScale_YV12slice(unsigned char* src[], int srcStride[], int srcSliceY , static SwsContext *context=NULL; int dstFormat; - int flags=0; - static int firstTime=1; int dstStride3[3]= {dstStride, dstStride>>1, dstStride>>1}; - if(firstTime) - { -#ifdef ARCH_X86 - if(gCpuCaps.hasMMX) - asm volatile("emms\n\t"::: "memory"); //FIXME this shouldnt be required but it IS (even for non mmx versions) -#endif - flags= SWS_PRINT_INFO; - firstTime=0; - - if(src_filter.lumH) freeVec(src_filter.lumH); - if(src_filter.lumV) freeVec(src_filter.lumV); - if(src_filter.chrH) freeVec(src_filter.chrH); - if(src_filter.chrV) freeVec(src_filter.chrV); - - if(sws_lum_gblur!=0.0){ - src_filter.lumH= getGaussianVec(sws_lum_gblur, 3.0); - src_filter.lumV= getGaussianVec(sws_lum_gblur, 3.0); - }else{ - src_filter.lumH= getIdentityVec(); - src_filter.lumV= getIdentityVec(); - } - - if(sws_chr_gblur!=0.0){ - src_filter.chrH= getGaussianVec(sws_chr_gblur, 3.0); - src_filter.chrV= getGaussianVec(sws_chr_gblur, 3.0); - }else{ - src_filter.chrH= getIdentityVec(); - src_filter.chrV= getIdentityVec(); - } - - if(sws_chr_sharpen!=0.0){ - SwsVector *g= getConstVec(-1.0, 3); - SwsVector *id= getConstVec(10.0/sws_chr_sharpen, 1); - g->coeff[1]=2.0; - addVec(id, g); - convVec(src_filter.chrH, id); - convVec(src_filter.chrV, id); - freeVec(g); - freeVec(id); - } - - if(sws_lum_sharpen!=0.0){ - SwsVector *g= getConstVec(-1.0, 3); - SwsVector *id= getConstVec(10.0/sws_lum_sharpen, 1); - g->coeff[1]=2.0; - addVec(id, g); - convVec(src_filter.lumH, id); - convVec(src_filter.lumV, id); - freeVec(g); - freeVec(id); - } - - if(sws_chr_hshift) - shiftVec(src_filter.chrH, sws_chr_hshift); - - if(sws_chr_vshift) - shiftVec(src_filter.chrV, sws_chr_vshift); - - normalizeVec(src_filter.chrH, 1.0); - normalizeVec(src_filter.chrV, 1.0); - normalizeVec(src_filter.lumH, 1.0); - normalizeVec(src_filter.lumV, 1.0); - - if(verbose > 1) printVec(src_filter.chrH); - if(verbose > 1) printVec(src_filter.lumH); - } - switch(dstbpp) { case 8 : dstFormat= IMGFMT_Y8; break; @@ -597,6 +533,85 @@ void SwScale_YV12slice(unsigned char* src[], int srcStride[], int srcSliceY , default: return; } + if(!context) context=getSwsContextFromCmdLine(srcW, srcH, IMGFMT_YV12, dstW, dstH, dstFormat); + + swScale(context, src, srcStride, srcSliceY, srcSliceH, dst, dstStride3); +} + +// will use sws_flags & src_filter (from cmd line) +SwsContext *getSwsContextFromCmdLine(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat) +{ + int flags=0; + static int firstTime=1; + +#ifdef ARCH_X86 + if(gCpuCaps.hasMMX) + asm volatile("emms\n\t"::: "memory"); //FIXME this shouldnt be required but it IS (even for non mmx versions) +#endif + if(firstTime) + { + firstTime=0; + flags= SWS_PRINT_INFO; + } + else if(verbose>1) flags= SWS_PRINT_INFO; + + if(src_filter.lumH) freeVec(src_filter.lumH); + if(src_filter.lumV) freeVec(src_filter.lumV); + if(src_filter.chrH) freeVec(src_filter.chrH); + if(src_filter.chrV) freeVec(src_filter.chrV); + + if(sws_lum_gblur!=0.0){ + src_filter.lumH= getGaussianVec(sws_lum_gblur, 3.0); + src_filter.lumV= getGaussianVec(sws_lum_gblur, 3.0); + }else{ + src_filter.lumH= getIdentityVec(); + src_filter.lumV= getIdentityVec(); + } + + if(sws_chr_gblur!=0.0){ + src_filter.chrH= getGaussianVec(sws_chr_gblur, 3.0); + src_filter.chrV= getGaussianVec(sws_chr_gblur, 3.0); + }else{ + src_filter.chrH= getIdentityVec(); + src_filter.chrV= getIdentityVec(); + } + + if(sws_chr_sharpen!=0.0){ + SwsVector *g= getConstVec(-1.0, 3); + SwsVector *id= getConstVec(10.0/sws_chr_sharpen, 1); + g->coeff[1]=2.0; + addVec(id, g); + convVec(src_filter.chrH, id); + convVec(src_filter.chrV, id); + freeVec(g); + freeVec(id); + } + + if(sws_lum_sharpen!=0.0){ + SwsVector *g= getConstVec(-1.0, 3); + SwsVector *id= getConstVec(10.0/sws_lum_sharpen, 1); + g->coeff[1]=2.0; + addVec(id, g); + convVec(src_filter.lumH, id); + convVec(src_filter.lumV, id); + freeVec(g); + freeVec(id); + } + + if(sws_chr_hshift) + shiftVec(src_filter.chrH, sws_chr_hshift); + + if(sws_chr_vshift) + shiftVec(src_filter.chrV, sws_chr_vshift); + + normalizeVec(src_filter.chrH, 1.0); + normalizeVec(src_filter.chrV, 1.0); + normalizeVec(src_filter.lumH, 1.0); + normalizeVec(src_filter.lumV, 1.0); + + if(verbose > 1) printVec(src_filter.chrH); + if(verbose > 1) printVec(src_filter.lumH); + switch(sws_flags) { case 0: flags|= SWS_FAST_BILINEAR; break; @@ -608,12 +623,10 @@ void SwScale_YV12slice(unsigned char* src[], int srcStride[], int srcSliceY , default:flags|= SWS_BILINEAR; break; } - if(!context) context=getSwsContext(srcW, srcH, IMGFMT_YV12, dstW, dstH, dstFormat, flags, &src_filter, NULL); - - - swScale(context, src, srcStride, srcSliceY, srcSliceH, dst, dstStride3); + return getSwsContext(srcW, srcH, srcFormat, dstW, dstH, dstFormat, flags, &src_filter, NULL); } + static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc, int srcW, int dstW, int filterAlign, int one, int flags, SwsVector *srcFilter, SwsVector *dstFilter) @@ -629,7 +642,9 @@ static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *out asm volatile("emms\n\t"::: "memory"); //FIXME this shouldnt be required but it IS (even for non mmx versions) #endif - *filterPos = (int16_t*)memalign(8, dstW*sizeof(int16_t)); + *filterPos = (int16_t*)memalign(8, (dstW+1)*sizeof(int16_t)); + (*filterPos)[dstW]=0; // the MMX scaler will read over the end + if(ABS(xInc - 0x10000) <10) // unscaled { int i; @@ -846,18 +861,26 @@ static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *out if(min>minFilterSize) minFilterSize= min; } + filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1)); + filter= (double*)memalign(8, filterSize*dstW*sizeof(double)); + *outFilterSize= filterSize; + + if((flags&SWS_PRINT_INFO) && verbose) + printf("SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize); /* try to reduce the filter-size (step2 reduce it) */ for(i=0; i=filter2Size) filter[i*filterSize + j]= 0.0; + else filter[i*filterSize + j]= filter2[i*filter2Size + j]; + } } - if((flags&SWS_PRINT_INFO) && verbose) - printf("SwScaler: reducing filtersize %d -> %d\n", filter2Size, minFilterSize); - filter2Size= minFilterSize; - ASSERT(filter2Size > 0) + free(filter2); filter2=NULL; + + ASSERT(filterSize > 0) //FIXME try to align filterpos if possible @@ -868,33 +891,32 @@ static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *out if((*filterPos)[i] < 0) { // Move filter coeffs left to compensate for filterPos - for(j=1; j srcW) + if((*filterPos)[i] + filterSize > srcW) { - int shift= (*filterPos)[i] + filter2Size - srcW; + int shift= (*filterPos)[i] + filterSize - srcW; // Move filter coeffs right to compensate for filterPos - for(j=filter2Size-2; j>=0; j--) + for(j=filterSize-2; j>=0; j--) { - int right= MIN(j + shift, filter2Size-1); - filter2[i*filter2Size +right] += filter2[i*filter2Size +j]; - filter2[i*filter2Size +j]=0; + int right= MIN(j + shift, filterSize-1); + filter[i*filterSize +right] += filter[i*filterSize +j]; + filter[i*filterSize +j]=0; } - (*filterPos)[i]= srcW - filter2Size; + (*filterPos)[i]= srcW - filterSize; } } - - *outFilterSize= (filter2Size +(filterAlign-1)) & (~(filterAlign-1)); - *outFilter= (int16_t*)memalign(8, *outFilterSize*dstW*sizeof(int16_t)); - memset(*outFilter, 0, *outFilterSize*dstW*sizeof(int16_t)); + // Note the +1 is for the MMXscaler which reads over the end + *outFilter= (int16_t*)memalign(8, *outFilterSize*(dstW+1)*sizeof(int16_t)); + memset(*outFilter, 0, *outFilterSize*(dstW+1)*sizeof(int16_t)); /* Normalize & Store in outFilter */ for(i=0; icannot do aligned memory acesses anymore\n", - widthAlign); - } -*/ if(!dstFilter) dstFilter= &dummyFilter; if(!srcFilter) srcFilter= &dummyFilter; @@ -1135,14 +1149,15 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, } /* set chrXInc & chrDstW */ - if((flags&SWS_FULL_UV_IPOL) && dstFormat!=IMGFMT_YV12) + if((flags&SWS_FULL_UV_IPOL) && !isHalfChrH(dstFormat)) c->chrXInc= c->lumXInc>>1, c->chrDstW= dstW; else c->chrXInc= c->lumXInc, c->chrDstW= (dstW+1)>>1; /* set chrYInc & chrDstH */ - if(dstFormat==IMGFMT_YV12) c->chrYInc= c->lumYInc, c->chrDstH= (dstH+1)>>1; - else c->chrYInc= c->lumYInc>>1, c->chrDstH= dstH; + if(isHalfChrV(dstFormat)) + c->chrYInc= c->lumYInc, c->chrDstH= (dstH+1)>>1; + else c->chrYInc= c->lumYInc>>1, c->chrDstH= dstH; /* precalculate horizontal scaler filter coefficients */ { @@ -1191,9 +1206,9 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, } // allocate pixbufs (we use dynamic allocation because otherwise we would need to - // allocate several megabytes to handle all possible cases) c->lumPixBuf= (int16_t**)memalign(4, c->vLumBufSize*2*sizeof(int16_t*)); c->chrPixBuf= (int16_t**)memalign(4, c->vChrBufSize*2*sizeof(int16_t*)); + //Note we need at least one pixel more at the end because of the mmx code (just in case someone wanna replace the 4000/8000) for(i=0; ivLumBufSize; i++) c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= (uint16_t*)memalign(8, 4000); for(i=0; ivChrBufSize; i++) @@ -1248,6 +1263,10 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, fprintf(stderr, "with BGR32 output "); else if(dstFormat==IMGFMT_YV12) fprintf(stderr, "with YV12 output "); + else if(dstFormat==IMGFMT_I420) + fprintf(stderr, "with I420 output "); + else if(dstFormat==IMGFMT_IYUV) + fprintf(stderr, "with IYUV output "); else fprintf(stderr, "without output "); @@ -1295,12 +1314,12 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, printf("SwScaler: using C scaler for horizontal scaling\n"); #endif } - if(dstFormat==IMGFMT_YV12) + if(isPlanarYUV(dstFormat)) { if(c->vLumFilterSize==1) - printf("SwScaler: using 1-tap %s \"scaler\" for vertical scaling (YV12)\n", cpuCaps.hasMMX ? "MMX" : "C"); + printf("SwScaler: using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", cpuCaps.hasMMX ? "MMX" : "C"); else - printf("SwScaler: using n-tap %s scaler for vertical scaling (YV12)\n", cpuCaps.hasMMX ? "MMX" : "C"); + printf("SwScaler: using n-tap %s scaler for vertical scaling (YV12 like)\n", cpuCaps.hasMMX ? "MMX" : "C"); } else { @@ -1561,7 +1580,7 @@ void freeSwsContext(SwsContext *c){ if(c->lumPixBuf) { - for(i=0; ivLumBufSize*2; i++) + for(i=0; ivLumBufSize; i++) { if(c->lumPixBuf[i]) free(c->lumPixBuf[i]); c->lumPixBuf[i]=NULL; @@ -1572,7 +1591,7 @@ void freeSwsContext(SwsContext *c){ if(c->chrPixBuf) { - for(i=0; ivChrBufSize*2; i++) + for(i=0; ivChrBufSize; i++) { if(c->chrPixBuf[i]) free(c->chrPixBuf[i]); c->chrPixBuf[i]=NULL; diff --git a/postproc/swscale.h b/postproc/swscale.h index 6ddd5ae44a..755d07e07e 100644 --- a/postproc/swscale.h +++ b/postproc/swscale.h @@ -103,6 +103,7 @@ void SwScale_Init(); void freeSwsContext(SwsContext *swsContext); +SwsContext *getSwsContextFromCmdLine(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat); SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat, int flags, SwsFilter *srcFilter, SwsFilter *dstFilter); diff --git a/postproc/swscale_template.c b/postproc/swscale_template.c index 4f4aec40fe..1066495bcf 100644 --- a/postproc/swscale_template.c +++ b/postproc/swscale_template.c @@ -1974,8 +1974,8 @@ FUNNYUVCODE } } -static void RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, - int srcSliceH, uint8_t* dst[], int dstStride[]){ +static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dstParam[], int dstStride[]){ /* load a few things into local vars to make the code more readable? and faster */ const int srcW= c->srcW; @@ -2014,6 +2014,41 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int int chrBufIndex= c->chrBufIndex; int lastInLumBuf= c->lastInLumBuf; int lastInChrBuf= c->lastInChrBuf; + uint8_t *src[3]; + uint8_t *dst[3]; + + if((c->srcFormat == IMGFMT_IYUV) || (c->srcFormat == IMGFMT_I420)){ + src[0]= srcParam[0]; + src[1]= srcParam[2]; + src[2]= srcParam[1]; + + }else{ + src[0]= srcParam[0]; + src[1]= srcParam[1]; + src[2]= srcParam[2]; + } + + if((c->dstFormat == IMGFMT_IYUV) || (c->dstFormat == IMGFMT_I420)){ + dst[0]= dstParam[0]; + dst[1]= dstParam[2]; + dst[2]= dstParam[1]; + + }else{ + dst[0]= dstParam[0]; + dst[1]= dstParam[1]; + dst[2]= dstParam[2]; + } + + if(dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0) + { + static int firstTime=1; //FIXME move this into the context perhaps + if(flags & SWS_PRINT_INFO && firstTime) + { + fprintf(stderr, "SwScaler: Warning: dstStride is not aligned!\n" + "SwScaler: ->cannot do aligned memory acesses anymore\n"); + firstTime=0; + } + } if(srcSliceY ==0){ lumBufIndex=0; @@ -2027,7 +2062,7 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int unsigned char *dest =dst[0]+dstStride[0]*dstY; unsigned char *uDest=dst[1]+dstStride[1]*(dstY>>1); unsigned char *vDest=dst[2]+dstStride[2]*(dstY>>1); - const int chrDstY= dstFormat==IMGFMT_YV12 ? (dstY>>1) : dstY; + const int chrDstY= isHalfChrV(dstFormat) ? (dstY>>1) : dstY; const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input @@ -2124,7 +2159,7 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int #endif if(dstY < dstH-2) { - if(dstFormat==IMGFMT_YV12) //YV12 + if(isPlanarYUV(dstFormat)) //YV12 like { if(dstY&1) uDest=vDest= NULL; //FIXME split functions in lumi / chromi if(vLumFilterSize == 1 && vChrFilterSize == 1) // Unscaled YV12 @@ -2180,7 +2215,7 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int { int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; - if(dstFormat==IMGFMT_YV12) //YV12 + if(isPlanarYUV(dstFormat)) //YV12 { if(dstY&1) uDest=vDest= NULL; //FIXME split functions in lumi / chromi yuv2yuvXinC(