mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
horizontal up/downscale linear & cubic
-sws command line option (0 -> fast_bilinear, 1 -> bilinear, 2 -> bicubic).
Originally committed as revision 3273 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc
This commit is contained in:
parent
af91b8b3c8
commit
077ea8a7f4
@ -7,6 +7,7 @@
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
//#include <stdio.h> //FOR DEBUG ONLY
|
||||
#include "../config.h"
|
||||
#include "swscale.h"
|
||||
@ -60,6 +61,7 @@ static uint64_t __attribute__((aligned(8))) bFC= 0xFCFCFCFCFCFCFCFCLL;
|
||||
static uint64_t __attribute__((aligned(8))) w400= 0x0400040004000400LL;
|
||||
static uint64_t __attribute__((aligned(8))) w80= 0x0080008000800080LL;
|
||||
static uint64_t __attribute__((aligned(8))) w10= 0x0010001000100010LL;
|
||||
static uint64_t __attribute__((aligned(8))) w02= 0x0002000200020002LL;
|
||||
static uint64_t __attribute__((aligned(8))) bm00001111=0x00000000FFFFFFFFLL;
|
||||
static uint64_t __attribute__((aligned(8))) bm00000111=0x0000000000FFFFFFLL;
|
||||
static uint64_t __attribute__((aligned(8))) bm11111000=0xFFFFFFFFFF000000LL;
|
||||
@ -96,9 +98,17 @@ static uint64_t __attribute__((aligned(8))) asm_uvalpha1;
|
||||
// 16bit for now (mmx likes it more compact)
|
||||
static uint16_t __attribute__((aligned(8))) pix_buf_y[4][2048];
|
||||
static uint16_t __attribute__((aligned(8))) pix_buf_uv[2][2048*2];
|
||||
static int16_t __attribute__((aligned(8))) hLumFilter[8000];
|
||||
static int16_t __attribute__((aligned(8))) hLumFilterPos[2000];
|
||||
static int16_t __attribute__((aligned(8))) hChrFilter[8000];
|
||||
static int16_t __attribute__((aligned(8))) hChrFilterPos[2000];
|
||||
#else
|
||||
static uint16_t pix_buf_y[4][2048];
|
||||
static uint16_t pix_buf_uv[2][2048*2];
|
||||
static int16_t hLumFilter[8000];
|
||||
static int16_t hLumFilterPos[2000];
|
||||
static int16_t hChrFilter[8000];
|
||||
static int16_t hChrFilterPos[2000];
|
||||
#endif
|
||||
|
||||
// clipping helper table for C implementations:
|
||||
@ -118,6 +128,11 @@ static int yuvtab_0c92[256];
|
||||
static int yuvtab_1a1e[256];
|
||||
static int yuvtab_40cf[256];
|
||||
|
||||
static int hLumFilterSize;
|
||||
static int hChrFilterSize;
|
||||
|
||||
int sws_flags=0;
|
||||
|
||||
#ifdef CAN_COMPILE_X86_ASM
|
||||
static uint8_t funnyYCode[10000];
|
||||
static uint8_t funnyUVCode[10000];
|
||||
@ -128,9 +143,9 @@ static int canMMX2BeUsed=0;
|
||||
#ifdef CAN_COMPILE_X86_ASM
|
||||
void in_asm_used_var_warning_killer()
|
||||
{
|
||||
int i= yCoeff+vrCoeff+ubCoeff+vgCoeff+ugCoeff+bF8+bFC+w400+w80+w10+
|
||||
volatile int i= yCoeff+vrCoeff+ubCoeff+vgCoeff+ugCoeff+bF8+bFC+w400+w80+w10+
|
||||
bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+temp0+asm_yalpha1+ asm_uvalpha1+
|
||||
M24A+M24B+M24C;
|
||||
M24A+M24B+M24C+w02 + funnyYCode[0]+ funnyUVCode[0]+b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0];
|
||||
if(i) i=0;
|
||||
}
|
||||
#endif
|
||||
|
@ -1,4 +1,8 @@
|
||||
|
||||
#define SWS_FAST_BILINEAR 0
|
||||
#define SWS_BILINEAR 1
|
||||
#define SWS_BICUBIC 2
|
||||
|
||||
// *** bilinear scaling and yuv->rgb & yuv->yuv conversion of yv12 slices:
|
||||
// *** Note: it's called multiple times while decoding a frame, first time y==0
|
||||
// *** Designed to upscale, but may work for downscale too.
|
||||
|
@ -5,6 +5,11 @@
|
||||
// current version mostly by Michael Niedermayer (michaelni@gmx.at)
|
||||
// the parts written by michael are under GNU GPL
|
||||
|
||||
/* TODO
|
||||
Move static / global vars into a struct so multiple scalers can be used
|
||||
write vertical cubic upscale / linear downscale stuff
|
||||
*/
|
||||
|
||||
#undef MOVNTQ
|
||||
#undef PAVGB
|
||||
#undef PREFETCH
|
||||
@ -1154,10 +1159,176 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *buf1, uint16_t *uv
|
||||
#endif
|
||||
}
|
||||
|
||||
// Bilinear / Bicubic scaling
|
||||
static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc,
|
||||
int16_t *filter, int16_t *filterPos, int filterSize)
|
||||
{
|
||||
#ifdef HAVE_MMX
|
||||
if(filterSize==4) // allways true for upscaling, sometimes for down too
|
||||
{
|
||||
int counter= -2*dstW;
|
||||
filter-= counter*2;
|
||||
filterPos-= counter/2;
|
||||
dst-= counter/2;
|
||||
asm volatile(
|
||||
"pxor %%mm7, %%mm7 \n\t"
|
||||
"movq w02, %%mm6 \n\t"
|
||||
"pushl %%ebp \n\t" // we use 7 regs here ...
|
||||
"movl %%eax, %%ebp \n\t"
|
||||
".balign 16 \n\t"
|
||||
"1: \n\t"
|
||||
"movzwl (%2, %%ebp), %%eax \n\t"
|
||||
"movzwl 2(%2, %%ebp), %%ebx \n\t"
|
||||
"movq (%1, %%ebp, 4), %%mm1 \n\t"
|
||||
"movq 8(%1, %%ebp, 4), %%mm3 \n\t"
|
||||
"movd (%3, %%eax), %%mm0 \n\t"
|
||||
"movd (%3, %%ebx), %%mm2 \n\t"
|
||||
"punpcklbw %%mm7, %%mm0 \n\t"
|
||||
"punpcklbw %%mm7, %%mm2 \n\t"
|
||||
"pmaddwd %%mm1, %%mm0 \n\t"
|
||||
"pmaddwd %%mm2, %%mm3 \n\t"
|
||||
"psrad $8, %%mm0 \n\t"
|
||||
"psrad $8, %%mm3 \n\t"
|
||||
"packssdw %%mm3, %%mm0 \n\t"
|
||||
"pmaddwd %%mm6, %%mm0 \n\t"
|
||||
"packssdw %%mm0, %%mm0 \n\t"
|
||||
"movd %%mm0, (%4, %%ebp) \n\t"
|
||||
"addl $4, %%ebp \n\t"
|
||||
" jnc 1b \n\t"
|
||||
|
||||
"popl %%ebp \n\t"
|
||||
: "+a" (counter)
|
||||
: "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
|
||||
: "%ebx"
|
||||
);
|
||||
}
|
||||
else if(filterSize==8)
|
||||
{
|
||||
int counter= -2*dstW;
|
||||
filter-= counter*4;
|
||||
filterPos-= counter/2;
|
||||
dst-= counter/2;
|
||||
asm volatile(
|
||||
"pxor %%mm7, %%mm7 \n\t"
|
||||
"movq w02, %%mm6 \n\t"
|
||||
"pushl %%ebp \n\t" // we use 7 regs here ...
|
||||
"movl %%eax, %%ebp \n\t"
|
||||
".balign 16 \n\t"
|
||||
"1: \n\t"
|
||||
"movzwl (%2, %%ebp), %%eax \n\t"
|
||||
"movzwl 2(%2, %%ebp), %%ebx \n\t"
|
||||
"movq (%1, %%ebp, 8), %%mm1 \n\t"
|
||||
"movq 16(%1, %%ebp, 8), %%mm3 \n\t"
|
||||
"movd (%3, %%eax), %%mm0 \n\t"
|
||||
"movd (%3, %%ebx), %%mm2 \n\t"
|
||||
"punpcklbw %%mm7, %%mm0 \n\t"
|
||||
"punpcklbw %%mm7, %%mm2 \n\t"
|
||||
"pmaddwd %%mm1, %%mm0 \n\t"
|
||||
"pmaddwd %%mm2, %%mm3 \n\t"
|
||||
|
||||
"movq 8(%1, %%ebp, 8), %%mm1 \n\t"
|
||||
"movq 24(%1, %%ebp, 8), %%mm5 \n\t"
|
||||
"movd 4(%3, %%eax), %%mm4 \n\t"
|
||||
"movd 4(%3, %%ebx), %%mm2 \n\t"
|
||||
"punpcklbw %%mm7, %%mm4 \n\t"
|
||||
"punpcklbw %%mm7, %%mm2 \n\t"
|
||||
"pmaddwd %%mm1, %%mm4 \n\t"
|
||||
"pmaddwd %%mm2, %%mm5 \n\t"
|
||||
"paddd %%mm4, %%mm0 \n\t"
|
||||
"paddd %%mm5, %%mm3 \n\t"
|
||||
|
||||
"psrad $8, %%mm0 \n\t"
|
||||
"psrad $8, %%mm3 \n\t"
|
||||
"packssdw %%mm3, %%mm0 \n\t"
|
||||
"pmaddwd %%mm6, %%mm0 \n\t"
|
||||
"packssdw %%mm0, %%mm0 \n\t"
|
||||
"movd %%mm0, (%4, %%ebp) \n\t"
|
||||
"addl $4, %%ebp \n\t"
|
||||
" jnc 1b \n\t"
|
||||
|
||||
"popl %%ebp \n\t"
|
||||
: "+a" (counter)
|
||||
: "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
|
||||
: "%ebx"
|
||||
);
|
||||
}
|
||||
else
|
||||
{
|
||||
int counter= -2*dstW;
|
||||
// filter-= counter*filterSize/2;
|
||||
filterPos-= counter/2;
|
||||
dst-= counter/2;
|
||||
asm volatile(
|
||||
"pxor %%mm7, %%mm7 \n\t"
|
||||
"movq w02, %%mm6 \n\t"
|
||||
".balign 16 \n\t"
|
||||
"1: \n\t"
|
||||
"movl %2, %%ecx \n\t"
|
||||
"movzwl (%%ecx, %0), %%eax \n\t"
|
||||
"movzwl 2(%%ecx, %0), %%ebx \n\t"
|
||||
"movl %5, %%ecx \n\t"
|
||||
"pxor %%mm4, %%mm4 \n\t"
|
||||
"pxor %%mm5, %%mm5 \n\t"
|
||||
"2: \n\t"
|
||||
"movq (%1), %%mm1 \n\t"
|
||||
"movq (%1, %6), %%mm3 \n\t"
|
||||
"movd (%%ecx, %%eax), %%mm0 \n\t"
|
||||
"movd (%%ecx, %%ebx), %%mm2 \n\t"
|
||||
"punpcklbw %%mm7, %%mm0 \n\t"
|
||||
"punpcklbw %%mm7, %%mm2 \n\t"
|
||||
"pmaddwd %%mm1, %%mm0 \n\t"
|
||||
"pmaddwd %%mm2, %%mm3 \n\t"
|
||||
"paddd %%mm3, %%mm5 \n\t"
|
||||
"paddd %%mm0, %%mm4 \n\t"
|
||||
"addl $8, %1 \n\t"
|
||||
"addl $4, %%ecx \n\t"
|
||||
"cmpl %4, %%ecx \n\t"
|
||||
" jb 2b \n\t"
|
||||
"addl %6, %1 \n\t"
|
||||
"psrad $8, %%mm4 \n\t"
|
||||
"psrad $8, %%mm5 \n\t"
|
||||
"packssdw %%mm5, %%mm4 \n\t"
|
||||
"pmaddwd %%mm6, %%mm4 \n\t"
|
||||
"packssdw %%mm4, %%mm4 \n\t"
|
||||
"movl %3, %%eax \n\t"
|
||||
"movd %%mm4, (%%eax, %0) \n\t"
|
||||
"addl $4, %0 \n\t"
|
||||
" jnc 1b \n\t"
|
||||
|
||||
: "+r" (counter)
|
||||
: "r" (filter), "m" (filterPos), "m" (dst), "m"(src+filterSize),
|
||||
"m" (src), "r" (filterSize*2)
|
||||
: "%ebx", "%eax", "%ecx", "%edx"
|
||||
);
|
||||
}
|
||||
#else
|
||||
int i;
|
||||
for(i=0; i<dstW; i++)
|
||||
{
|
||||
int j;
|
||||
int srcPos= filterPos[i];
|
||||
int val=0;
|
||||
// printf("filterPos: %d\n", hFilterPos[i]);
|
||||
for(j=0; j<filterSize; j++)
|
||||
{
|
||||
// printf("filter: %d, src: %d\n", filter[i], src[srcPos + j]);
|
||||
val += ((int)src[srcPos + j])*filter[filterSize*i + j];
|
||||
}
|
||||
// filter += hFilterSize;
|
||||
dst[i] = MIN(MAX(0, val>>7), (1<<15)-1); // the cubic equation does overflow ...
|
||||
// dst[i] = val>>7;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
// *** horizontal scale Y line to temp buffer
|
||||
static inline void RENAME(hyscale)(uint16_t *dst, int dstWidth, uint8_t *src, int srcW, int xInc)
|
||||
{
|
||||
// *** horizontal scale Y line to temp buffer
|
||||
if(sws_flags != SWS_FAST_BILINEAR)
|
||||
{
|
||||
RENAME(hScale)(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
|
||||
}
|
||||
else // Fast Bilinear upscale / crap downscale
|
||||
{
|
||||
#ifdef ARCH_X86
|
||||
#ifdef HAVE_MMX2
|
||||
int i;
|
||||
@ -1267,11 +1438,19 @@ FUNNY_Y_CODE
|
||||
xpos+=xInc;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
inline static void RENAME(hcscale)(uint16_t *dst, int dstWidth,
|
||||
uint8_t *src1, uint8_t *src2, int srcW, int xInc)
|
||||
{
|
||||
if(sws_flags != SWS_FAST_BILINEAR)
|
||||
{
|
||||
RENAME(hScale)(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
|
||||
RENAME(hScale)(dst+2048, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
|
||||
}
|
||||
else // Fast Bilinear upscale / crap downscale
|
||||
{
|
||||
#ifdef ARCH_X86
|
||||
#ifdef HAVE_MMX2
|
||||
int i;
|
||||
@ -1402,6 +1581,162 @@ FUNNYUVCODE
|
||||
xpos+=xInc;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
static void inline RENAME(initFilter)(int16_t *filter, int16_t *filterPos, int *filterSize, int xInc,
|
||||
int srcW, int dstW)
|
||||
{
|
||||
int i;
|
||||
#ifdef HAVE_MMX
|
||||
asm volatile("emms\n\t"::: "memory"); //FIXME this shouldnt be required but it IS
|
||||
#endif
|
||||
|
||||
if(xInc <= (1<<16)) // upscale / cubic interpolate
|
||||
{
|
||||
int i;
|
||||
int xDstInSrc;
|
||||
if(sws_flags==SWS_BICUBIC) *filterSize= 4;
|
||||
else *filterSize= 2;
|
||||
// printf("%d %d %d\n", filterSize, srcW, dstW);
|
||||
#ifdef HAVE_MMX
|
||||
*filterSize= (*filterSize +3) & (~3); // -> *filterSize %4 == 0
|
||||
#endif
|
||||
xDstInSrc= xInc - 0x8000;
|
||||
for(i=0; i<dstW; i++)
|
||||
{
|
||||
int xx= (xDstInSrc>>16) - (*filterSize>>1) + 1;
|
||||
int j;
|
||||
|
||||
filterPos[i]= xx;
|
||||
if(sws_flags == SWS_BICUBIC)
|
||||
{
|
||||
double d= ABS(((xx+1)<<16) - xDstInSrc)/(double)(1<<16);
|
||||
// int coeff;
|
||||
int y1,y2,y3,y4;
|
||||
double A= -0.75;
|
||||
// Equation is from VirtualDub
|
||||
y1 = (int)floor(0.5 + ( + A*d - 2.0*A*d*d + A*d*d*d) * 16384.0);
|
||||
y2 = (int)floor(0.5 + (+ 1.0 - (A+3.0)*d*d + (A+2.0)*d*d*d) * 16384.0);
|
||||
y3 = (int)floor(0.5 + ( - A*d + (2.0*A+3.0)*d*d - (A+2.0)*d*d*d) * 16384.0);
|
||||
y4 = (int)floor(0.5 + ( + A*d*d - A*d*d*d) * 16384.0);
|
||||
|
||||
// printf("%d %d %d \n", coeff, (int)d, xDstInSrc);
|
||||
filter[i*(*filterSize) + 0]= y1;
|
||||
filter[i*(*filterSize) + 1]= y2;
|
||||
filter[i*(*filterSize) + 2]= y3;
|
||||
filter[i*(*filterSize) + 3]= y4;
|
||||
// printf("%1.3f %d, %d, %d, %d\n",d , y1, y2, y3, y4);
|
||||
}
|
||||
else
|
||||
{
|
||||
for(j=0; j<*filterSize; j++)
|
||||
{
|
||||
double d= ABS((xx<<16) - xDstInSrc)/(double)(1<<16);
|
||||
int coeff;
|
||||
coeff= (int)(0.5 + (1.0 - d)*(1<<14));
|
||||
if(coeff<0) coeff=0;
|
||||
// printf("%d %d %d \n", coeff, (int)d, xDstInSrc);
|
||||
filter[i*(*filterSize) + j]= coeff;
|
||||
xx++;
|
||||
}
|
||||
}
|
||||
xDstInSrc+= xInc;
|
||||
}
|
||||
}
|
||||
else // downscale
|
||||
{
|
||||
int xDstInSrc;
|
||||
if(sws_flags==SWS_BICUBIC) *filterSize= (int)ceil(1 + 4.0*srcW / (double)dstW);
|
||||
else *filterSize= (int)ceil(1 + 2.0*srcW / (double)dstW);
|
||||
// printf("%d %d %d\n", *filterSize, srcW, dstW);
|
||||
#ifdef HAVE_MMX
|
||||
*filterSize= (*filterSize +3) & (~3); // -> *filterSize %4 == 0
|
||||
#endif
|
||||
xDstInSrc= xInc - 0x8000;
|
||||
for(i=0; i<dstW; i++)
|
||||
{
|
||||
int xx= (int)((double)xDstInSrc/(double)(1<<16) - *filterSize*0.5 + 0.5);
|
||||
int j;
|
||||
|
||||
filterPos[i]= xx;
|
||||
for(j=0; j<*filterSize; j++)
|
||||
{
|
||||
double d= ABS((xx<<16) - xDstInSrc)/(double)xInc;
|
||||
int coeff;
|
||||
if(sws_flags == SWS_BICUBIC)
|
||||
{
|
||||
double A= -0.75;
|
||||
// d*=2;
|
||||
// Equation is from VirtualDub
|
||||
if(d<1.0)
|
||||
coeff = (int)floor(0.5 + (1.0 - (A+3.0)*d*d
|
||||
+ (A+2.0)*d*d*d) * (1<<14));
|
||||
else if(d<2.0)
|
||||
coeff = (int)floor(0.5 + (-4.0*A + 8.0*A*d
|
||||
- 5.0*A*d*d + A*d*d*d) * (1<<14));
|
||||
else
|
||||
coeff=0;
|
||||
}
|
||||
else
|
||||
{
|
||||
coeff= (int)(0.5 + (1.0 - d)*(1<<14));
|
||||
if(coeff<0) coeff=0;
|
||||
}
|
||||
// printf("%d %d %d \n", coeff, (int)d, xDstInSrc);
|
||||
filter[i*(*filterSize) + j]= coeff;
|
||||
xx++;
|
||||
}
|
||||
xDstInSrc+= xInc;
|
||||
}
|
||||
}
|
||||
|
||||
//fix borders
|
||||
for(i=0; i<dstW; i++)
|
||||
{
|
||||
int j;
|
||||
if(filterPos[i] < 0)
|
||||
{
|
||||
// Move filter coeffs left to compensate for filterPos
|
||||
for(j=1; j<*filterSize; j++)
|
||||
{
|
||||
int left= MAX(j + filterPos[i], 0);
|
||||
filter[i*(*filterSize) + left] += filter[i*(*filterSize) + j];
|
||||
filter[i*(*filterSize) + j]=0;
|
||||
}
|
||||
filterPos[i]= 0;
|
||||
}
|
||||
|
||||
if(filterPos[i] + *filterSize > srcW)
|
||||
{
|
||||
int shift= filterPos[i] + *filterSize - srcW;
|
||||
// Move filter coeffs right to compensate for filterPos
|
||||
for(j=*filterSize-2; j>=0; j--)
|
||||
{
|
||||
int right= MIN(j + shift, *filterSize-1);
|
||||
filter[i*(*filterSize) +right] += filter[i*(*filterSize) +j];
|
||||
filter[i*(*filterSize) +j]=0;
|
||||
}
|
||||
filterPos[i]= srcW - *filterSize;
|
||||
}
|
||||
}
|
||||
|
||||
//Normalize
|
||||
for(i=0; i<dstW; i++)
|
||||
{
|
||||
int j;
|
||||
double sum=0;
|
||||
double scale=1<<14;
|
||||
for(j=0; j<*filterSize; j++)
|
||||
{
|
||||
sum+= filter[i*(*filterSize) + j];
|
||||
}
|
||||
scale/= sum;
|
||||
for(j=0; j<*filterSize; j++)
|
||||
{
|
||||
filter[i*(*filterSize) + j]= (int)(filter[i*(*filterSize) + j]*scale);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void RENAME(SwScale_YV12slice)(unsigned char* srcptr[],int stride[], int srcSliceY ,
|
||||
@ -1421,11 +1756,8 @@ static int dstY;
|
||||
static int lastLumSrcY;
|
||||
static int lastChrSrcY;
|
||||
|
||||
#ifdef HAVE_MMX2
|
||||
// used to detect a horizontal size change
|
||||
static int old_dstW= -1;
|
||||
static int old_s_xinc= -1;
|
||||
#endif
|
||||
static int oldDstW= -1;
|
||||
static int oldSrcW= -1;
|
||||
|
||||
int dstUVw;
|
||||
int i;
|
||||
@ -1469,10 +1801,19 @@ else s_xinc2= s_xinc;
|
||||
= pix_buf_uv[0][2048+i/2] = pix_buf_uv[1][2048+i/2] = 128*128;
|
||||
pix_buf_y[0][i]= pix_buf_y[1][i]= 0;
|
||||
}
|
||||
|
||||
//precalculate horizontal scaler filter coefficients
|
||||
if(oldDstW!=dstW || oldSrcW!=srcW)
|
||||
{
|
||||
// int i;
|
||||
oldDstW= dstW; oldSrcW= srcW;
|
||||
|
||||
RENAME(initFilter)(hLumFilter, hLumFilterPos, &hLumFilterSize, s_xinc, srcW, dstW);
|
||||
RENAME(initFilter)(hChrFilter, hChrFilterPos, &hChrFilterSize, s_xinc2, srcW, dstW);
|
||||
|
||||
#ifdef HAVE_MMX2
|
||||
// cant downscale !!!
|
||||
if((old_s_xinc != s_xinc || old_dstW!=dstW) && canMMX2BeUsed)
|
||||
if(canMMX2BeUsed)
|
||||
{
|
||||
uint8_t *fragment;
|
||||
int imm8OfPShufW1;
|
||||
@ -1481,9 +1822,6 @@ else s_xinc2= s_xinc;
|
||||
|
||||
int xpos, i;
|
||||
|
||||
old_s_xinc= s_xinc;
|
||||
old_dstW= dstW;
|
||||
|
||||
// create an optimized horizontal scaling routine
|
||||
|
||||
//code fragment
|
||||
@ -1532,20 +1870,6 @@ else s_xinc2= s_xinc;
|
||||
|
||||
xpos= 0; //s_xinc/2 - 0x8000; // difference between pixel centers
|
||||
|
||||
/* choose xinc so that all 8 parts fit exactly
|
||||
Note: we cannot use just 1 part because it would not fit in the code cache */
|
||||
// s_xinc2_diff= -((((s_xinc2*(dstW/8))&0xFFFF))/(dstW/8))-10;
|
||||
// s_xinc_diff= -((((s_xinc*(dstW/8))&0xFFFF))/(dstW/8));
|
||||
#ifdef ALT_ERROR
|
||||
// s_xinc2_diff+= ((0x10000/(dstW/8)));
|
||||
#endif
|
||||
// s_xinc_diff= s_xinc2_diff*2;
|
||||
|
||||
// s_xinc2+= s_xinc2_diff;
|
||||
// s_xinc+= s_xinc_diff;
|
||||
|
||||
// old_s_xinc= s_xinc;
|
||||
|
||||
for(i=0; i<dstW/8; i++)
|
||||
{
|
||||
int xx=xpos>>16;
|
||||
@ -1602,6 +1926,7 @@ else s_xinc2= s_xinc;
|
||||
}
|
||||
|
||||
#endif // HAVE_MMX2
|
||||
} // Init stuff
|
||||
} // reset counters
|
||||
|
||||
while(1){
|
||||
|
Loading…
Reference in New Issue
Block a user