mirror of https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-29 22:00:58 +02:00
swscale: dither for planar yuv outputs
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
parent 877f76ad33
commit 6713989c23
@ -282,6 +282,8 @@ DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={
|
||||
{ 112, 16,104, 8,118, 22,110, 14,},
|
||||
}};
|
||||
|
||||
static const uint8_t flat64[8]={64,64,64,64,64,64,64,64};
|
||||
|
||||
uint16_t dither_scale[15][16]={
|
||||
{ 2, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,},
|
||||
{ 2, 3, 7, 7, 13, 13, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,},
|
||||
@ -417,12 +419,13 @@ static inline void yuv2yuvX16inC(const int16_t *lumFilter, const int16_t **lumSr
|
||||
static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
|
||||
const int16_t *chrFilter, const int16_t **chrUSrc,
|
||||
const int16_t **chrVSrc, int chrFilterSize,
|
||||
const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, int dstW, int chrDstW)
|
||||
const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, int dstW, int chrDstW,
|
||||
const uint8_t *lumDither, const uint8_t *chrDither)
|
||||
{
|
||||
//FIXME Optimize (just quickly written not optimized..)
|
||||
int i;
|
||||
for (i=0; i<dstW; i++) {
|
||||
int val=1<<18;
|
||||
int val = lumDither[i&7] << 12;
|
||||
int j;
|
||||
for (j=0; j<lumFilterSize; j++)
|
||||
val += lumSrc[j][i] * lumFilter[j];
|
||||
@ -432,8 +435,8 @@ static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc,
|
||||
|
||||
if (uDest)
|
||||
for (i=0; i<chrDstW; i++) {
|
||||
int u=1<<18;
|
||||
int v=1<<18;
|
||||
int u = chrDither[i&7] << 12;
|
||||
int v = chrDither[(i+3)&7] << 12;
|
||||
int j;
|
||||
for (j=0; j<chrFilterSize; j++) {
|
||||
u += chrUSrc[j][i] * chrFilter[j];
|
||||
@ -446,7 +449,7 @@ static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc,
|
||||
|
||||
if (CONFIG_SWSCALE_ALPHA && aDest)
|
||||
for (i=0; i<dstW; i++) {
|
||||
int val=1<<18;
|
||||
int val = lumDither[i&7] << 12;
|
||||
int j;
|
||||
for (j=0; j<lumFilterSize; j++)
|
||||
val += alpSrc[j][i] * lumFilter[j];
|
||||
@ -459,12 +462,13 @@ static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc,
|
||||
static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
|
||||
const int16_t *chrFilter, const int16_t **chrUSrc,
|
||||
const int16_t **chrVSrc, int chrFilterSize,
|
||||
uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
|
||||
uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat,
|
||||
const uint8_t *lumDither, const uint8_t *chrDither)
|
||||
{
|
||||
//FIXME Optimize (just quickly written not optimized..)
|
||||
int i;
|
||||
for (i=0; i<dstW; i++) {
|
||||
int val=1<<18;
|
||||
int val = lumDither[i&7]<<12;
|
||||
int j;
|
||||
for (j=0; j<lumFilterSize; j++)
|
||||
val += lumSrc[j][i] * lumFilter[j];
|
||||
@ -477,8 +481,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
|
||||
|
||||
if (dstFormat == PIX_FMT_NV12)
|
||||
for (i=0; i<chrDstW; i++) {
|
||||
int u=1<<18;
|
||||
int v=1<<18;
|
||||
int u = chrDither[i&7]<<12;
|
||||
int v = chrDither[(i+3)&7]<<12;
|
||||
int j;
|
||||
for (j=0; j<chrFilterSize; j++) {
|
||||
u += chrUSrc[j][i] * chrFilter[j];
|
||||
@ -490,8 +494,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
|
||||
}
|
||||
else
|
||||
for (i=0; i<chrDstW; i++) {
|
||||
int u=1<<18;
|
||||
int v=1<<18;
|
||||
int u = chrDither[i&7]<<12;
|
||||
int v = chrDither[(i+3)&7]<<12;
|
||||
int j;
|
||||
for (j=0; j<chrFilterSize; j++) {
|
||||
u += chrUSrc[j][i] * chrFilter[j];
|
||||
|
@ -195,6 +195,8 @@ typedef struct SwsContext {
|
||||
#define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48"
|
||||
#define UV_OFF "11*8+4*4*256*3+48"
|
||||
#define UV_OFFx2 "11*8+4*4*256*3+56"
|
||||
#define DITHER16 "11*8+4*4*256*3+64"
|
||||
#define DITHER32 "11*8+4*4*256*3+64+16"
|
||||
|
||||
DECLARE_ALIGNED(8, uint64_t, redDither);
|
||||
DECLARE_ALIGNED(8, uint64_t, greenDither);
|
||||
@ -219,6 +221,8 @@ typedef struct SwsContext {
|
||||
int32_t alpMmxFilter[4*MAX_FILTER_SIZE];
|
||||
DECLARE_ALIGNED(8, ptrdiff_t, uv_off); ///< offset (in pixels) between u and v planes
|
||||
DECLARE_ALIGNED(8, ptrdiff_t, uv_offx2); ///< offset (in bytes) between u and v planes
|
||||
uint16_t dither16[8];
|
||||
uint32_t dither32[8];
|
||||
|
||||
#if HAVE_ALTIVEC
|
||||
vector signed short CY;
|
||||
@ -255,13 +259,13 @@ typedef struct SwsContext {
|
||||
const int16_t *chrFilter, const int16_t **chrUSrc,
|
||||
const int16_t **chrVSrc, int chrFilterSize,
|
||||
uint8_t *dest, uint8_t *uDest,
|
||||
int dstW, int chrDstW, int dstFormat);
|
||||
int dstW, int chrDstW, int dstFormat, const uint8_t *lumDither, const uint8_t *chrDither);
|
||||
void (*yuv2yuv1 )(struct SwsContext *c,
|
||||
const int16_t *lumSrc, const int16_t *chrUSrc,
|
||||
const int16_t *chrVSrc, const int16_t *alpSrc,
|
||||
uint8_t *dest,
|
||||
uint8_t *uDest, uint8_t *vDest, uint8_t *aDest,
|
||||
int dstW, int chrDstW);
|
||||
int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither);
|
||||
void (*yuv2yuvX )(struct SwsContext *c,
|
||||
const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
|
||||
const int16_t *chrFilter, const int16_t **chrUSrc,
|
||||
@ -269,7 +273,7 @@ typedef struct SwsContext {
|
||||
const int16_t **alpSrc,
|
||||
uint8_t *dest,
|
||||
uint8_t *uDest, uint8_t *vDest, uint8_t *aDest,
|
||||
int dstW, int chrDstW);
|
||||
int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither);
|
||||
void (*yuv2packed1)(struct SwsContext *c,
|
||||
const uint16_t *buf0,
|
||||
const uint16_t *ubuf0, const uint16_t *ubuf1,
|
||||
|
@ -24,11 +24,11 @@ static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
|
||||
const int16_t **chrVSrc,
|
||||
int chrFilterSize, const int16_t **alpSrc,
|
||||
uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
|
||||
uint8_t *aDest, int dstW, int chrDstW)
|
||||
uint8_t *aDest, int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither)
|
||||
{
|
||||
yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
|
||||
chrFilter, chrUSrc, chrVSrc, chrFilterSize,
|
||||
alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW);
|
||||
alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither);
|
||||
}
|
||||
|
||||
static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
|
||||
@ -36,36 +36,37 @@ static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
|
||||
const int16_t *chrFilter, const int16_t **chrUSrc,
|
||||
const int16_t **chrVSrc,
|
||||
int chrFilterSize, uint8_t *dest, uint8_t *uDest,
|
||||
int dstW, int chrDstW, enum PixelFormat dstFormat)
|
||||
int dstW, int chrDstW, enum PixelFormat dstFormat, const uint8_t *dither, const uint8_t *chrDither)
|
||||
{
|
||||
yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
|
||||
chrFilter, chrUSrc, chrVSrc, chrFilterSize,
|
||||
dest, uDest, dstW, chrDstW, dstFormat);
|
||||
dest, uDest, dstW, chrDstW, dstFormat, dither, chrDither);
|
||||
}
|
||||
|
||||
static inline void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
|
||||
const int16_t *chrUSrc, const int16_t *chrVSrc,
|
||||
const int16_t *alpSrc,
|
||||
uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
|
||||
uint8_t *aDest, int dstW, int chrDstW)
|
||||
uint8_t *aDest, int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i=0; i<dstW; i++) {
|
||||
int val= (lumSrc[i]+64)>>7;
|
||||
int val= (lumSrc[i]+lumDither[i&7])>>7;
|
||||
dest[i]= av_clip_uint8(val);
|
||||
}
|
||||
|
||||
if (uDest)
|
||||
for (i=0; i<chrDstW; i++) {
|
||||
int u=(chrUSrc[i]+64)>>7;
|
||||
int v=(chrVSrc[i]+64)>>7;
|
||||
int u=(chrUSrc[i]+chrDither[i&7])>>7;
|
||||
int v=(chrVSrc[i]+chrDither[(i+3)&7])>>7;
|
||||
uDest[i]= av_clip_uint8(u);
|
||||
vDest[i]= av_clip_uint8(v);
|
||||
}
|
||||
|
||||
if (CONFIG_SWSCALE_ALPHA && aDest)
|
||||
for (i=0; i<dstW; i++) {
|
||||
int val= (alpSrc[i]+64)>>7;
|
||||
int val= (alpSrc[i]+lumDither[i&7])>>7;
|
||||
aDest[i]= av_clip_uint8(val);
|
||||
}
|
||||
}
|
||||
@ -609,6 +610,8 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
|
||||
unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
|
||||
unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
|
||||
unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
|
||||
const uint8_t *lumDither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat) ? dithers[7][dstY &7] : flat64;
|
||||
const uint8_t *chrDither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat) ? dithers[7][chrDstY&7] : flat64;
|
||||
|
||||
const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
|
||||
const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
|
||||
@ -699,7 +702,7 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
|
||||
c->yuv2nv12X(c,
|
||||
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
|
||||
vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
|
||||
dest, uDest, dstW, chrDstW, dstFormat);
|
||||
dest, uDest, dstW, chrDstW, dstFormat, lumDither, chrDither);
|
||||
} else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
|
||||
const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
|
||||
if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
|
||||
@ -716,13 +719,13 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
|
||||
const int16_t *chrVBuf= chrVSrcPtr[0];
|
||||
const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
|
||||
c->yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
|
||||
uDest, vDest, aDest, dstW, chrDstW);
|
||||
uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither);
|
||||
} else { //General YV12
|
||||
c->yuv2yuvX(c,
|
||||
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
|
||||
vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
|
||||
chrVSrcPtr, vChrFilterSize,
|
||||
alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
|
||||
alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither);
|
||||
}
|
||||
} else {
|
||||
assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
|
||||
@ -784,7 +787,7 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
|
||||
yuv2nv12XinC(
|
||||
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
|
||||
vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
|
||||
dest, uDest, dstW, chrDstW, dstFormat);
|
||||
dest, uDest, dstW, chrDstW, dstFormat, lumDither, chrDither);
|
||||
} else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
|
||||
const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
|
||||
if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
|
||||
@ -798,7 +801,7 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
|
||||
yuv2yuvXinC(
|
||||
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
|
||||
vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
|
||||
alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
|
||||
alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither);
|
||||
}
|
||||
} else {
|
||||
assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
|
||||
|
@ -39,8 +39,8 @@
|
||||
|
||||
#define YSCALEYUV2YV12X(offset, dest, end, pos) \
|
||||
__asm__ volatile(\
|
||||
"movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
|
||||
"movq %%mm3, %%mm4 \n\t"\
|
||||
"movq "DITHER16"+0(%0), %%mm3 \n\t"\
|
||||
"movq "DITHER16"+8(%0), %%mm4 \n\t"\
|
||||
"lea " offset "(%0), %%"REG_d" \n\t"\
|
||||
"mov (%%"REG_d"), %%"REG_S" \n\t"\
|
||||
".p2align 4 \n\t" /* FIXME Unroll? */\
|
||||
@ -62,8 +62,8 @@
|
||||
MOVNTQ(%%mm3, (%1, %3))\
|
||||
"add $8, %3 \n\t"\
|
||||
"cmp %2, %3 \n\t"\
|
||||
"movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
|
||||
"movq %%mm3, %%mm4 \n\t"\
|
||||
"movq "DITHER16"+0(%0), %%mm3 \n\t"\
|
||||
"movq "DITHER16"+8(%0), %%mm4 \n\t"\
|
||||
"lea " offset "(%0), %%"REG_d" \n\t"\
|
||||
"mov (%%"REG_d"), %%"REG_S" \n\t"\
|
||||
"jb 1b \n\t"\
|
||||
@ -78,13 +78,18 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
|
||||
const int16_t **chrVSrc,
|
||||
int chrFilterSize, const int16_t **alpSrc,
|
||||
uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
|
||||
uint8_t *aDest, int dstW, int chrDstW)
|
||||
uint8_t *aDest, int dstW, int chrDstW,
|
||||
const uint8_t *lumDither, const uint8_t *chrDither)
|
||||
{
|
||||
int i;
|
||||
if (uDest) {
|
||||
x86_reg uv_off = c->uv_off;
|
||||
for(i=0; i<8; i++) c->dither16[i] = chrDither[i]>>4;
|
||||
YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
|
||||
for(i=0; i<8; i++) c->dither16[i] = chrDither[(i+3)&7]>>4;
|
||||
YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, vDest - uv_off, chrDstW + uv_off, uv_off)
|
||||
}
|
||||
for(i=0; i<8; i++) c->dither16[i] = lumDither[i]>>4;
|
||||
if (CONFIG_SWSCALE_ALPHA && aDest) {
|
||||
YSCALEYUV2YV12X(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0)
|
||||
}
|
||||
@ -95,6 +100,10 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
|
||||
#define YSCALEYUV2YV12X_ACCURATE(offset, dest, end, pos) \
|
||||
__asm__ volatile(\
|
||||
"lea " offset "(%0), %%"REG_d" \n\t"\
|
||||
"movq "DITHER32"+0(%0), %%mm4 \n\t"\
|
||||
"movq "DITHER32"+8(%0), %%mm5 \n\t"\
|
||||
"movq "DITHER32"+16(%0), %%mm6 \n\t"\
|
||||
"movq "DITHER32"+24(%0), %%mm7 \n\t"\
|
||||
"pxor %%mm4, %%mm4 \n\t"\
|
||||
"pxor %%mm5, %%mm5 \n\t"\
|
||||
"pxor %%mm6, %%mm6 \n\t"\
|
||||
@ -126,26 +135,21 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
|
||||
"paddd %%mm2, %%mm6 \n\t"\
|
||||
"paddd %%mm0, %%mm7 \n\t"\
|
||||
" jnz 1b \n\t"\
|
||||
"psrad $16, %%mm4 \n\t"\
|
||||
"psrad $16, %%mm5 \n\t"\
|
||||
"psrad $16, %%mm6 \n\t"\
|
||||
"psrad $16, %%mm7 \n\t"\
|
||||
"movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
|
||||
"psrad $19, %%mm4 \n\t"\
|
||||
"psrad $19, %%mm5 \n\t"\
|
||||
"psrad $19, %%mm6 \n\t"\
|
||||
"psrad $19, %%mm7 \n\t"\
|
||||
"packssdw %%mm5, %%mm4 \n\t"\
|
||||
"packssdw %%mm7, %%mm6 \n\t"\
|
||||
"paddw %%mm0, %%mm4 \n\t"\
|
||||
"paddw %%mm0, %%mm6 \n\t"\
|
||||
"psraw $3, %%mm4 \n\t"\
|
||||
"psraw $3, %%mm6 \n\t"\
|
||||
"packuswb %%mm6, %%mm4 \n\t"\
|
||||
MOVNTQ(%%mm4, (%1, %3))\
|
||||
"add $8, %3 \n\t"\
|
||||
"cmp %2, %3 \n\t"\
|
||||
"lea " offset "(%0), %%"REG_d" \n\t"\
|
||||
"pxor %%mm4, %%mm4 \n\t"\
|
||||
"pxor %%mm5, %%mm5 \n\t"\
|
||||
"pxor %%mm6, %%mm6 \n\t"\
|
||||
"pxor %%mm7, %%mm7 \n\t"\
|
||||
"movq "DITHER32"+0(%0), %%mm4 \n\t"\
|
||||
"movq "DITHER32"+8(%0), %%mm5 \n\t"\
|
||||
"movq "DITHER32"+16(%0), %%mm6 \n\t"\
|
||||
"movq "DITHER32"+24(%0), %%mm7 \n\t"\
|
||||
"mov (%%"REG_d"), %%"REG_S" \n\t"\
|
||||
"jb 1b \n\t"\
|
||||
:: "r" (&c->redDither),\
|
||||
@ -159,13 +163,18 @@ static inline void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
|
||||
const int16_t **chrVSrc,
|
||||
int chrFilterSize, const int16_t **alpSrc,
|
||||
uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
|
||||
uint8_t *aDest, int dstW, int chrDstW)
|
||||
uint8_t *aDest, int dstW, int chrDstW,
|
||||
const uint8_t *lumDither, const uint8_t *chrDither)
|
||||
{
|
||||
int i;
|
||||
if (uDest) {
|
||||
x86_reg uv_off = c->uv_off;
|
||||
for(i=0; i<8; i++) c->dither32[i] = chrDither[i]<<12;
|
||||
YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
|
||||
for(i=0; i<8; i++) c->dither32[i] = chrDither[(i+3)&7]<<12;
|
||||
YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, vDest - uv_off, chrDstW + uv_off, uv_off)
|
||||
}
|
||||
for(i=0; i<8; i++) c->dither32[i] = lumDither[i]<<12;
|
||||
if (CONFIG_SWSCALE_ALPHA && aDest) {
|
||||
YSCALEYUV2YV12X_ACCURATE(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0)
|
||||
}
|
||||
@ -190,7 +199,8 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
|
||||
const int16_t *chrUSrc, const int16_t *chrVSrc,
|
||||
const int16_t *alpSrc,
|
||||
uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
|
||||
uint8_t *aDest, int dstW, int chrDstW)
|
||||
uint8_t *aDest, int dstW, int chrDstW,
|
||||
const uint8_t *lumDither, const uint8_t *chrDither)
|
||||
{
|
||||
int p= 4;
|
||||
const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW };
|
||||
@ -211,14 +221,13 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
|
||||
|
||||
#define YSCALEYUV2YV121_ACCURATE \
|
||||
"mov %2, %%"REG_a" \n\t"\
|
||||
"pcmpeqw %%mm7, %%mm7 \n\t"\
|
||||
"psrlw $15, %%mm7 \n\t"\
|
||||
"psllw $6, %%mm7 \n\t"\
|
||||
"movq 0(%3), %%mm6 \n\t"\
|
||||
"movq 8(%3), %%mm7 \n\t"\
|
||||
".p2align 4 \n\t" /* FIXME Unroll? */\
|
||||
"1: \n\t"\
|
||||
"movq (%0, %%"REG_a", 2), %%mm0 \n\t"\
|
||||
"movq 8(%0, %%"REG_a", 2), %%mm1 \n\t"\
|
||||
"paddsw %%mm7, %%mm0 \n\t"\
|
||||
"paddsw %%mm6, %%mm0 \n\t"\
|
||||
"paddsw %%mm7, %%mm1 \n\t"\
|
||||
"psraw $7, %%mm0 \n\t"\
|
||||
"psraw $7, %%mm1 \n\t"\
|
||||
@ -231,7 +240,8 @@ static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
|
||||
const int16_t *chrUSrc, const int16_t *chrVSrc,
|
||||
const int16_t *alpSrc,
|
||||
uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
|
||||
uint8_t *aDest, int dstW, int chrDstW)
|
||||
uint8_t *aDest, int dstW, int chrDstW,
|
||||
const uint8_t *lumDither, const uint8_t *chrDither)
|
||||
{
|
||||
int p= 4;
|
||||
const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW };
|
||||
@ -240,10 +250,12 @@ static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
|
||||
|
||||
while (p--) {
|
||||
if (dst[p]) {
|
||||
int i;
|
||||
for(i=0; i<8; i++) c->dither16[i] = i<2 ? lumDither[i] : chrDither[i];
|
||||
__asm__ volatile(
|
||||
YSCALEYUV2YV121_ACCURATE
|
||||
:: "r" (src[p]), "r" (dst[p] + counter[p]),
|
||||
"g" (-counter[p])
|
||||
"g" (-counter[p]), "r"(c->dither16)
|
||||
: "%"REG_a
|
||||
);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user