|
|
|
@ -2146,17 +2146,17 @@ static int packedCopyWrapper(SwsInternal *c, const uint8_t *const src[],
|
|
|
|
|
if (c->opts.dither == SWS_DITHER_NONE) {\
|
|
|
|
|
for (i = 0; i < height; i++) {\
|
|
|
|
|
for (j = 0; j < length-7; j+=8) {\
|
|
|
|
|
tmp = (bswap(src[j+0]) + bias)>>shift; dst[j+0] = dbswap(tmp - (tmp>>dst_depth));\
|
|
|
|
|
tmp = (bswap(src[j+1]) + bias)>>shift; dst[j+1] = dbswap(tmp - (tmp>>dst_depth));\
|
|
|
|
|
tmp = (bswap(src[j+2]) + bias)>>shift; dst[j+2] = dbswap(tmp - (tmp>>dst_depth));\
|
|
|
|
|
tmp = (bswap(src[j+3]) + bias)>>shift; dst[j+3] = dbswap(tmp - (tmp>>dst_depth));\
|
|
|
|
|
tmp = (bswap(src[j+4]) + bias)>>shift; dst[j+4] = dbswap(tmp - (tmp>>dst_depth));\
|
|
|
|
|
tmp = (bswap(src[j+5]) + bias)>>shift; dst[j+5] = dbswap(tmp - (tmp>>dst_depth));\
|
|
|
|
|
tmp = (bswap(src[j+6]) + bias)>>shift; dst[j+6] = dbswap(tmp - (tmp>>dst_depth));\
|
|
|
|
|
tmp = (bswap(src[j+7]) + bias)>>shift; dst[j+7] = dbswap(tmp - (tmp>>dst_depth));\
|
|
|
|
|
tmp = ((bswap(src[j+0]) >> src_shift) + bias)>>shift; dst[j+0] = dbswap((tmp - (tmp>>dst_depth)) << dst_shift);\
|
|
|
|
|
tmp = ((bswap(src[j+1]) >> src_shift) + bias)>>shift; dst[j+1] = dbswap((tmp - (tmp>>dst_depth)) << dst_shift);\
|
|
|
|
|
tmp = ((bswap(src[j+2]) >> src_shift) + bias)>>shift; dst[j+2] = dbswap((tmp - (tmp>>dst_depth)) << dst_shift);\
|
|
|
|
|
tmp = ((bswap(src[j+3]) >> src_shift) + bias)>>shift; dst[j+3] = dbswap((tmp - (tmp>>dst_depth)) << dst_shift);\
|
|
|
|
|
tmp = ((bswap(src[j+4]) >> src_shift) + bias)>>shift; dst[j+4] = dbswap((tmp - (tmp>>dst_depth)) << dst_shift);\
|
|
|
|
|
tmp = ((bswap(src[j+5]) >> src_shift) + bias)>>shift; dst[j+5] = dbswap((tmp - (tmp>>dst_depth)) << dst_shift);\
|
|
|
|
|
tmp = ((bswap(src[j+6]) >> src_shift) + bias)>>shift; dst[j+6] = dbswap((tmp - (tmp>>dst_depth)) << dst_shift);\
|
|
|
|
|
tmp = ((bswap(src[j+7]) >> src_shift) + bias)>>shift; dst[j+7] = dbswap((tmp - (tmp>>dst_depth)) << dst_shift);\
|
|
|
|
|
}\
|
|
|
|
|
for (; j < length; j++) {\
|
|
|
|
|
tmp = (bswap(src[j]) + bias)>>shift; dst[j] = dbswap(tmp - (tmp>>dst_depth));\
|
|
|
|
|
tmp = (bswap(src[j]) + bias)>>shift; dst[j] = dbswap(tmp - (tmp>>dst_depth) << dst_shift);\
|
|
|
|
|
}\
|
|
|
|
|
dst += dstStride;\
|
|
|
|
|
src += srcStride;\
|
|
|
|
@ -2165,17 +2165,17 @@ static int packedCopyWrapper(SwsInternal *c, const uint8_t *const src[],
|
|
|
|
|
for (i = 0; i < height; i++) {\
|
|
|
|
|
const uint8_t *dither= dithers[shift-1][i&7];\
|
|
|
|
|
for (j = 0; j < length-7; j+=8) {\
|
|
|
|
|
tmp = (bswap(src[j+0]) + dither[0])>>shift; dst[j+0] = dbswap(tmp - (tmp>>dst_depth));\
|
|
|
|
|
tmp = (bswap(src[j+1]) + dither[1])>>shift; dst[j+1] = dbswap(tmp - (tmp>>dst_depth));\
|
|
|
|
|
tmp = (bswap(src[j+2]) + dither[2])>>shift; dst[j+2] = dbswap(tmp - (tmp>>dst_depth));\
|
|
|
|
|
tmp = (bswap(src[j+3]) + dither[3])>>shift; dst[j+3] = dbswap(tmp - (tmp>>dst_depth));\
|
|
|
|
|
tmp = (bswap(src[j+4]) + dither[4])>>shift; dst[j+4] = dbswap(tmp - (tmp>>dst_depth));\
|
|
|
|
|
tmp = (bswap(src[j+5]) + dither[5])>>shift; dst[j+5] = dbswap(tmp - (tmp>>dst_depth));\
|
|
|
|
|
tmp = (bswap(src[j+6]) + dither[6])>>shift; dst[j+6] = dbswap(tmp - (tmp>>dst_depth));\
|
|
|
|
|
tmp = (bswap(src[j+7]) + dither[7])>>shift; dst[j+7] = dbswap(tmp - (tmp>>dst_depth));\
|
|
|
|
|
tmp = ((bswap(src[j+0]) >> src_shift) + dither[0])>>shift; dst[j+0] = dbswap((tmp - (tmp>>dst_depth)) << dst_shift);\
|
|
|
|
|
tmp = ((bswap(src[j+1]) >> src_shift) + dither[1])>>shift; dst[j+1] = dbswap((tmp - (tmp>>dst_depth)) << dst_shift);\
|
|
|
|
|
tmp = ((bswap(src[j+2]) >> src_shift) + dither[2])>>shift; dst[j+2] = dbswap((tmp - (tmp>>dst_depth)) << dst_shift);\
|
|
|
|
|
tmp = ((bswap(src[j+3]) >> src_shift) + dither[3])>>shift; dst[j+3] = dbswap((tmp - (tmp>>dst_depth)) << dst_shift);\
|
|
|
|
|
tmp = ((bswap(src[j+4]) >> src_shift) + dither[4])>>shift; dst[j+4] = dbswap((tmp - (tmp>>dst_depth)) << dst_shift);\
|
|
|
|
|
tmp = ((bswap(src[j+5]) >> src_shift) + dither[5])>>shift; dst[j+5] = dbswap((tmp - (tmp>>dst_depth)) << dst_shift);\
|
|
|
|
|
tmp = ((bswap(src[j+6]) >> src_shift) + dither[6])>>shift; dst[j+6] = dbswap((tmp - (tmp>>dst_depth)) << dst_shift);\
|
|
|
|
|
tmp = ((bswap(src[j+7]) >> src_shift) + dither[7])>>shift; dst[j+7] = dbswap((tmp - (tmp>>dst_depth)) << dst_shift);\
|
|
|
|
|
}\
|
|
|
|
|
for (; j < length; j++) {\
|
|
|
|
|
tmp = (bswap(src[j]) + dither[j&7])>>shift; dst[j] = dbswap(tmp - (tmp>>dst_depth));\
|
|
|
|
|
tmp = (bswap(src[j]) + dither[j&7])>>shift; dst[j] = dbswap(tmp - (tmp>>dst_depth) << dst_shift);\
|
|
|
|
|
}\
|
|
|
|
|
dst += dstStride;\
|
|
|
|
|
src += srcStride;\
|
|
|
|
@ -2184,14 +2184,14 @@ static int packedCopyWrapper(SwsInternal *c, const uint8_t *const src[],
|
|
|
|
|
for (i = 0; i < height; i++) {\
|
|
|
|
|
const uint8_t *dither= dithers[shift-1][i&7];\
|
|
|
|
|
for (j = 0; j < length-7; j+=8) {\
|
|
|
|
|
tmp = bswap(src[j+0]); dst[j+0] = dbswap((tmp - (tmp>>dst_depth) + dither[0])>>shift);\
|
|
|
|
|
tmp = bswap(src[j+1]); dst[j+1] = dbswap((tmp - (tmp>>dst_depth) + dither[1])>>shift);\
|
|
|
|
|
tmp = bswap(src[j+2]); dst[j+2] = dbswap((tmp - (tmp>>dst_depth) + dither[2])>>shift);\
|
|
|
|
|
tmp = bswap(src[j+3]); dst[j+3] = dbswap((tmp - (tmp>>dst_depth) + dither[3])>>shift);\
|
|
|
|
|
tmp = bswap(src[j+4]); dst[j+4] = dbswap((tmp - (tmp>>dst_depth) + dither[4])>>shift);\
|
|
|
|
|
tmp = bswap(src[j+5]); dst[j+5] = dbswap((tmp - (tmp>>dst_depth) + dither[5])>>shift);\
|
|
|
|
|
tmp = bswap(src[j+6]); dst[j+6] = dbswap((tmp - (tmp>>dst_depth) + dither[6])>>shift);\
|
|
|
|
|
tmp = bswap(src[j+7]); dst[j+7] = dbswap((tmp - (tmp>>dst_depth) + dither[7])>>shift);\
|
|
|
|
|
tmp = bswap(src[j+0]) >> src_shift; dst[j+0] = dbswap(((tmp - (tmp>>dst_depth) + dither[0])>>shift) << dst_shift);\
|
|
|
|
|
tmp = bswap(src[j+1]) >> src_shift; dst[j+1] = dbswap(((tmp - (tmp>>dst_depth) + dither[1])>>shift) << dst_shift);\
|
|
|
|
|
tmp = bswap(src[j+2]) >> src_shift; dst[j+2] = dbswap(((tmp - (tmp>>dst_depth) + dither[2])>>shift) << dst_shift);\
|
|
|
|
|
tmp = bswap(src[j+3]) >> src_shift; dst[j+3] = dbswap(((tmp - (tmp>>dst_depth) + dither[3])>>shift) << dst_shift);\
|
|
|
|
|
tmp = bswap(src[j+4]) >> src_shift; dst[j+4] = dbswap(((tmp - (tmp>>dst_depth) + dither[4])>>shift) << dst_shift);\
|
|
|
|
|
tmp = bswap(src[j+5]) >> src_shift; dst[j+5] = dbswap(((tmp - (tmp>>dst_depth) + dither[5])>>shift) << dst_shift);\
|
|
|
|
|
tmp = bswap(src[j+6]) >> src_shift; dst[j+6] = dbswap(((tmp - (tmp>>dst_depth) + dither[6])>>shift) << dst_shift);\
|
|
|
|
|
tmp = bswap(src[j+7]) >> src_shift; dst[j+7] = dbswap(((tmp - (tmp>>dst_depth) + dither[7])>>shift) << dst_shift);\
|
|
|
|
|
}\
|
|
|
|
|
for (; j < length; j++) {\
|
|
|
|
|
tmp = bswap(src[j]); dst[j] = dbswap((tmp - (tmp>>dst_depth) + dither[j&7])>>shift);\
|
|
|
|
@ -2235,6 +2235,8 @@ static int planarCopyWrapper(SwsInternal *c, const uint8_t *const src[],
|
|
|
|
|
) {
|
|
|
|
|
const int src_depth = desc_src->comp[plane].depth;
|
|
|
|
|
const int dst_depth = desc_dst->comp[plane].depth;
|
|
|
|
|
const int src_shift = desc_src->comp[plane].shift;
|
|
|
|
|
const int dst_shift = desc_dst->comp[plane].shift;
|
|
|
|
|
const uint16_t *srcPtr2 = (const uint16_t *) srcPtr;
|
|
|
|
|
uint16_t *dstPtr2 = (uint16_t*)dstPtr;
|
|
|
|
|
|
|
|
|
@ -2250,11 +2252,11 @@ static int planarCopyWrapper(SwsInternal *c, const uint8_t *const src[],
|
|
|
|
|
#define COPY816(w)\
|
|
|
|
|
if (shiftonly) {\
|
|
|
|
|
for (j = 0; j < length; j++)\
|
|
|
|
|
w(&dstPtr2[j], srcPtr[j]<<(dst_depth-8));\
|
|
|
|
|
w(&dstPtr2[j], (srcPtr[j]<<(dst_depth-8)) << dst_shift);\
|
|
|
|
|
} else {\
|
|
|
|
|
for (j = 0; j < length; j++)\
|
|
|
|
|
w(&dstPtr2[j], (srcPtr[j]<<(dst_depth-8)) |\
|
|
|
|
|
(srcPtr[j]>>(2*8-dst_depth)));\
|
|
|
|
|
w(&dstPtr2[j], ((srcPtr[j]<<(dst_depth-8)) |\
|
|
|
|
|
(srcPtr[j]>>(2*8-dst_depth))) << dst_shift);\
|
|
|
|
|
}
|
|
|
|
|
if(isBE(c->opts.dst_format)){
|
|
|
|
|
COPY816(AV_WB16)
|
|
|
|
@ -2274,13 +2276,13 @@ static int planarCopyWrapper(SwsInternal *c, const uint8_t *const src[],
|
|
|
|
|
#if HAVE_FAST_64BIT
|
|
|
|
|
#define FAST_COPY_UP(shift) \
|
|
|
|
|
for (; j < length - 3; j += 4) { \
|
|
|
|
|
uint64_t v = AV_RN64A(srcPtr2 + j); \
|
|
|
|
|
uint64_t v = AV_RN64A(srcPtr2 + j) >> src_shift; \
|
|
|
|
|
AV_WN64A(dstPtr2 + j, v << shift); \
|
|
|
|
|
}
|
|
|
|
|
#else
|
|
|
|
|
#define FAST_COPY_UP(shift) \
|
|
|
|
|
for (; j < length - 1; j += 2) { \
|
|
|
|
|
uint32_t v = AV_RN32A(srcPtr2 + j); \
|
|
|
|
|
uint32_t v = AV_RN32A(srcPtr2 + j) >> src_shift; \
|
|
|
|
|
AV_WN32A(dstPtr2 + j, v << shift); \
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
@ -2293,14 +2295,14 @@ static int planarCopyWrapper(SwsInternal *c, const uint8_t *const src[],
|
|
|
|
|
#define COPY_UP(r,w) \
|
|
|
|
|
if(shiftonly){\
|
|
|
|
|
for (; j < length; j++){ \
|
|
|
|
|
unsigned int v= r(&srcPtr2[j]);\
|
|
|
|
|
w(&dstPtr2[j], v<<(dst_depth-src_depth));\
|
|
|
|
|
unsigned int v= r(&srcPtr2[j]) >> src_shift;\
|
|
|
|
|
w(&dstPtr2[j], (v << (dst_depth-src_depth)) << dst_shift);\
|
|
|
|
|
}\
|
|
|
|
|
}else{\
|
|
|
|
|
for (; j < length; j++){ \
|
|
|
|
|
unsigned int v= r(&srcPtr2[j]);\
|
|
|
|
|
w(&dstPtr2[j], (v<<(dst_depth-src_depth)) | \
|
|
|
|
|
(v>>(2*src_depth-dst_depth)));\
|
|
|
|
|
unsigned int v= r(&srcPtr2[j]) >> src_shift;\
|
|
|
|
|
w(&dstPtr2[j], ((v<<(dst_depth-src_depth)) | \
|
|
|
|
|
(v>>(2*src_depth-dst_depth))) << dst_shift);\
|
|
|
|
|
}\
|
|
|
|
|
}
|
|
|
|
|
if(isBE(c->opts.src_format)){
|
|
|
|
|