mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-13 21:28:01 +02:00
Optimized unscaled yuvp9/yuvp10 -> yuvp16 conversion.
About 30% faster on a 32-bit Atom and 120% faster on a 64-bit Phenom2. This is interesting because supporting P16 is easier in e.g. OpenGL (one can misuse support for any 2-component 8-bit format), whereas supporting p9/p10 without conversion needs a texture format with at least 14 bits of actual precision. The shiftonly == 0 case is not optimized since the code is more complex and the speed gain is less obvious. Signed-off-by: Reimar Döffinger <Reimar.Doeffinger@gmx.de>
This commit is contained in:
parent
bb7073921c
commit
118bd609f0
@ -830,7 +830,34 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t *src[],
|
|||||||
srcPtr += srcStride[plane];
|
srcPtr += srcStride[plane];
|
||||||
}
|
}
|
||||||
} else if (src_depth <= dst_depth) {
|
} else if (src_depth <= dst_depth) {
|
||||||
|
int orig_length = length;
|
||||||
for (i = 0; i < height; i++) {
|
for (i = 0; i < height; i++) {
|
||||||
|
if(isBE(c->srcFormat) == HAVE_BIGENDIAN &&
|
||||||
|
isBE(c->dstFormat) == HAVE_BIGENDIAN &&
|
||||||
|
shiftonly) {
|
||||||
|
unsigned shift = dst_depth - src_depth;
|
||||||
|
length = orig_length;
|
||||||
|
#if HAVE_FAST_64BIT
|
||||||
|
#define FAST_COPY_UP(shift) \
|
||||||
|
for (j = 0; j < length - 3; j += 4) { \
|
||||||
|
uint64_t v = AV_RN64A(srcPtr2 + j); \
|
||||||
|
AV_WN64A(dstPtr2 + j, v << shift); \
|
||||||
|
} \
|
||||||
|
length &= 3;
|
||||||
|
#else
|
||||||
|
#define FAST_COPY_UP(shift) \
|
||||||
|
for (j = 0; j < length - 1; j += 2) { \
|
||||||
|
uint32_t v = AV_RN32A(srcPtr2 + j); \
|
||||||
|
AV_WN32A(dstPtr2 + j, v << shift); \
|
||||||
|
} \
|
||||||
|
length &= 1;
|
||||||
|
#endif
|
||||||
|
switch (shift)
|
||||||
|
{
|
||||||
|
case 6: FAST_COPY_UP(6); break;
|
||||||
|
case 7: FAST_COPY_UP(7); break;
|
||||||
|
}
|
||||||
|
}
|
||||||
#define COPY_UP(r,w) \
|
#define COPY_UP(r,w) \
|
||||||
if(shiftonly){\
|
if(shiftonly){\
|
||||||
for (j = 0; j < length; j++){ \
|
for (j = 0; j < length; j++){ \
|
||||||
|
Loading…
Reference in New Issue
Block a user