1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-08-04 22:03:09 +02:00

swscale/swscale_unscaled: make the fast planar copy path work with more formats

The fast path was only taken when dst_depth - src_depth is 6 or 7 in the high
bit depth path, which means it was only executed for 10 -> 16 and 9 -> 16.
This patch makes this path general, supporting arbitrary formats as long as
dst_depth > src_depth > 8.

Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
James Almer
2025-03-14 22:14:19 -03:00
parent 819dec697a
commit 63fa1f52b9

View File

@ -2267,42 +2267,34 @@ static int planarCopyWrapper(SwsInternal *c, const uint8_t *const src[],
srcPtr += srcStride[plane]; srcPtr += srcStride[plane];
} }
} else if (src_depth <= dst_depth) { } else if (src_depth <= dst_depth) {
unsigned shift = dst_depth - src_depth;
for (i = 0; i < height; i++) { for (i = 0; i < height; i++) {
j = 0; j = 0;
if(isBE(c->opts.src_format) == HAVE_BIGENDIAN && if(isBE(c->opts.src_format) == HAVE_BIGENDIAN &&
isBE(c->opts.dst_format) == HAVE_BIGENDIAN && isBE(c->opts.dst_format) == HAVE_BIGENDIAN &&
shiftonly) { shiftonly) {
unsigned shift = dst_depth - src_depth;
#if HAVE_FAST_64BIT #if HAVE_FAST_64BIT
#define FAST_COPY_UP(shift) \ for (; j < length - 3; j += 4) {
for (; j < length - 3; j += 4) { \ uint64_t v = AV_RN64A(srcPtr2 + j) >> src_shift;
uint64_t v = AV_RN64A(srcPtr2 + j) >> src_shift; \ AV_WN64A(dstPtr2 + j, (v << shift) << dst_shift);
AV_WN64A(dstPtr2 + j, v << shift); \ }
}
#else #else
#define FAST_COPY_UP(shift) \ for (; j < length - 1; j += 2) {
for (; j < length - 1; j += 2) { \ uint32_t v = AV_RN32A(srcPtr2 + j) >> src_shift;
uint32_t v = AV_RN32A(srcPtr2 + j) >> src_shift; \ AV_WN32A(dstPtr2 + j, (v << shift) << dst_shift);
AV_WN32A(dstPtr2 + j, v << shift); \ }
}
#endif #endif
switch (shift)
{
case 6: FAST_COPY_UP(6); break;
case 7: FAST_COPY_UP(7); break;
}
} }
#define COPY_UP(r,w) \ #define COPY_UP(r,w) \
if(shiftonly){\ if(shiftonly){\
for (; j < length; j++){ \ for (; j < length; j++){ \
unsigned int v= r(&srcPtr2[j]) >> src_shift;\ unsigned int v= r(&srcPtr2[j]) >> src_shift;\
w(&dstPtr2[j], (v << (dst_depth-src_depth)) << dst_shift);\ w(&dstPtr2[j], (v << shift) << dst_shift);\
}\ }\
}else{\ }else{\
for (; j < length; j++){ \ for (; j < length; j++){ \
unsigned int v= r(&srcPtr2[j]) >> src_shift;\ unsigned int v= r(&srcPtr2[j]) >> src_shift;\
w(&dstPtr2[j], ((v<<(dst_depth-src_depth)) | \ w(&dstPtr2[j], ((v << shift) | (v>>(2*src_depth-dst_depth))) << dst_shift);\
(v>>(2*src_depth-dst_depth))) << dst_shift);\
}\ }\
} }
if(isBE(c->opts.src_format)){ if(isBE(c->opts.src_format)){