swscale/swscale_unscaled: make the fast planar copy path work with more formats

The fast path was only taken when dst_depth - src_depth was 6 or 7, which in the high bit depth path means it was only executed for 10 -> 16 and 9 -> 16 bit (src -> dst) conversions. This patch makes this path general, supporting arbitrary formats as long as dst_depth > src_depth > 8.

Signed-off-by: James Almer <jamrial@gmail.com>
2025-08-04 22:03:09 +02:00 · 2025-03-14 22:14:19 -03:00
parent 819dec697a
commit 63fa1f52b9
1 changed file with 11 additions and 19 deletions
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -2267,42 +2267,34 @@ static int planarCopyWrapper(SwsInternal *c, const uint8_t *const src[],
                        srcPtr  += srcStride[plane];
                    }
                } else if (src_depth <= dst_depth) {
                    unsigned shift = dst_depth - src_depth;
                    for (i = 0; i < height; i++) {
                        j = 0;
                        if(isBE(c->opts.src_format) == HAVE_BIGENDIAN &&
                           isBE(c->opts.dst_format) == HAVE_BIGENDIAN &&
                           shiftonly) {
                             unsigned shift = dst_depth - src_depth;
 #if HAVE_FAST_64BIT
-#define FAST_COPY_UP(shift) \
+                            for (; j < length - 3; j += 4) {
-    for (; j < length - 3; j += 4) { \
+                                uint64_t v = AV_RN64A(srcPtr2 + j) >> src_shift;
-        uint64_t v = AV_RN64A(srcPtr2 + j) >> src_shift; \
+                                AV_WN64A(dstPtr2 + j, (v << shift) << dst_shift);
-        AV_WN64A(dstPtr2 + j, v << shift); \
+                            }
    }
 #else
-#define FAST_COPY_UP(shift) \
+                            for (; j < length - 1; j += 2) {
-    for (; j < length - 1; j += 2) { \
+                                uint32_t v = AV_RN32A(srcPtr2 + j) >> src_shift;
-        uint32_t v = AV_RN32A(srcPtr2 + j) >> src_shift; \
+                                AV_WN32A(dstPtr2 + j, (v << shift) << dst_shift);
-        AV_WN32A(dstPtr2 + j, v << shift); \
+                            }
    }
 #endif
                             switch (shift)
                             {
                             case 6: FAST_COPY_UP(6); break;
                             case 7: FAST_COPY_UP(7); break;
                             }
                        }
 #define COPY_UP(r,w) \
    if(shiftonly){\
        for (; j < length; j++){ \
            unsigned int v= r(&srcPtr2[j]) >> src_shift;\
-            w(&dstPtr2[j], (v << (dst_depth-src_depth)) << dst_shift);\
+            w(&dstPtr2[j], (v << shift) << dst_shift);\
        }\
    }else{\
        for (; j < length; j++){ \
            unsigned int v= r(&srcPtr2[j]) >> src_shift;\
-            w(&dstPtr2[j], ((v<<(dst_depth-src_depth)) | \
+            w(&dstPtr2[j], ((v << shift) | (v>>(2*src_depth-dst_depth))) << dst_shift);\
                            (v>>(2*src_depth-dst_depth))) << dst_shift);\
        }\
    }
                        if(isBE(c->opts.src_format)){