avcodec/ppc/mpegvideo_altivec: Don't process coeffs as scalars

block_last_index and nCoeffs are optimizations designed to avoid processing unnecessarily many coefficients; yet it would be legal to always process all coefficients (all coefficients beyond nCoeffs are zero anyway, and zeros are always unquantized to zeros). Therefore one does not need a scalar tail. Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
2025-08-04 22:03:09 +02:00 · 2025-03-22 07:53:34 +01:00
parent 46c5466dd8
commit 917652d7c8
1 changed files with 2 additions and 19 deletions
--- a/libavcodec/ppc/mpegvideo_altivec.c
+++ b/libavcodec/ppc/mpegvideo_altivec.c
@ -42,7 +42,7 @@
 static void dct_unquantize_h263_altivec(MpegEncContext *s,
                                 int16_t *block, int n, int qscale)
 {
-    int i, level, qmul, qadd;
+    int i, qmul, qadd;
    int nCoeffs;
    qadd = (qscale - 1) | 1;
@ -74,7 +74,6 @@ static void dct_unquantize_h263_altivec(MpegEncContext *s,
        register vector signed short blockv, qmulv, qaddv, nqaddv, temp1;
        register vector bool short blockv_null, blockv_neg;
        register short backup_0 = block[0];
        register int j = 0;
        qmulv = vec_splat((vec_s16)vec_lde(0, &qmul8), 0);
        qaddv = vec_splat((vec_s16)vec_lde(0, &qadd8), 0);
@ -82,7 +81,7 @@ static void dct_unquantize_h263_altivec(MpegEncContext *s,
        // vectorize all the 16 bytes-aligned blocks
        // of 8 elements
-        for(; (j + 7) <= nCoeffs ; j+=8) {
+        for (register int j = 0; j <= nCoeffs ; j += 8) {
            blockv = vec_ld(j << 1, block);
            blockv_neg = vec_cmplt(blockv, vczero);
            blockv_null = vec_cmpeq(blockv, vczero);
@ -95,22 +94,6 @@ static void dct_unquantize_h263_altivec(MpegEncContext *s,
            vec_st(blockv, j << 1, block);
        }
        // if nCoeffs isn't a multiple of 8, finish the job
        // using good old scalar units.
        // (we could do it using a truncated vector,
        // but I'm not sure it's worth the hassle)
        for(; j <= nCoeffs ; j++) {
            level = block[j];
            if (level) {
                if (level < 0) {
                    level = level * qmul - qadd;
                } else {
                    level = level * qmul + qadd;
                }
                block[j] = level;
            }
        }
        if (i == 1) {
            // cheat. this avoid special-casing the first iteration
            block[0] = backup_0;