1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-08-04 22:03:09 +02:00

avcodec/ppc/mpegvideo_altivec: Don't process coeffs as scalars

block_last_index and nCoeffs are an optimization designed
to avoid processing unnecessarily many coefficients; yet
it would be legal to always process all coefficients
(all coefficients beyond nCoeffs are zero anyway and
zeros are always unquantized to zeros). Therefore
the vectorized loop may simply run past nCoeffs to the
end of the current group of eight coefficients, and
one does not need a scalar tail.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
Andreas Rheinhardt
2025-03-22 07:53:34 +01:00
parent 46c5466dd8
commit 917652d7c8

View File

@ -42,7 +42,7 @@
static void dct_unquantize_h263_altivec(MpegEncContext *s, static void dct_unquantize_h263_altivec(MpegEncContext *s,
int16_t *block, int n, int qscale) int16_t *block, int n, int qscale)
{ {
int i, level, qmul, qadd; int i, qmul, qadd;
int nCoeffs; int nCoeffs;
qadd = (qscale - 1) | 1; qadd = (qscale - 1) | 1;
@ -74,7 +74,6 @@ static void dct_unquantize_h263_altivec(MpegEncContext *s,
register vector signed short blockv, qmulv, qaddv, nqaddv, temp1; register vector signed short blockv, qmulv, qaddv, nqaddv, temp1;
register vector bool short blockv_null, blockv_neg; register vector bool short blockv_null, blockv_neg;
register short backup_0 = block[0]; register short backup_0 = block[0];
register int j = 0;
qmulv = vec_splat((vec_s16)vec_lde(0, &qmul8), 0); qmulv = vec_splat((vec_s16)vec_lde(0, &qmul8), 0);
qaddv = vec_splat((vec_s16)vec_lde(0, &qadd8), 0); qaddv = vec_splat((vec_s16)vec_lde(0, &qadd8), 0);
@ -82,7 +81,7 @@ static void dct_unquantize_h263_altivec(MpegEncContext *s,
// vectorize all the 16 bytes-aligned blocks // vectorize all the 16 bytes-aligned blocks
// of 8 elements // of 8 elements
for(; (j + 7) <= nCoeffs ; j+=8) { for (register int j = 0; j <= nCoeffs ; j += 8) {
blockv = vec_ld(j << 1, block); blockv = vec_ld(j << 1, block);
blockv_neg = vec_cmplt(blockv, vczero); blockv_neg = vec_cmplt(blockv, vczero);
blockv_null = vec_cmpeq(blockv, vczero); blockv_null = vec_cmpeq(blockv, vczero);
@ -95,22 +94,6 @@ static void dct_unquantize_h263_altivec(MpegEncContext *s,
vec_st(blockv, j << 1, block); vec_st(blockv, j << 1, block);
} }
// if nCoeffs isn't a multiple of 8, finish the job
// using good old scalar units.
// (we could do it using a truncated vector,
// but I'm not sure it's worth the hassle)
for(; j <= nCoeffs ; j++) {
level = block[j];
if (level) {
if (level < 0) {
level = level * qmul - qadd;
} else {
level = level * qmul + qadd;
}
block[j] = level;
}
}
if (i == 1) { if (i == 1) {
// cheat. this avoid special-casing the first iteration // cheat. this avoid special-casing the first iteration
block[0] = backup_0; block[0] = backup_0;