You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-08-04 22:03:09 +02:00
avcodec/ppc/mpegvideo_altivec: Don't process coeffs as scalars
block_last_index and nCoeffs exist only as an optimization to avoid processing more coefficients than necessary; it would nevertheless be legal to always process all coefficients (all coefficients beyond nCoeffs are zero anyway, and zeros are always unquantized to zeros). Therefore a scalar tail is not needed: the vector loop can simply keep processing whole groups of eight coefficients until nCoeffs is covered. Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
@ -42,7 +42,7 @@
|
|||||||
static void dct_unquantize_h263_altivec(MpegEncContext *s,
|
static void dct_unquantize_h263_altivec(MpegEncContext *s,
|
||||||
int16_t *block, int n, int qscale)
|
int16_t *block, int n, int qscale)
|
||||||
{
|
{
|
||||||
int i, level, qmul, qadd;
|
int i, qmul, qadd;
|
||||||
int nCoeffs;
|
int nCoeffs;
|
||||||
|
|
||||||
qadd = (qscale - 1) | 1;
|
qadd = (qscale - 1) | 1;
|
||||||
@ -74,7 +74,6 @@ static void dct_unquantize_h263_altivec(MpegEncContext *s,
|
|||||||
register vector signed short blockv, qmulv, qaddv, nqaddv, temp1;
|
register vector signed short blockv, qmulv, qaddv, nqaddv, temp1;
|
||||||
register vector bool short blockv_null, blockv_neg;
|
register vector bool short blockv_null, blockv_neg;
|
||||||
register short backup_0 = block[0];
|
register short backup_0 = block[0];
|
||||||
register int j = 0;
|
|
||||||
|
|
||||||
qmulv = vec_splat((vec_s16)vec_lde(0, &qmul8), 0);
|
qmulv = vec_splat((vec_s16)vec_lde(0, &qmul8), 0);
|
||||||
qaddv = vec_splat((vec_s16)vec_lde(0, &qadd8), 0);
|
qaddv = vec_splat((vec_s16)vec_lde(0, &qadd8), 0);
|
||||||
@ -82,7 +81,7 @@ static void dct_unquantize_h263_altivec(MpegEncContext *s,
|
|||||||
|
|
||||||
// vectorize all the 16 bytes-aligned blocks
|
// vectorize all the 16 bytes-aligned blocks
|
||||||
// of 8 elements
|
// of 8 elements
|
||||||
for(; (j + 7) <= nCoeffs ; j+=8) {
|
for (register int j = 0; j <= nCoeffs ; j += 8) {
|
||||||
blockv = vec_ld(j << 1, block);
|
blockv = vec_ld(j << 1, block);
|
||||||
blockv_neg = vec_cmplt(blockv, vczero);
|
blockv_neg = vec_cmplt(blockv, vczero);
|
||||||
blockv_null = vec_cmpeq(blockv, vczero);
|
blockv_null = vec_cmpeq(blockv, vczero);
|
||||||
@ -95,22 +94,6 @@ static void dct_unquantize_h263_altivec(MpegEncContext *s,
|
|||||||
vec_st(blockv, j << 1, block);
|
vec_st(blockv, j << 1, block);
|
||||||
}
|
}
|
||||||
|
|
||||||
// if nCoeffs isn't a multiple of 8, finish the job
|
|
||||||
// using good old scalar units.
|
|
||||||
// (we could do it using a truncated vector,
|
|
||||||
// but I'm not sure it's worth the hassle)
|
|
||||||
for(; j <= nCoeffs ; j++) {
|
|
||||||
level = block[j];
|
|
||||||
if (level) {
|
|
||||||
if (level < 0) {
|
|
||||||
level = level * qmul - qadd;
|
|
||||||
} else {
|
|
||||||
level = level * qmul + qadd;
|
|
||||||
}
|
|
||||||
block[j] = level;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (i == 1) {
|
if (i == 1) {
|
||||||
// cheat. this avoid special-casing the first iteration
|
// cheat. this avoid special-casing the first iteration
|
||||||
block[0] = backup_0;
|
block[0] = backup_0;
|
||||||
|
Reference in New Issue
Block a user