1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2024-11-26 19:01:44 +02:00

part 2/2 of fixing Altivec-accelerated H264 luma inloop filter

In h264_deblock_q1, the result of the deblock was only assigned to a
by-value parameter and was therefore lost. It needs to be kept to be used
in future deblocks, so the function now returns this value.

Also change the signedness of the tc0 vector: it is really a signed value,
so treat it as such until after the >= 0 check. At that point, after being
masked, it can be treated as unsigned.

Patch by Graham Booker % gbooker A tamu P edu%

Originally committed as revision 9349 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
Graham Booker 2007-06-17 09:37:13 +00:00 committed by Guillaume Poirier
parent 963eca226e
commit 22fa38f0c8

View File

@ -740,8 +740,8 @@ static inline vector unsigned char h264_deblock_mask ( register vector unsigned
return mask; return mask;
} }
// out: p1 = clip((p2 + ((p0 + q0 + 1) >> 1)) >> 1, p1-tc0, p1+tc0) // out: newp1 = clip((p2 + ((p0 + q0 + 1) >> 1)) >> 1, p1-tc0, p1+tc0)
static inline void h264_deblock_q1(register vector unsigned char p0, static inline vector unsigned char h264_deblock_q1(register vector unsigned char p0,
register vector unsigned char p1, register vector unsigned char p1,
register vector unsigned char p2, register vector unsigned char p2,
register vector unsigned char q0, register vector unsigned char q0,
@ -753,6 +753,7 @@ static inline void h264_deblock_q1(register vector unsigned char p0,
register vector unsigned char ones; register vector unsigned char ones;
register vector unsigned char max; register vector unsigned char max;
register vector unsigned char min; register vector unsigned char min;
register vector unsigned char newp1;
temp = vec_xor(average, p2); temp = vec_xor(average, p2);
average = vec_avg(average, p2); /*avg(p2, avg(p0, q0)) */ average = vec_avg(average, p2); /*avg(p2, avg(p0, q0)) */
@ -761,8 +762,9 @@ static inline void h264_deblock_q1(register vector unsigned char p0,
uncliped = vec_subs(average, temp); /*(p2+((p0+q0+1)>>1))>>1 */ uncliped = vec_subs(average, temp); /*(p2+((p0+q0+1)>>1))>>1 */
max = vec_adds(p1, tc0); max = vec_adds(p1, tc0);
min = vec_subs(p1, tc0); min = vec_subs(p1, tc0);
p1 = vec_max(min, uncliped); newp1 = vec_max(min, uncliped);
p1 = vec_min(max, p1); newp1 = vec_min(max, newp1);
return newp1;
} }
#define h264_deblock_p0_q0(p0, p1, q0, q1, tc0masked) { \ #define h264_deblock_p0_q0(p0, p1, q0, q1, tc0masked) { \
@ -804,9 +806,11 @@ static inline void h264_deblock_q1(register vector unsigned char p0,
register vector unsigned char mask; \ register vector unsigned char mask; \
register vector unsigned char p1mask; \ register vector unsigned char p1mask; \
register vector unsigned char q1mask; \ register vector unsigned char q1mask; \
register vector unsigned char tc0vec; \ register vector char tc0vec; \
register vector unsigned char finaltc0; \ register vector unsigned char finaltc0; \
register vector unsigned char tc0masked; \ register vector unsigned char tc0masked; \
register vector unsigned char newp1; \
register vector unsigned char newq1; \
\ \
temp[0] = alpha; \ temp[0] = alpha; \
temp[1] = beta; \ temp[1] = beta; \
@ -819,24 +823,26 @@ static inline void h264_deblock_q1(register vector unsigned char p0,
tc0vec = vec_ld(0, temp); \ tc0vec = vec_ld(0, temp); \
tc0vec = vec_mergeh(tc0vec, tc0vec); \ tc0vec = vec_mergeh(tc0vec, tc0vec); \
tc0vec = vec_mergeh(tc0vec, tc0vec); \ tc0vec = vec_mergeh(tc0vec, tc0vec); \
mask = vec_and(mask, vec_cmpgt(tc0vec, vec_splat_u8(-1))); /* if tc0[i] >= 0 */ \ mask = vec_and(mask, vec_cmpgt(tc0vec, vec_splat_s8(-1))); /* if tc0[i] >= 0 */ \
finaltc0 = vec_and(tc0vec, mask); /*tc = tc0[i]*/ \ finaltc0 = vec_and((vector unsigned char)tc0vec, mask); /* tc = tc0 */ \
\ \
p1mask = diff_lt_altivec(p2, p0, betavec); \ p1mask = diff_lt_altivec(p2, p0, betavec); \
p1mask = vec_and(p1mask, mask); /* if( |p2 - p0| < beta) */ \ p1mask = vec_and(p1mask, mask); /* if( |p2 - p0| < beta) */ \
tc0masked = vec_and(p1mask, tc0vec); \ tc0masked = vec_and(p1mask, tc0vec); \
finaltc0 = vec_sub(finaltc0, p1mask); /* tc++ */ \ finaltc0 = vec_sub(finaltc0, p1mask); /* tc++ */ \
h264_deblock_q1(p0, p1, p2, q0, tc0masked); \ newp1 = h264_deblock_q1(p0, p1, p2, q0, tc0masked); \
/*end if*/ \ /*end if*/ \
\ \
q1mask = diff_lt_altivec(q2, q0, betavec); \ q1mask = diff_lt_altivec(q2, q0, betavec); \
q1mask = vec_and(q1mask, mask); /* if ( |q2 - q0| < beta ) */\ q1mask = vec_and(q1mask, mask); /* if ( |q2 - q0| < beta ) */\
tc0masked = vec_and(q1mask, tc0vec); \ tc0masked = vec_and(q1mask, tc0vec); \
finaltc0 = vec_sub(finaltc0, q1mask); /* tc++ */ \ finaltc0 = vec_sub(finaltc0, q1mask); /* tc++ */ \
h264_deblock_q1(p0, q1, q2, q0, tc0masked); \ newq1 = h264_deblock_q1(p0, q1, q2, q0, tc0masked); \
/*end if*/ \ /*end if*/ \
\ \
h264_deblock_p0_q0(p0, p1, q0, q1, finaltc0); \ h264_deblock_p0_q0(p0, p1, q0, q1, finaltc0); \
p1 = newp1; \
q1 = newq1; \
} }
static void h264_v_loop_filter_luma_altivec(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) { static void h264_v_loop_filter_luma_altivec(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) {