mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
Use a faster way to compute 255-val: instead of creating a vector of
all 255s and then doing the subtraction, NOR the vector with itself. This saves one instruction and a register. Patch by Graham Booker % gbooker A tamu P edu% Originally committed as revision 9340 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
a444671542
commit
f4a02f6e9b
@@ -766,7 +766,6 @@ static inline vector unsigned char h264_deblock_mask ( register vector unsigned
|
||||
const vector unsigned char A0v = vec_sl(vec_splat_u8(10), vec_splat_u8(4)); \
|
||||
\
|
||||
register vector unsigned char pq0bit = vec_xor(p0,q0); \
|
||||
register vector unsigned char temp; \
|
||||
register vector unsigned char q1minus; \
|
||||
register vector unsigned char p0minus; \
|
||||
register vector unsigned char stage1; \
|
||||
@@ -775,11 +774,10 @@ static inline vector unsigned char h264_deblock_mask ( register vector unsigned
|
||||
register vector unsigned char delta; \
|
||||
register vector unsigned char deltaneg; \
|
||||
\
|
||||
temp = (vector unsigned char)vec_cmpeq(p0, p0); \
|
||||
q1minus = vec_xor(temp, q1); /* 255 - q1 */ \
|
||||
q1minus = vec_nor(q1, q1); /* 255 - q1 */ \
|
||||
stage1 = vec_avg(p1, q1minus); /* (p1 - q1 + 256)>>1 */ \
|
||||
stage2 = vec_sr(stage1, vec_splat_u8(1)); /* (p1 - q1 + 256)>>2 = 64 + (p1 - q1) >> 2 */ \
|
||||
p0minus = vec_xor(temp, p0); /* 255 - p0 */ \
|
||||
p0minus = vec_nor(p0, p0); /* 255 - p0 */ \
|
||||
stage1 = vec_avg(q0, p0minus); /* (q0 - p0 + 256)>>1 */ \
|
||||
pq0bit = vec_and(pq0bit, vec_splat_u8(1)); \
|
||||
stage2 = vec_avg(stage2, pq0bit); /* 32 + ((q0 - p0)&1 + (p1 - q1) >> 2 + 1) >> 1 */ \
|
||||
|
Loading…
Reference in New Issue
Block a user