Mirror of https://github.com/FFmpeg/FFmpeg.git (synced 2024-12-23 12:43:46 +02:00)
avcodec/ppc/h264qpel: POWER LE support for put_pixels16_l2_altivec() and avg_pixels16_l2_altivec()
add put_unligned_store() and avg_unligned_store() macros

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent 8cc5a78e45
commit bd67d0ead1
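The diff below replaces the open-coded big-endian store sequence in both functions with one store macro per function, so the loop body no longer depends on byte order. The idea the macros capture: on big-endian AltiVec a store to a possibly unaligned address has to be emulated with the classic lvsl/lvsr permute-and-merge sequence, while on little-endian POWER8 the VSX intrinsic vec_vsx_st can store to an unaligned address directly. A minimal standalone sketch of those two paths (written here as a function purely for illustration; the local vec_u8 typedef and the function name store16_unaligned are not from the diff, and FFmpeg's HAVE_BIGENDIAN config macro is assumed):

#include <altivec.h>

typedef vector unsigned char vec_u8;   /* FFmpeg defines this in util_altivec.h */

/* Sketch only: store one 16-byte vector to a possibly unaligned address. */
static void store16_unaligned(vec_u8 s, unsigned char *dest)
{
#if HAVE_BIGENDIAN
    /* Load the two aligned 16-byte blocks that cover dest, merge the new
     * data into them with permutes, then store both blocks back. */
    vec_u8 tmp1  = vec_ld(0, dest);             /* aligned block holding the start */
    vec_u8 mask  = vec_lvsl(0, dest);
    vec_u8 tmp2  = vec_ld(15, dest);            /* aligned block holding the end   */
    vec_u8 edges = vec_perm(tmp2, tmp1, mask);  /* bytes that must stay untouched  */
    vec_u8 align = vec_lvsr(0, dest);
    vec_st(vec_perm(s, edges, align), 15, dest);
    vec_st(vec_perm(edges, s, align), 0,  dest);
#else
    /* Little-endian POWER8: VSX stores handle unaligned addresses directly. */
    vec_vsx_st(s, 0, dest);
#endif
}

put_unligned_store() in the diff is the macro form of this sequence; avg_unligned_store() does the same but first averages the incoming vector with the pixels already at dst (via vec_perm of the loaded blocks on big-endian, via vec_vsx_ld on little-endian).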
@@ -191,86 +191,79 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, cons
     OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
 }\
 
+#if HAVE_BIGENDIAN
+#define put_unligned_store(s, dest) {    \
+    tmp1 = vec_ld(0, dest);              \
+    mask = vec_lvsl(0, dest);            \
+    tmp2 = vec_ld(15, dest);             \
+    edges = vec_perm(tmp2, tmp1, mask);  \
+    align = vec_lvsr(0, dest);           \
+    tmp2 = vec_perm(s, edges, align);    \
+    tmp1 = vec_perm(edges, s, align);    \
+    vec_st(tmp2, 15, dest);              \
+    vec_st(tmp1, 0 , dest);              \
+}
+#else
+#define put_unligned_store(s, dest) vec_vsx_st(s, 0, dest);
+#endif /* HAVE_BIGENDIAN */
+
 static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
                                     const uint8_t * src2, int dst_stride,
                                     int src_stride1, int h)
 {
     int i;
-    vec_u8 a, b, d, tmp1, tmp2, mask, mask_, edges, align;
-
+    vec_u8 a, b, d, mask_;
+#if HAVE_BIGENDIAN
+    vec_u8 tmp1, tmp2, mask, edges, align;
+    mask_ = vec_lvsl(0, src2);
+#endif
 
     for (i = 0; i < h; i++) {
-
-        tmp1 = vec_ld(i * src_stride1, src1);
-        mask = vec_lvsl(i * src_stride1, src1);
-        tmp2 = vec_ld(i * src_stride1 + 15, src1);
-
-        a = vec_perm(tmp1, tmp2, mask);
-
-        tmp1 = vec_ld(i * 16, src2);
-        tmp2 = vec_ld(i * 16 + 15, src2);
-
-        b = vec_perm(tmp1, tmp2, mask_);
-
-        tmp1 = vec_ld(0, dst);
-        mask = vec_lvsl(0, dst);
-        tmp2 = vec_ld(15, dst);
-
+        a = unaligned_load(i * src_stride1, src1);
+        b = load_with_perm_vec(i * 16, src2, mask_);
         d = vec_avg(a, b);
-
-        edges = vec_perm(tmp2, tmp1, mask);
-
-        align = vec_lvsr(0, dst);
-
-        tmp2 = vec_perm(d, edges, align);
-        tmp1 = vec_perm(edges, d, align);
-
-        vec_st(tmp2, 15, dst);
-        vec_st(tmp1, 0 , dst);
-
+        put_unligned_store(d, dst);
         dst += dst_stride;
     }
 }
 
+#if HAVE_BIGENDIAN
+#define avg_unligned_store(s, dest){                \
+    tmp1 = vec_ld(0, dest);                         \
+    mask = vec_lvsl(0, dest);                       \
+    tmp2 = vec_ld(15, dest);                        \
+    a = vec_avg(vec_perm(tmp1, tmp2, mask), s);     \
+    edges = vec_perm(tmp2, tmp1, mask);             \
+    align = vec_lvsr(0, dest);                      \
+    tmp2 = vec_perm(a, edges, align);               \
+    tmp1 = vec_perm(edges, a, align);               \
+    vec_st(tmp2, 15, dest);                         \
+    vec_st(tmp1, 0 , dest);                         \
+}
+#else
+#define avg_unligned_store(s, dest){                \
+    a = vec_avg(vec_vsx_ld(0, dst), s);             \
+    vec_vsx_st(a, 0, dst);                          \
+}
+#endif /* HAVE_BIGENDIAN */
+
 static inline void avg_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
                                     const uint8_t * src2, int dst_stride,
                                     int src_stride1, int h)
 {
     int i;
-    vec_u8 a, b, d, tmp1, tmp2, mask, mask_, edges, align;
+    vec_u8 a, b, d, mask_;
 
+#if HAVE_BIGENDIAN
+    vec_u8 tmp1, tmp2, mask, edges, align;
+    mask_ = vec_lvsl(0, src2);
+#endif
 
     for (i = 0; i < h; i++) {
-
-        tmp1 = vec_ld(i * src_stride1, src1);
-        mask = vec_lvsl(i * src_stride1, src1);
-        tmp2 = vec_ld(i * src_stride1 + 15, src1);
-
-        a = vec_perm(tmp1, tmp2, mask);
-
-        tmp1 = vec_ld(i * 16, src2);
-        tmp2 = vec_ld(i * 16 + 15, src2);
-
-        b = vec_perm(tmp1, tmp2, mask_);
-
-        tmp1 = vec_ld(0, dst);
-        mask = vec_lvsl(0, dst);
-        tmp2 = vec_ld(15, dst);
-
-        d = vec_avg(vec_perm(tmp1, tmp2, mask), vec_avg(a, b));
-
-        edges = vec_perm(tmp2, tmp1, mask);
-
-        align = vec_lvsr(0, dst);
-
-        tmp2 = vec_perm(d, edges, align);
-        tmp1 = vec_perm(edges, d, align);
-
-        vec_st(tmp2, 15, dst);
-        vec_st(tmp1, 0 , dst);
-
+        a = unaligned_load(i * src_stride1, src1);
+        b = load_with_perm_vec(i * 16, src2, mask_);
+        d = vec_avg(a, b);
+        avg_unligned_store(d, dst);
         dst += dst_stride;
     }
 }