mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
avg_pixels8_xy2_altivec in AltiVec, enabling avg_pixels8_altivec, hadamard fix by (Romain Dolbeau <dolbeau at irisa dot fr>)
Originally committed as revision 3196 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
3cf1e291f8
commit
2a5a1bdac9
@ -1647,3 +1647,113 @@ int has_altivec(void)
|
|||||||
#endif /* CONFIG_DARWIN */
|
#endif /* CONFIG_DARWIN */
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* next one assumes that ((line_size % 8) == 0) */
|
||||||
|
void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
|
||||||
|
{
|
||||||
|
POWERPC_PERF_DECLARE(altivec_avg_pixels8_xy2_num, 1);
|
||||||
|
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
|
||||||
|
|
||||||
|
int j;
|
||||||
|
POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1);
|
||||||
|
for (j = 0; j < 2; j++) {
|
||||||
|
int i;
|
||||||
|
const uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
|
||||||
|
const uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l);
|
||||||
|
uint32_t l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL;
|
||||||
|
uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
|
||||||
|
uint32_t l1, h1;
|
||||||
|
pixels += line_size;
|
||||||
|
for (i = 0; i < h; i += 2) {
|
||||||
|
uint32_t a = (((const struct unaligned_32 *) (pixels))->l);
|
||||||
|
uint32_t b = (((const struct unaligned_32 *) (pixels + 1))->l);
|
||||||
|
l1 = (a & 0x03030303UL) + (b & 0x03030303UL);
|
||||||
|
h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
|
||||||
|
*((uint32_t *) block) = rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
|
||||||
|
pixels += line_size;
|
||||||
|
block += line_size;
|
||||||
|
a = (((const struct unaligned_32 *) (pixels))->l);
|
||||||
|
b = (((const struct unaligned_32 *) (pixels + 1))->l);
|
||||||
|
l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL;
|
||||||
|
h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
|
||||||
|
*((uint32_t *) block) = rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
|
||||||
|
pixels += line_size;
|
||||||
|
block += line_size;
|
||||||
|
} pixels += 4 - line_size * (h + 1);
|
||||||
|
block += 4 - line_size * h;
|
||||||
|
}
|
||||||
|
POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1);
|
||||||
|
#else /* ALTIVEC_USE_REFERENCE_C_CODE */
|
||||||
|
register int i;
|
||||||
|
register vector unsigned char
|
||||||
|
pixelsv1, pixelsv2,
|
||||||
|
pixelsavg;
|
||||||
|
register vector unsigned char
|
||||||
|
blockv, temp1, temp2, blocktemp;
|
||||||
|
register vector unsigned short
|
||||||
|
pixelssum1, pixelssum2, temp3;
|
||||||
|
register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0);
|
||||||
|
register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2);
|
||||||
|
|
||||||
|
temp1 = vec_ld(0, pixels);
|
||||||
|
temp2 = vec_ld(16, pixels);
|
||||||
|
pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
|
||||||
|
if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F)
|
||||||
|
{
|
||||||
|
pixelsv2 = temp2;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
|
||||||
|
}
|
||||||
|
pixelsv1 = vec_mergeh(vczero, pixelsv1);
|
||||||
|
pixelsv2 = vec_mergeh(vczero, pixelsv2);
|
||||||
|
pixelssum1 = vec_add((vector unsigned short)pixelsv1,
|
||||||
|
(vector unsigned short)pixelsv2);
|
||||||
|
pixelssum1 = vec_add(pixelssum1, vctwo);
|
||||||
|
|
||||||
|
POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1);
|
||||||
|
for (i = 0; i < h ; i++) {
|
||||||
|
int rightside = ((unsigned long)block & 0x0000000F);
|
||||||
|
blockv = vec_ld(0, block);
|
||||||
|
|
||||||
|
temp1 = vec_ld(line_size, pixels);
|
||||||
|
temp2 = vec_ld(line_size + 16, pixels);
|
||||||
|
pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
|
||||||
|
if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F)
|
||||||
|
{
|
||||||
|
pixelsv2 = temp2;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
|
||||||
|
}
|
||||||
|
|
||||||
|
pixelsv1 = vec_mergeh(vczero, pixelsv1);
|
||||||
|
pixelsv2 = vec_mergeh(vczero, pixelsv2);
|
||||||
|
pixelssum2 = vec_add((vector unsigned short)pixelsv1,
|
||||||
|
(vector unsigned short)pixelsv2);
|
||||||
|
temp3 = vec_add(pixelssum1, pixelssum2);
|
||||||
|
temp3 = vec_sra(temp3, vctwo);
|
||||||
|
pixelssum1 = vec_add(pixelssum2, vctwo);
|
||||||
|
pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);
|
||||||
|
|
||||||
|
if (rightside)
|
||||||
|
{
|
||||||
|
blocktemp = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
blocktemp = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
|
||||||
|
}
|
||||||
|
|
||||||
|
blockv = vec_avg(blocktemp, blockv);
|
||||||
|
vec_st(blockv, 0, block);
|
||||||
|
|
||||||
|
block += line_size;
|
||||||
|
pixels += line_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1);
|
||||||
|
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
|
||||||
|
}
|
||||||
|
@ -48,6 +48,7 @@ extern void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, in
|
|||||||
extern void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h);
|
extern void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h);
|
||||||
extern int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h);
|
extern int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h);
|
||||||
extern int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h);
|
extern int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h);
|
||||||
|
extern void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h);
|
||||||
|
|
||||||
extern void gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder);
|
extern void gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder);
|
||||||
|
|
||||||
|
@ -62,6 +62,7 @@ static unsigned char* perfname[] = {
|
|||||||
"put_no_rnd_pixels16_xy2_altivec",
|
"put_no_rnd_pixels16_xy2_altivec",
|
||||||
"hadamard8_diff8x8_altivec",
|
"hadamard8_diff8x8_altivec",
|
||||||
"hadamard8_diff16_altivec",
|
"hadamard8_diff16_altivec",
|
||||||
|
"avg_pixels8_xy2_altivec",
|
||||||
"clear_blocks_dcbz32_ppc",
|
"clear_blocks_dcbz32_ppc",
|
||||||
"clear_blocks_dcbz128_ppc"
|
"clear_blocks_dcbz128_ppc"
|
||||||
};
|
};
|
||||||
@ -268,10 +269,8 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
|
|||||||
/* the two functions do the same thing, so use the same code */
|
/* the two functions do the same thing, so use the same code */
|
||||||
c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec;
|
c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec;
|
||||||
c->avg_pixels_tab[0][0] = avg_pixels16_altivec;
|
c->avg_pixels_tab[0][0] = avg_pixels16_altivec;
|
||||||
// next one disabled as it's untested.
|
|
||||||
#if 0
|
|
||||||
c->avg_pixels_tab[1][0] = avg_pixels8_altivec;
|
c->avg_pixels_tab[1][0] = avg_pixels8_altivec;
|
||||||
#endif /* 0 */
|
c->avg_pixels_tab[1][3] = avg_pixels8_xy2_altivec;
|
||||||
c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec;
|
c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec;
|
||||||
c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec;
|
c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec;
|
||||||
c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec;
|
c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec;
|
||||||
|
@ -52,6 +52,7 @@ enum powerpc_perf_index {
|
|||||||
altivec_put_no_rnd_pixels16_xy2_num,
|
altivec_put_no_rnd_pixels16_xy2_num,
|
||||||
altivec_hadamard8_diff8x8_num,
|
altivec_hadamard8_diff8x8_num,
|
||||||
altivec_hadamard8_diff16_num,
|
altivec_hadamard8_diff16_num,
|
||||||
|
altivec_avg_pixels8_xy2_num,
|
||||||
powerpc_clear_blocks_dcbz32,
|
powerpc_clear_blocks_dcbz32,
|
||||||
powerpc_clear_blocks_dcbz128,
|
powerpc_clear_blocks_dcbz128,
|
||||||
powerpc_perf_total
|
powerpc_perf_total
|
||||||
|
Loading…
Reference in New Issue
Block a user