avg_pixels8_xy2_altivec in AltiVec, enabling avg_pixels8_altivec, hadamard fix by (Romain Dolbeau <dolbeau at irisa dot fr>)

Originally committed as revision 3196 to svn://svn.ffmpeg.org/ffmpeg/trunk
2025-07-06 06:27:36 +02:00 · 2004-06-05 22:29:37 +00:00
parent 3cf1e291f8
commit 2a5a1bdac9
4 changed files with 114 additions and 3 deletions
--- a/libavcodec/ppc/dsputil_altivec.c
+++ b/libavcodec/ppc/dsputil_altivec.c
@ -1647,3 +1647,113 @@ int has_altivec(void)
 #endif /* CONFIG_DARWIN */
    return 0;
 }
 /*
  * Blend an 8-byte-wide, h-row block derived from `pixels` into `block`.
  *
  * For every output byte at column x the four source bytes pixels[x],
  * pixels[x + 1], pixels[x + line_size] and pixels[x + line_size + 1] are
  * summed, 2 is added and the total is shifted right by 2.  That value is
  * then averaged with the byte already stored in `block` (rnd_avg32() in the
  * reference path, vec_avg() in the AltiVec path) and written back.
  *
  * block     - destination, both read and written; rows are line_size apart
  * pixels    - source; besides rows 0..h-1 and columns 0..7, row h and
  *             column 8 are consulted as well
  * line_size - distance in bytes between consecutive rows of both buffers
  * h         - number of rows to produce
  *
  * next one assumes that ((line_size % 8) == 0)
  * NOTE(review): the AltiVec path rewrites one 8-byte half of an aligned
  * 16-byte vector, so it also appears to rely on `block` itself being
  * 8-byte aligned -- confirm against the callers.
  */
 void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
 /* Performance-counter hooks; the POWERPC_PERF_* macros are defined elsewhere. */
 POWERPC_PERF_DECLARE(altivec_avg_pixels8_xy2_num, 1);
 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
    /* Plain C path: four pixels are processed at once inside a 32-bit word. */
    int j;
 POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1);
 /* Two passes, each covering a 4-byte-wide column of the 8-byte-wide block. */
 for (j = 0; j < 2; j++) {
   int             i;
   /* First source row: four bytes at column 0 and four bytes at column 1. */
   const uint32_t  a = (((const struct unaligned_32 *) (pixels))->l);
   const uint32_t  b = (((const struct unaligned_32 *) (pixels + 1))->l);
   /*
    * Each byte is split into its low two bits (l*) and its high six bits
    * pre-shifted right by two (h*), so that four byte lanes can be added in
    * one word without a carry spilling into the neighbouring lane.
    * 0x02020202 is the per-byte "+2" rounding term.
    */
   uint32_t        l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL;
   uint32_t        h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
   uint32_t        l1, h1;
   pixels += line_size;
   /*
    * Two output rows per iteration: (l0, h0) and (l1, h1) alternate between
    * holding the "row above" and the "row below" partial sums.  Because of
    * the step of 2, an odd h would emit h + 1 rows.
    */
   for (i = 0; i < h; i += 2) {
     uint32_t        a = (((const struct unaligned_32 *) (pixels))->l);
     uint32_t        b = (((const struct unaligned_32 *) (pixels + 1))->l);
     l1 = (a & 0x03030303UL) + (b & 0x03030303UL);
     h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
     /*
      * Recombine: high parts add directly; the low-bit sums are shifted down
      * by 2 and masked with 0x0F0F0F0F to drop the bits that the shift pulled
      * in from the adjacent byte lane.  The result is averaged into block.
      */
     *((uint32_t *) block) = rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
     pixels += line_size;
     block += line_size;
     /* Second row of the pair: refresh (l0, h0), this time with the +2 term. */
     a = (((const struct unaligned_32 *) (pixels))->l);
     b = (((const struct unaligned_32 *) (pixels + 1))->l);
     l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL;
     h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
     *((uint32_t *) block) = rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
     pixels += line_size;
     block += line_size;
   /* Rewind both pointers to the top row and step 4 bytes right for pass two. */
   } pixels += 4 - line_size * (h + 1);
   block += 4 - line_size * h;
 }
 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1);
 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
   /* AltiVec path: one output row (8 pixels) per loop iteration. */
   register int i;
   register vector unsigned char
     pixelsv1, pixelsv2,
     pixelsavg;
   register vector unsigned char
     blockv, temp1, temp2, blocktemp;
   register vector unsigned short
     pixelssum1, pixelssum2, temp3;
   /* All-zero bytes (used for widening/narrowing) and 2 in every 16-bit lane. */
   register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0);
   register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2);
   /*
    * Prologue: fetch source row 0.  vec_ld only performs 16-byte-aligned
    * loads, so two neighbouring vectors are loaded and vec_perm with the
    * vec_lvsl() mask extracts the 16 bytes that start at `pixels`.
    */
   temp1 = vec_ld(0, pixels);
   temp2 = vec_ld(16, pixels);
   pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
   /*
    * Same row shifted by one byte.  If pixels is at offset 15 inside its
    * 16-byte line, pixels + 1 is aligned: the lvsl mask would then select
    * temp1 unshifted, whereas the wanted bytes are exactly temp2.
    */
   if ((((unsigned long)pixels) & 0x0000000F) ==  0x0000000F)
   {
     pixelsv2 = temp2;
   }
   else
   {
     pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
   }
   /* Interleave with zero bytes: widens the first 8 bytes to 16-bit lanes. */
   pixelsv1 = vec_mergeh(vczero, pixelsv1);
   pixelsv2 = vec_mergeh(vczero, pixelsv2);
   /* pixelssum1 = p[x] + p[x + 1] + 2 for the row above the first output row. */
   pixelssum1 = vec_add((vector unsigned short)pixelsv1,
                        (vector unsigned short)pixelsv2);
   pixelssum1 = vec_add(pixelssum1, vctwo);
 /* Note: the counter starts here, so the prologue above is not measured. */
 POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1); 
   for (i = 0; i < h ; i++) {
     /* Non-zero when block is not 16-byte aligned. */
     int rightside = ((unsigned long)block & 0x0000000F);
     /* Aligned 16-byte vector that contains the 8 destination bytes. */
     blockv = vec_ld(0, block);
     /* Fetch the next source row, with the same unaligned handling as above. */
     temp1 = vec_ld(line_size, pixels);
     temp2 = vec_ld(line_size + 16, pixels);
     pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
     if (((((unsigned long)pixels) + line_size) & 0x0000000F) ==  0x0000000F)
     {
       pixelsv2 = temp2;
     }
     else
     {
       pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
     }
     pixelsv1 = vec_mergeh(vczero, pixelsv1);
     pixelsv2 = vec_mergeh(vczero, pixelsv2);
     /* Horizontal pair sums of the lower row (no rounding term yet). */
     pixelssum2 = vec_add((vector unsigned short)pixelsv1,
                          (vector unsigned short)pixelsv2);
     /* (upper row sum + 2 + lower row sum) >> 2 */
     temp3 = vec_add(pixelssum1, pixelssum2);
     temp3 = vec_sra(temp3, vctwo);
     /* The lower row, plus the rounding term, is the upper row next time. */
     pixelssum1 = vec_add(pixelssum2, vctwo);
     /* Narrow back to bytes: results fill the first 8 bytes, the rest is 0. */
     pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);
     /*
      * Build a copy of blockv in which one 8-byte half is replaced by the new
      * pixels.  vcprm() is defined elsewhere; presumably plain indices pick
      * 32-bit words of the first operand and s0/s1 pick words 0/1 of the
      * second -- TODO confirm.  A non-zero `rightside` replaces the second
      * half, otherwise the first half.
      */
     if (rightside)
     {
       blocktemp = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
     }
     else
     {
       blocktemp = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
     }
     /*
      * Average with the existing destination.  The half that was copied from
      * blockv is averaged with itself and therefore stays unchanged.
      */
     blockv = vec_avg(blocktemp, blockv);
     vec_st(blockv, 0, block);
     block += line_size;
     pixels += line_size;
   }
 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1);
 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
 }
--- a/libavcodec/ppc/dsputil_altivec.h
+++ b/libavcodec/ppc/dsputil_altivec.h
@ -48,6 +48,7 @@ extern void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, in
 extern void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h);
 extern int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h);
 extern int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h);
 extern void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h);
 extern void gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder);
--- a/libavcodec/ppc/dsputil_ppc.c
+++ b/libavcodec/ppc/dsputil_ppc.c
@ -62,6 +62,7 @@ static unsigned char* perfname[] = {
  "put_no_rnd_pixels16_xy2_altivec",
  "hadamard8_diff8x8_altivec",
  "hadamard8_diff16_altivec",
  "avg_pixels8_xy2_altivec",
  "clear_blocks_dcbz32_ppc",
  "clear_blocks_dcbz128_ppc"
 };
@ -268,10 +269,8 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
        /* the two functions do the same thing, so use the same code */
        c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec;
        c->avg_pixels_tab[0][0] = avg_pixels16_altivec;
-// next one disabled as it's untested.
-#if 0
        c->avg_pixels_tab[1][0] = avg_pixels8_altivec;
-#endif /* 0 */
+	c->avg_pixels_tab[1][3] = avg_pixels8_xy2_altivec;
        c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec;
        c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec;
        c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec;
--- a/libavcodec/ppc/dsputil_ppc.h
+++ b/libavcodec/ppc/dsputil_ppc.h
@ -52,6 +52,7 @@ enum powerpc_perf_index {
  altivec_put_no_rnd_pixels16_xy2_num,
  altivec_hadamard8_diff8x8_num,
  altivec_hadamard8_diff16_num,
  altivec_avg_pixels8_xy2_num,
  powerpc_clear_blocks_dcbz32,
  powerpc_clear_blocks_dcbz128,
  powerpc_perf_total