From 469bd7b150f1016d5335b846801898792502a817 Mon Sep 17 00:00:00 2001
From: Loren Merritt
Date: Thu, 21 Feb 2008 07:54:46 +0000
Subject: [PATCH] pseudo-simd add_bytes and diff_bytes 2x faster than scalar in 32bit, 4x faster in 64bit (as opposed to 8x in mmx)

Originally committed as revision 12165 to svn://svn.ffmpeg.org/ffmpeg/trunk
---
 libavcodec/dsputil.c | 33 ++++++++++++++++++++-------------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index c9b02a0542..6c0be5d7e9 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -50,6 +50,9 @@ void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top, int w
 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
 uint32_t ff_squareTbl[512] = {0, };
 
+static const unsigned long pb_7f = 0x7f7f7f7f7f7f7f7fUL;
+static const unsigned long pb_80 = 0x8080808080808080UL;
+
 const uint8_t ff_zigzag_direct[64] = {
     0,   1,   8,  16,   9,   2,   3,  10,
     17,  24,  32,  25,  18,  11,   4,   5,
@@ -3276,34 +3279,31 @@ static void clear_blocks_c(DCTELEM *blocks)
 }
 
 static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
-    int i;
-    for(i=0; i+7