FFmpeg/libavcodec/h264idct.c

/*
 * H.264 IDCT
 * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * H.264 IDCT.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */

#include "dsputil.h"

/**
 * 4x4 H.264 inverse transform shared by the full-size and lowres entry points.
 *
 * The first pass transforms each of the four rows of block in place, the
 * second pass transforms the columns and writes the result to dst.
 * block is therefore modified by the call.
 *
 * @param dst          output pixels, 4 rows of 4 bytes, rows are stride apart
 * @param block        coefficients, row r starts at block[r * block_stride]
 * @param stride       distance between two rows of dst
 * @param block_stride distance between two rows of block
 * @param shift        right shift applied to every second-pass result
 * @param add          multiplier for the existing dst pixel: 1 adds the
 *                     result to it, 0 overwrites it
 */
static av_always_inline void idct_internal(uint8_t *dst, DCTELEM *block, int stride, int block_stride, int shift, int add){
    /* Lookup table indexed with "pixel + residual"; presumably clamps the
     * sum to the uint8_t range -- confirm against the ff_cropTbl setup. */
    uint8_t *const crop = ff_cropTbl + MAX_NEG_CROP;
    int n;

    /* Bias the first coefficient by half of 1<<shift so that the final
     * ">> shift" rounds instead of truncating. */
    block[0] += 1<<(shift-1);

    /* pass 1: rows, in place */
    for(n=0; n<4; n++){
        DCTELEM *const row = block + block_stride*n;
        const int sum02  =  row[0]     +  row[2];
        const int diff02 =  row[0]     -  row[2];
        const int odd_lo = (row[1]>>1) -  row[3];
        const int odd_hi =  row[1]     + (row[3]>>1);

        row[0]= sum02  + odd_hi;
        row[1]= diff02 + odd_lo;
        row[2]= diff02 - odd_lo;
        row[3]= sum02  - odd_hi;
    }

    /* pass 2: columns, written to dst */
    for(n=0; n<4; n++){
        const DCTELEM *const col = block + n;
        uint8_t *const out       = dst + n;
        const int sum02  =  col[block_stride*0]     +  col[block_stride*2];
        const int diff02 =  col[block_stride*0]     -  col[block_stride*2];
        const int odd_lo = (col[block_stride*1]>>1) -  col[block_stride*3];
        const int odd_hi =  col[block_stride*1]     + (col[block_stride*3]>>1);

        out[0*stride]= crop[ add*out[0*stride] + ((sum02  + odd_hi) >> shift) ];
        out[1*stride]= crop[ add*out[1*stride] + ((diff02 + odd_lo) >> shift) ];
        out[2*stride]= crop[ add*out[2*stride] + ((diff02 - odd_lo) >> shift) ];
        out[3*stride]= crop[ add*out[3*stride] + ((sum02  - odd_hi) >> shift) ];
    }
}

/**
 * Run the 4x4 inverse transform on block and add the result to dst.
 *
 * The coefficients are read with a row pitch of 4, the result is shifted
 * right by 6 and added to the existing pixels. block is modified.
 */
void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride){
    enum { COEF_ROW_PITCH = 4, RESULT_SHIFT = 6, ADD_TO_DST = 1 };

    idct_internal(dst, block, stride, COEF_ROW_PITCH, RESULT_SHIFT, ADD_TO_DST);
}

/**
 * Lowres variant: run the 4x4 inverse transform on coefficients stored with
 * a row pitch of 8, shift the result right by 3 and add it to dst.
 *
 * Note the argument order (dst, stride, block), which differs from
 * ff_h264_idct_add_c(). block is modified.
 */
void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block){
    enum { COEF_ROW_PITCH = 8, RESULT_SHIFT = 3, ADD_TO_DST = 1 };

    idct_internal(dst, block, stride, COEF_ROW_PITCH, RESULT_SHIFT, ADD_TO_DST);
}

/**
 * Lowres variant: run the 4x4 inverse transform on coefficients stored with
 * a row pitch of 8, shift the result right by 3 and store it in dst,
 * ignoring the previous pixel values.
 *
 * Note the argument order (dst, stride, block), which differs from
 * ff_h264_idct_add_c(). block is modified.
 */
void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block){
    enum { COEF_ROW_PITCH = 8, RESULT_SHIFT = 3, ADD_TO_DST = 0 };

    idct_internal(dst, block, stride, COEF_ROW_PITCH, RESULT_SHIFT, ADD_TO_DST);
}

/**
 * 8x8 H.264 inverse transform, result added to dst.
 *
 * The first pass transforms the eight rows of block in place (rows are
 * 8 coefficients apart), the second pass transforms the columns, shifts the
 * result right by 6 and adds it to the 8x8 pixels at dst.
 * block is modified by the call.
 *
 * @param dst    output pixels, 8 rows of 8 bytes, rows are stride apart
 * @param block  64 coefficients, row r starts at block[r * 8]
 * @param stride distance between two rows of dst
 */
void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride){
    /* Lookup table indexed with "pixel + residual"; presumably clamps the
     * sum to the uint8_t range -- confirm against the ff_cropTbl setup. */
    uint8_t *const crop = ff_cropTbl + MAX_NEG_CROP;
    int n;

    /* half of 1<<6, so that the final ">> 6" rounds instead of truncating */
    block[0] += 32;

    /* pass 1: rows, in place */
    for( n = 0; n < 8; n++ )
    {
        DCTELEM *const row = block + n*8;

        /* even-indexed inputs */
        const int a0 =  row[0] + row[4];
        const int a2 =  row[0] - row[4];
        const int a4 = (row[2]>>1) - row[6];
        const int a6 = (row[6]>>1) + row[2];

        const int b0 = a0 + a6;
        const int b2 = a2 + a4;
        const int b4 = a2 - a4;
        const int b6 = a0 - a6;

        /* odd-indexed inputs */
        const int a1 = -row[3] + row[5] - row[7] - (row[7]>>1);
        const int a3 =  row[1] + row[7] - row[3] - (row[3]>>1);
        const int a5 = -row[1] + row[7] + row[5] + (row[5]>>1);
        const int a7 =  row[3] + row[5] + row[1] + (row[1]>>1);

        const int b1 = (a7>>2) + a1;
        const int b3 =  a3 + (a5>>2);
        const int b5 = (a3>>2) - a5;
        const int b7 =  a7 - (a1>>2);

        /* all inputs were consumed above, so the store order is free */
        row[0] = b0 + b7;
        row[1] = b2 + b5;
        row[2] = b4 + b3;
        row[3] = b6 + b1;
        row[4] = b6 - b1;
        row[5] = b4 - b3;
        row[6] = b2 - b5;
        row[7] = b0 - b7;
    }

    /* pass 2: columns, added to dst */
    for( n = 0; n < 8; n++ )
    {
        const DCTELEM *const col = block + n;
        uint8_t *const out       = dst + n;

        /* even-indexed inputs */
        const int a0 =  col[0*8] + col[4*8];
        const int a2 =  col[0*8] - col[4*8];
        const int a4 = (col[2*8]>>1) - col[6*8];
        const int a6 = (col[6*8]>>1) + col[2*8];

        const int b0 = a0 + a6;
        const int b2 = a2 + a4;
        const int b4 = a2 - a4;
        const int b6 = a0 - a6;

        /* odd-indexed inputs */
        const int a1 = -col[3*8] + col[5*8] - col[7*8] - (col[7*8]>>1);
        const int a3 =  col[1*8] + col[7*8] - col[3*8] - (col[3*8]>>1);
        const int a5 = -col[1*8] + col[7*8] + col[5*8] + (col[5*8]>>1);
        const int a7 =  col[3*8] + col[5*8] + col[1*8] + (col[1*8]>>1);

        const int b1 = (a7>>2) + a1;
        const int b3 =  a3 + (a5>>2);
        const int b5 = (a3>>2) - a5;
        const int b7 =  a7 - (a1>>2);

        out[0*stride] = crop[ out[0*stride] + ((b0 + b7) >> 6) ];
        out[1*stride] = crop[ out[1*stride] + ((b2 + b5) >> 6) ];
        out[2*stride] = crop[ out[2*stride] + ((b4 + b3) >> 6) ];
        out[3*stride] = crop[ out[3*stride] + ((b6 + b1) >> 6) ];
        out[4*stride] = crop[ out[4*stride] + ((b6 - b1) >> 6) ];
        out[5*stride] = crop[ out[5*stride] + ((b4 - b3) >> 6) ];
        out[6*stride] = crop[ out[6*stride] + ((b2 - b5) >> 6) ];
        out[7*stride] = crop[ out[7*stride] + ((b0 - b7) >> 6) ];
    }
}

/**
 * DC-only shortcut for a 4x4 block: adds the rounded value
 * (block[0] + 32) >> 6 to each of the 4x4 pixels at dst.
 *
 * Only block[0] is read, so the caller must make sure that all AC
 * coefficients are 0. block is not modified.
 */
void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride){
    const int dc_offset = (block[0] + 32) >> 6;
    /* Pre-offset the lookup table by the DC value so that indexing it with
     * a pixel yields the entry for "pixel + dc_offset". */
    uint8_t *const crop_dc = ff_cropTbl + MAX_NEG_CROP + dc_offset;
    int x, y;

    for( y = 0; y < 4; y++, dst += stride )
        for( x = 0; x < 4; x++ )
            dst[x] = crop_dc[ dst[x] ];
}

/**
 * DC-only shortcut for an 8x8 block: adds the rounded value
 * (block[0] + 32) >> 6 to each of the 8x8 pixels at dst.
 *
 * Only block[0] is read; like the 4x4 variant this is only meaningful when
 * all other coefficients are 0. block is not modified.
 */
void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride){
    const int dc_offset = (block[0] + 32) >> 6;
    /* Pre-offset the lookup table by the DC value so that indexing it with
     * a pixel yields the entry for "pixel + dc_offset". */
    uint8_t *const crop_dc = ff_cropTbl + MAX_NEG_CROP + dc_offset;
    int x, y;

    for( y = 0; y < 8; y++, dst += stride )
        for( x = 0; x < 8; x++ )
            dst[x] = crop_dc[ dst[x] ];
}

//FIXME this table duplicates the one in h264data.h; it will be removed once the H.264 tables have been split out
/* Maps a block number to an index into the nnzc[6*8] arrays taken by the
 * functions below. Each entry is written as column + row*8. */
#define SCAN8_AT(col, row) ((col) + (row)*8)
static const uint8_t scan8[16 + 2*4]={
    /* entries 0..15 */
    SCAN8_AT(4,1), SCAN8_AT(5,1), SCAN8_AT(4,2), SCAN8_AT(5,2),
    SCAN8_AT(6,1), SCAN8_AT(7,1), SCAN8_AT(6,2), SCAN8_AT(7,2),
    SCAN8_AT(4,3), SCAN8_AT(5,3), SCAN8_AT(4,4), SCAN8_AT(5,4),
    SCAN8_AT(6,3), SCAN8_AT(7,3), SCAN8_AT(6,4), SCAN8_AT(7,4),
    /* entries 16..19 */
    SCAN8_AT(1,1), SCAN8_AT(2,1),
    SCAN8_AT(1,2), SCAN8_AT(2,2),
    /* entries 20..23 */
    SCAN8_AT(1,4), SCAN8_AT(2,4),
    SCAN8_AT(1,5), SCAN8_AT(2,5),
};
#undef SCAN8_AT

/**
 * Apply the 4x4 inverse transform to up to 16 blocks and add the results
 * to dst.
 *
 * Block n uses the coefficients at block + n*16 and the output position
 * dst + block_offset[n]. It is skipped when nnzc[scan8[n]] is 0. When that
 * entry is exactly 1 and the first coefficient is nonzero, the DC-only
 * routine is used; otherwise the full transform runs.
 *
 * @param nnzc per-block values looked up through scan8 (presumably the
 *             number of nonzero coefficients -- confirm against the caller)
 */
void ff_h264_idct_add16_c(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
    int blk;
    for(blk=0; blk<16; blk++){
        const int nnz = nnzc[ scan8[blk] ];
        DCTELEM *const coeffs = block + blk*16;
        uint8_t *out;

        if(!nnz)
            continue;

        out = dst + block_offset[blk];
        if(nnz==1 && coeffs[0])
            ff_h264_idct_dc_add_c(out, coeffs, stride);
        else
            idct_internal(out, coeffs, stride, 4, 6, 1);
    }
}

/**
 * Variant of ff_h264_idct_add16_c() with a different block selection rule.
 *
 * Block n uses the coefficients at block + n*16 and the output position
 * dst + block_offset[n]. A nonzero nnzc[scan8[n]] selects the full 4x4
 * transform; when it is 0 but the first coefficient is nonzero, the DC-only
 * routine is used; otherwise the block is left untouched.
 */
void ff_h264_idct_add16intra_c(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
    int blk;
    for(blk=0; blk<16; blk++){
        DCTELEM *const coeffs = block + blk*16;

        if(nnzc[ scan8[blk] ]){
            idct_internal(dst + block_offset[blk], coeffs, stride, 4, 6, 1);
        }else if(coeffs[0]){
            ff_h264_idct_dc_add_c(dst + block_offset[blk], coeffs, stride);
        }
    }
}

/**
 * Apply the 8x8 inverse transform to up to four blocks and add the results
 * to dst.
 *
 * The loop visits indices 0, 4, 8 and 12. Index n uses the 64 coefficients
 * starting at block + n*16 and the output position dst + block_offset[n].
 * A block is skipped when nnzc[scan8[n]] is 0. When that entry is exactly 1
 * and the first coefficient is nonzero, the DC-only routine is used;
 * otherwise the full transform runs.
 */
void ff_h264_idct8_add4_c(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
    int blk;
    for(blk=0; blk<16; blk+=4){
        const int nnz = nnzc[ scan8[blk] ];
        DCTELEM *const coeffs = block + blk*16;
        uint8_t *out;

        if(!nnz)
            continue;

        out = dst + block_offset[blk];
        if(nnz==1 && coeffs[0])
            ff_h264_idct8_dc_add_c(out, coeffs, stride);
        else
            ff_h264_idct8_add_c(out, coeffs, stride);
    }
}

/**
 * Apply the 4x4 inverse transform to blocks 16..23 and add the results to
 * two separate destination planes (presumably the two chroma planes --
 * confirm against the caller).
 *
 * Blocks 16..19 are written to dest[0], blocks 20..23 to dest[1]. Block n
 * uses the coefficients at block + n*16 and the offset block_offset[n]
 * inside its plane. A nonzero nnzc[scan8[n]] selects the full transform;
 * when it is 0 but the first coefficient is nonzero, the DC-only routine is
 * used; otherwise the block is left untouched.
 */
void ff_h264_idct_add8_c(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
    int blk;
    for(blk=16; blk<16+8; blk++){
        /* bit 2 of the block number selects the plane: 0 for 16..19, 1 for 20..23 */
        const int plane = (blk>>2) & 1;
        DCTELEM *const coeffs = block + blk*16;

        if(nnzc[ scan8[blk] ]){
            ff_h264_idct_add_c   (dest[plane] + block_offset[blk], coeffs, stride);
        }else if(coeffs[0]){
            ff_h264_idct_dc_add_c(dest[plane] + block_offset[blk], coeffs, stride);
        }
    }
}
move h264 idct to its own file and call via function pointer in DspContext allow h264 idct to be used for lowres=1 Originally committed as revision 3524 to svn://svn.ffmpeg.org/ffmpeg/trunk 2004-09-27 22:47:17 +03:00			`/*`
			`* H.264 IDCT`
			`* Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>`
			`*`
Change license headers to say 'FFmpeg' instead of 'this program/this library' and fix GPL/LGPL version mismatches. Originally committed as revision 6577 to svn://svn.ffmpeg.org/ffmpeg/trunk 2006-10-07 18:30:46 +03:00			`* This file is part of FFmpeg.`
			`*`
			`* FFmpeg is free software; you can redistribute it and/or`
move h264 idct to its own file and call via function pointer in DspContext allow h264 idct to be used for lowres=1 Originally committed as revision 3524 to svn://svn.ffmpeg.org/ffmpeg/trunk 2004-09-27 22:47:17 +03:00			`* modify it under the terms of the GNU Lesser General Public`
			`* License as published by the Free Software Foundation; either`
Change license headers to say 'FFmpeg' instead of 'this program/this library' and fix GPL/LGPL version mismatches. Originally committed as revision 6577 to svn://svn.ffmpeg.org/ffmpeg/trunk 2006-10-07 18:30:46 +03:00			`* version 2.1 of the License, or (at your option) any later version.`
move h264 idct to its own file and call via function pointer in DspContext allow h264 idct to be used for lowres=1 Originally committed as revision 3524 to svn://svn.ffmpeg.org/ffmpeg/trunk 2004-09-27 22:47:17 +03:00			`*`
Change license headers to say 'FFmpeg' instead of 'this program/this library' and fix GPL/LGPL version mismatches. Originally committed as revision 6577 to svn://svn.ffmpeg.org/ffmpeg/trunk 2006-10-07 18:30:46 +03:00			`* FFmpeg is distributed in the hope that it will be useful,`
move h264 idct to its own file and call via function pointer in DspContext allow h264 idct to be used for lowres=1 Originally committed as revision 3524 to svn://svn.ffmpeg.org/ffmpeg/trunk 2004-09-27 22:47:17 +03:00			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`* Lesser General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU Lesser General Public`
Change license headers to say 'FFmpeg' instead of 'this program/this library' and fix GPL/LGPL version mismatches. Originally committed as revision 6577 to svn://svn.ffmpeg.org/ffmpeg/trunk 2006-10-07 18:30:46 +03:00			`* License along with FFmpeg; if not, write to the Free Software`
Update licensing information: The FSF changed postal address. Originally committed as revision 4842 to svn://svn.ffmpeg.org/ffmpeg/trunk 2006-01-13 00:43:26 +02:00			`* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA`
move h264 idct to its own file and call via function pointer in DspContext allow h264 idct to be used for lowres=1 Originally committed as revision 3524 to svn://svn.ffmpeg.org/ffmpeg/trunk 2004-09-27 22:47:17 +03:00			`*/`
COSMETICS: Remove all trailing whitespace. Originally committed as revision 4749 to svn://svn.ffmpeg.org/ffmpeg/trunk 2005-12-17 20:14:38 +02:00
move h264 idct to its own file and call via function pointer in DspContext allow h264 idct to be used for lowres=1 Originally committed as revision 3524 to svn://svn.ffmpeg.org/ffmpeg/trunk 2004-09-27 22:47:17 +03:00			`/**`
Remove explicit filename from Doxygen @file commands. Passing an explicit filename to this command is only necessary if the documentation in the @file block refers to a file different from the one the block resides in. Originally committed as revision 22921 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-04-20 17:45:34 +03:00			`* @file`
move h264 idct to its own file and call via function pointer in DspContext allow h264 idct to be used for lowres=1 Originally committed as revision 3524 to svn://svn.ffmpeg.org/ffmpeg/trunk 2004-09-27 22:47:17 +03:00			`* H.264 IDCT.`
			`* @author Michael Niedermayer <michaelni@gmx.at>`
			`*/`
COSMETICS: Remove all trailing whitespace. Originally committed as revision 4749 to svn://svn.ffmpeg.org/ffmpeg/trunk 2005-12-17 20:14:38 +02:00
move h264 idct to its own file and call via function pointer in DspContext allow h264 idct to be used for lowres=1 Originally committed as revision 3524 to svn://svn.ffmpeg.org/ffmpeg/trunk 2004-09-27 22:47:17 +03:00			`#include "dsputil.h"`

rename always_inline to av_always_inline and move to common.h Originally committed as revision 7256 to svn://svn.ffmpeg.org/ffmpeg/trunk 2006-12-08 02:35:08 +02:00			`static av_always_inline void idct_internal(uint8_t dst, DCTELEM block, int stride, int block_stride, int shift, int add){`
move h264 idct to its own file and call via function pointer in DspContext allow h264 idct to be used for lowres=1 Originally committed as revision 3524 to svn://svn.ffmpeg.org/ffmpeg/trunk 2004-09-27 22:47:17 +03:00			`int i;`
rename cropTbl -> ff_cropTbl Originally committed as revision 6992 to svn://svn.ffmpeg.org/ffmpeg/trunk 2006-11-12 22:08:09 +02:00			`uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;`
move h264 idct to its own file and call via function pointer in DspContext allow h264 idct to be used for lowres=1 Originally committed as revision 3524 to svn://svn.ffmpeg.org/ffmpeg/trunk 2004-09-27 22:47:17 +03:00
			`block[0] += 1<<(shift-1);`

			`for(i=0; i<4; i++){`
			`const int z0= block[0 + block_stridei] + block[2 + block_stridei];`
			`const int z1= block[0 + block_stridei] - block[2 + block_stridei];`
			`const int z2= (block[1 + block_stridei]>>1) - block[3 + block_stridei];`
			`const int z3= block[1 + block_stridei] + (block[3 + block_stridei]>>1);`

			`block[0 + block_stride*i]= z0 + z3;`
			`block[1 + block_stride*i]= z1 + z2;`
			`block[2 + block_stride*i]= z1 - z2;`
			`block[3 + block_stride*i]= z0 - z3;`
			`}`

			`for(i=0; i<4; i++){`
			`const int z0= block[i + block_stride0] + block[i + block_stride2];`
			`const int z1= block[i + block_stride0] - block[i + block_stride2];`
			`const int z2= (block[i + block_stride1]>>1) - block[i + block_stride3];`
			`const int z3= block[i + block_stride1] + (block[i + block_stride3]>>1);`

			`dst[i + 0stride]= cm[ adddst[i + 0*stride] + ((z0 + z3) >> shift) ];`
			`dst[i + 1stride]= cm[ adddst[i + 1*stride] + ((z1 + z2) >> shift) ];`
			`dst[i + 2stride]= cm[ adddst[i + 2*stride] + ((z1 - z2) >> shift) ];`
			`dst[i + 3stride]= cm[ adddst[i + 3*stride] + ((z0 - z3) >> shift) ];`
			`}`
			`}`

			`void ff_h264_idct_add_c(uint8_t dst, DCTELEM block, int stride){`
			`idct_internal(dst, block, stride, 4, 6, 1);`
			`}`

			`void ff_h264_lowres_idct_add_c(uint8_t dst, int stride, DCTELEM block){`
			`idct_internal(dst, block, stride, 8, 3, 1);`
			`}`

			`void ff_h264_lowres_idct_put_c(uint8_t dst, int stride, DCTELEM block){`
			`idct_internal(dst, block, stride, 8, 3, 0);`
			`}`
decode H.264 with 8x8 transform. deblocking is still incorrect with 8x8+cavlc Originally committed as revision 4339 to svn://svn.ffmpeg.org/ffmpeg/trunk 2005-06-03 00:15:20 +03:00
			`void ff_h264_idct8_add_c(uint8_t dst, DCTELEM block, int stride){`
			`int i;`
rename cropTbl -> ff_cropTbl Originally committed as revision 6992 to svn://svn.ffmpeg.org/ffmpeg/trunk 2006-11-12 22:08:09 +02:00			`uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;`
decode H.264 with 8x8 transform. deblocking is still incorrect with 8x8+cavlc Originally committed as revision 4339 to svn://svn.ffmpeg.org/ffmpeg/trunk 2005-06-03 00:15:20 +03:00
			`block[0] += 32;`

			`for( i = 0; i < 8; i++ )`
			`{`
flatten an array, since gcc fails at optimizing multidimensional arrays h264_idct8_add_c: 780 -> 735 cycles on conroe Originally committed as revision 16307 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-12-25 03:20:37 +02:00			`const int a0 = block[0+i8] + block[4+i8];`
			`const int a2 = block[0+i8] - block[4+i8];`
			`const int a4 = (block[2+i8]>>1) - block[6+i8];`
			`const int a6 = (block[6+i8]>>1) + block[2+i8];`
decode H.264 with 8x8 transform. deblocking is still incorrect with 8x8+cavlc Originally committed as revision 4339 to svn://svn.ffmpeg.org/ffmpeg/trunk 2005-06-03 00:15:20 +03:00
			`const int b0 = a0 + a6;`
			`const int b2 = a2 + a4;`
			`const int b4 = a2 - a4;`
			`const int b6 = a0 - a6;`

flatten an array, since gcc fails at optimizing multidimensional arrays h264_idct8_add_c: 780 -> 735 cycles on conroe Originally committed as revision 16307 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-12-25 03:20:37 +02:00			`const int a1 = -block[3+i8] + block[5+i8] - block[7+i8] - (block[7+i8]>>1);`
			`const int a3 = block[1+i8] + block[7+i8] - block[3+i8] - (block[3+i8]>>1);`
			`const int a5 = -block[1+i8] + block[7+i8] + block[5+i8] + (block[5+i8]>>1);`
			`const int a7 = block[3+i8] + block[5+i8] + block[1+i8] + (block[1+i8]>>1);`
decode H.264 with 8x8 transform. deblocking is still incorrect with 8x8+cavlc Originally committed as revision 4339 to svn://svn.ffmpeg.org/ffmpeg/trunk 2005-06-03 00:15:20 +03:00
			`const int b1 = (a7>>2) + a1;`
			`const int b3 = a3 + (a5>>2);`
			`const int b5 = (a3>>2) - a5;`
			`const int b7 = a7 - (a1>>2);`

flatten an array, since gcc fails at optimizing multidimensional arrays h264_idct8_add_c: 780 -> 735 cycles on conroe Originally committed as revision 16307 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-12-25 03:20:37 +02:00			`block[0+i*8] = b0 + b7;`
			`block[7+i*8] = b0 - b7;`
			`block[1+i*8] = b2 + b5;`
			`block[6+i*8] = b2 - b5;`
			`block[2+i*8] = b4 + b3;`
			`block[5+i*8] = b4 - b3;`
			`block[3+i*8] = b6 + b1;`
			`block[4+i*8] = b6 - b1;`
decode H.264 with 8x8 transform. deblocking is still incorrect with 8x8+cavlc Originally committed as revision 4339 to svn://svn.ffmpeg.org/ffmpeg/trunk 2005-06-03 00:15:20 +03:00			`}`
			`for( i = 0; i < 8; i++ )`
			`{`
flatten an array, since gcc fails at optimizing multidimensional arrays h264_idct8_add_c: 780 -> 735 cycles on conroe Originally committed as revision 16307 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-12-25 03:20:37 +02:00			`const int a0 = block[i+08] + block[i+48];`
			`const int a2 = block[i+08] - block[i+48];`
			`const int a4 = (block[i+28]>>1) - block[i+68];`
			`const int a6 = (block[i+68]>>1) + block[i+28];`
decode H.264 with 8x8 transform. deblocking is still incorrect with 8x8+cavlc Originally committed as revision 4339 to svn://svn.ffmpeg.org/ffmpeg/trunk 2005-06-03 00:15:20 +03:00
			`const int b0 = a0 + a6;`
			`const int b2 = a2 + a4;`
			`const int b4 = a2 - a4;`
			`const int b6 = a0 - a6;`

flatten an array, since gcc fails at optimizing multidimensional arrays h264_idct8_add_c: 780 -> 735 cycles on conroe Originally committed as revision 16307 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-12-25 03:20:37 +02:00			`const int a1 = -block[i+38] + block[i+58] - block[i+78] - (block[i+78]>>1);`
			`const int a3 = block[i+18] + block[i+78] - block[i+38] - (block[i+38]>>1);`
			`const int a5 = -block[i+18] + block[i+78] + block[i+58] + (block[i+58]>>1);`
			`const int a7 = block[i+38] + block[i+58] + block[i+18] + (block[i+18]>>1);`
decode H.264 with 8x8 transform. deblocking is still incorrect with 8x8+cavlc Originally committed as revision 4339 to svn://svn.ffmpeg.org/ffmpeg/trunk 2005-06-03 00:15:20 +03:00
			`const int b1 = (a7>>2) + a1;`
			`const int b3 = a3 + (a5>>2);`
			`const int b5 = (a3>>2) - a5;`
			`const int b7 = a7 - (a1>>2);`

			`dst[i + 0stride] = cm[ dst[i + 0stride] + ((b0 + b7) >> 6) ];`
			`dst[i + 1stride] = cm[ dst[i + 1stride] + ((b2 + b5) >> 6) ];`
			`dst[i + 2stride] = cm[ dst[i + 2stride] + ((b4 + b3) >> 6) ];`
			`dst[i + 3stride] = cm[ dst[i + 3stride] + ((b6 + b1) >> 6) ];`
			`dst[i + 4stride] = cm[ dst[i + 4stride] + ((b6 - b1) >> 6) ];`
			`dst[i + 5stride] = cm[ dst[i + 5stride] + ((b4 - b3) >> 6) ];`
			`dst[i + 6stride] = cm[ dst[i + 6stride] + ((b2 - b5) >> 6) ];`
			`dst[i + 7stride] = cm[ dst[i + 7stride] + ((b0 - b7) >> 6) ];`
			`}`
			`}`
h264: special case dc-only idct. ~1% faster overall Originally committed as revision 4971 to svn://svn.ffmpeg.org/ffmpeg/trunk 2006-02-10 08:55:25 +02:00
			`// assumes all AC coefs are 0`
			`void ff_h264_idct_dc_add_c(uint8_t dst, DCTELEM block, int stride){`
			`int i, j;`
			`int dc = (block[0] + 32) >> 6;`
Improve some uses of ff_cropTbl with constant offset Originally committed as revision 23728 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-06-23 02:12:48 +03:00			`uint8_t *cm = ff_cropTbl + MAX_NEG_CROP + dc;`
h264: special case dc-only idct. ~1% faster overall Originally committed as revision 4971 to svn://svn.ffmpeg.org/ffmpeg/trunk 2006-02-10 08:55:25 +02:00			`for( j = 0; j < 4; j++ )`
			`{`
			`for( i = 0; i < 4; i++ )`
Improve some uses of ff_cropTbl with constant offset Originally committed as revision 23728 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-06-23 02:12:48 +03:00			`dst[i] = cm[ dst[i] ];`
h264: special case dc-only idct. ~1% faster overall Originally committed as revision 4971 to svn://svn.ffmpeg.org/ffmpeg/trunk 2006-02-10 08:55:25 +02:00			`dst += stride;`
			`}`
			`}`

			`void ff_h264_idct8_dc_add_c(uint8_t dst, DCTELEM block, int stride){`
			`int i, j;`
			`int dc = (block[0] + 32) >> 6;`
Improve some uses of ff_cropTbl with constant offset Originally committed as revision 23728 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-06-23 02:12:48 +03:00			`uint8_t *cm = ff_cropTbl + MAX_NEG_CROP + dc;`
h264: special case dc-only idct. ~1% faster overall Originally committed as revision 4971 to svn://svn.ffmpeg.org/ffmpeg/trunk 2006-02-10 08:55:25 +02:00			`for( j = 0; j < 8; j++ )`
			`{`
			`for( i = 0; i < 8; i++ )`
Improve some uses of ff_cropTbl with constant offset Originally committed as revision 23728 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-06-23 02:12:48 +03:00			`dst[i] = cm[ dst[i] ];`
h264: special case dc-only idct. ~1% faster overall Originally committed as revision 4971 to svn://svn.ffmpeg.org/ffmpeg/trunk 2006-02-10 08:55:25 +02:00			`dst += stride;`
			`}`
			`}`
H.264 idct functions that include the chroma, inter luma and intra16 luma loops thus avoiding the calling overhead. New functions are not yet used. Originally committed as revision 16206 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-12-18 04:36:48 +02:00
			`//FIXME this table is a duplicate from h264data.h, and will be removed once the tables from, h264 have been split`
			`static const uint8_t scan8[16 + 2*4]={`
			`4+18, 5+18, 4+28, 5+28,`
			`6+18, 7+18, 6+28, 7+28,`
			`4+38, 5+38, 4+48, 5+48,`
			`6+38, 7+38, 6+48, 7+48,`
			`1+18, 2+18,`
			`1+28, 2+28,`
			`1+48, 2+48,`
			`1+58, 2+58,`
			`};`

			`void ff_h264_idct_add16_c(uint8_t dst, const int block_offset, DCTELEM block, int stride, const uint8_t nnzc[68]){`
			`int i;`
			`for(i=0; i<16; i++){`
			`int nnz = nnzc[ scan8[i] ];`
			`if(nnz){`
			`if(nnz==1 && block[i16]) ff_h264_idct_dc_add_c(dst + block_offset[i], block + i16, stride);`
			`else idct_internal (dst + block_offset[i], block + i*16, stride, 4, 6, 1);`
			`}`
			`}`
			`}`

			`void ff_h264_idct_add16intra_c(uint8_t dst, const int block_offset, DCTELEM block, int stride, const uint8_t nnzc[68]){`
			`int i;`
			`for(i=0; i<16; i++){`
			`if(nnzc[ scan8[i] ]) idct_internal (dst + block_offset[i], block + i*16, stride, 4, 6, 1);`
			`else if(block[i16]) ff_h264_idct_dc_add_c(dst + block_offset[i], block + i16, stride);`
			`}`
			`}`

			`void ff_h264_idct8_add4_c(uint8_t dst, const int block_offset, DCTELEM block, int stride, const uint8_t nnzc[68]){`
			`int i;`
			`for(i=0; i<16; i+=4){`
			`int nnz = nnzc[ scan8[i] ];`
			`if(nnz){`
			`if(nnz==1 && block[i16]) ff_h264_idct8_dc_add_c(dst + block_offset[i], block + i16, stride);`
			`else ff_h264_idct8_add_c (dst + block_offset[i], block + i*16, stride);`
			`}`
			`}`
			`}`

			`void ff_h264_idct_add8_c(uint8_t *dest, const int block_offset, DCTELEM block, int stride, const uint8_t nnzc[68]){`
			`int i;`
			`for(i=16; i<16+8; i++){`
			`if(nnzc[ scan8[i] ])`
			`ff_h264_idct_add_c (dest[(i&4)>>2] + block_offset[i], block + i*16, stride);`
			`else if(block[i*16])`
			`ff_h264_idct_dc_add_c(dest[(i&4)>>2] + block_offset[i], block + i*16, stride);`
			`}`
			`}`