1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2024-12-23 12:43:46 +02:00

Use ptrdiff_t instead of int for the intra-prediction "stride" function parameters.

This way, SIMD-optimized functions don't have to sign-extend their
stride argument manually to be able to do pointer arithmetic.
This commit is contained in:
Ronald S. Bultje 2012-10-28 20:44:54 -07:00
parent bad8e33dc9
commit 95c89da36e
5 changed files with 264 additions and 117 deletions

View File

@ -23,25 +23,25 @@
#include "libavutil/arm/cpu.h" #include "libavutil/arm/cpu.h"
#include "libavcodec/h264pred.h" #include "libavcodec/h264pred.h"
void ff_pred16x16_vert_neon(uint8_t *src, int stride); void ff_pred16x16_vert_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred16x16_hor_neon(uint8_t *src, int stride); void ff_pred16x16_hor_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred16x16_plane_neon(uint8_t *src, int stride); void ff_pred16x16_plane_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred16x16_dc_neon(uint8_t *src, int stride); void ff_pred16x16_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred16x16_128_dc_neon(uint8_t *src, int stride); void ff_pred16x16_128_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred16x16_left_dc_neon(uint8_t *src, int stride); void ff_pred16x16_left_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred16x16_top_dc_neon(uint8_t *src, int stride); void ff_pred16x16_top_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_vert_neon(uint8_t *src, int stride); void ff_pred8x8_vert_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_hor_neon(uint8_t *src, int stride); void ff_pred8x8_hor_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_plane_neon(uint8_t *src, int stride); void ff_pred8x8_plane_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_dc_neon(uint8_t *src, int stride); void ff_pred8x8_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_128_dc_neon(uint8_t *src, int stride); void ff_pred8x8_128_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_left_dc_neon(uint8_t *src, int stride); void ff_pred8x8_left_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_top_dc_neon(uint8_t *src, int stride); void ff_pred8x8_top_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_l0t_dc_neon(uint8_t *src, int stride); void ff_pred8x8_l0t_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_0lt_dc_neon(uint8_t *src, int stride); void ff_pred8x8_0lt_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_l00_dc_neon(uint8_t *src, int stride); void ff_pred8x8_l00_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_0l0_dc_neon(uint8_t *src, int stride); void ff_pred8x8_0l0_dc_neon(uint8_t *src, ptrdiff_t stride);
static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc) static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc)
{ {

View File

@ -39,7 +39,9 @@
#include "h264pred_template.c" #include "h264pred_template.c"
#undef BIT_DEPTH #undef BIT_DEPTH
static void pred4x4_vertical_vp8_c(uint8_t *src, const uint8_t *topright, int stride){ static void pred4x4_vertical_vp8_c(uint8_t *src, const uint8_t *topright,
ptrdiff_t stride)
{
const unsigned lt = src[-1-1*stride]; const unsigned lt = src[-1-1*stride];
LOAD_TOP_EDGE LOAD_TOP_EDGE
LOAD_TOP_RIGHT_EDGE LOAD_TOP_RIGHT_EDGE
@ -54,7 +56,9 @@ static void pred4x4_vertical_vp8_c(uint8_t *src, const uint8_t *topright, int st
AV_WN32A(src+3*stride, v); AV_WN32A(src+3*stride, v);
} }
static void pred4x4_horizontal_vp8_c(uint8_t *src, const uint8_t *topright, int stride){ static void pred4x4_horizontal_vp8_c(uint8_t *src, const uint8_t *topright,
ptrdiff_t stride)
{
const unsigned lt = src[-1-1*stride]; const unsigned lt = src[-1-1*stride];
LOAD_LEFT_EDGE LOAD_LEFT_EDGE
@ -64,7 +68,9 @@ static void pred4x4_horizontal_vp8_c(uint8_t *src, const uint8_t *topright, int
AV_WN32A(src+3*stride, ((l2 + 2*l3 + l3 + 2) >> 2)*0x01010101); AV_WN32A(src+3*stride, ((l2 + 2*l3 + l3 + 2) >> 2)*0x01010101);
} }
static void pred4x4_down_left_svq3_c(uint8_t *src, const uint8_t *topright, int stride){ static void pred4x4_down_left_svq3_c(uint8_t *src, const uint8_t *topright,
ptrdiff_t stride)
{
LOAD_TOP_EDGE LOAD_TOP_EDGE
LOAD_LEFT_EDGE LOAD_LEFT_EDGE
@ -86,7 +92,9 @@ static void pred4x4_down_left_svq3_c(uint8_t *src, const uint8_t *topright, int
src[3+3*stride]=(l3 + t3)>>1; src[3+3*stride]=(l3 + t3)>>1;
} }
static void pred4x4_down_left_rv40_c(uint8_t *src, const uint8_t *topright, int stride){ static void pred4x4_down_left_rv40_c(uint8_t *src, const uint8_t *topright,
ptrdiff_t stride)
{
LOAD_TOP_EDGE LOAD_TOP_EDGE
LOAD_TOP_RIGHT_EDGE LOAD_TOP_RIGHT_EDGE
LOAD_LEFT_EDGE LOAD_LEFT_EDGE
@ -110,7 +118,10 @@ static void pred4x4_down_left_rv40_c(uint8_t *src, const uint8_t *topright, int
src[3+3*stride]=(t6 + t7 + 1 + l6 + l7 + 1)>>2; src[3+3*stride]=(t6 + t7 + 1 + l6 + l7 + 1)>>2;
} }
static void pred4x4_down_left_rv40_nodown_c(uint8_t *src, const uint8_t *topright, int stride){ static void pred4x4_down_left_rv40_nodown_c(uint8_t *src,
const uint8_t *topright,
ptrdiff_t stride)
{
LOAD_TOP_EDGE LOAD_TOP_EDGE
LOAD_TOP_RIGHT_EDGE LOAD_TOP_RIGHT_EDGE
LOAD_LEFT_EDGE LOAD_LEFT_EDGE
@ -133,8 +144,11 @@ static void pred4x4_down_left_rv40_nodown_c(uint8_t *src, const uint8_t *toprigh
src[3+3*stride]=(t6 + t7 + 1 + 2*l3 + 1)>>2; src[3+3*stride]=(t6 + t7 + 1 + 2*l3 + 1)>>2;
} }
static void pred4x4_vertical_left_rv40(uint8_t *src, const uint8_t *topright, int stride, static void pred4x4_vertical_left_rv40(uint8_t *src, const uint8_t *topright,
const int l0, const int l1, const int l2, const int l3, const int l4){ ptrdiff_t stride,
const int l0, const int l1, const int l2,
const int l3, const int l4)
{
LOAD_TOP_EDGE LOAD_TOP_EDGE
LOAD_TOP_RIGHT_EDGE LOAD_TOP_RIGHT_EDGE
@ -156,20 +170,27 @@ static void pred4x4_vertical_left_rv40(uint8_t *src, const uint8_t *topright, in
src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2; src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
} }
static void pred4x4_vertical_left_rv40_c(uint8_t *src, const uint8_t *topright, int stride){ static void pred4x4_vertical_left_rv40_c(uint8_t *src, const uint8_t *topright,
ptrdiff_t stride)
{
LOAD_LEFT_EDGE LOAD_LEFT_EDGE
LOAD_DOWN_LEFT_EDGE LOAD_DOWN_LEFT_EDGE
pred4x4_vertical_left_rv40(src, topright, stride, l0, l1, l2, l3, l4); pred4x4_vertical_left_rv40(src, topright, stride, l0, l1, l2, l3, l4);
} }
static void pred4x4_vertical_left_rv40_nodown_c(uint8_t *src, const uint8_t *topright, int stride){ static void pred4x4_vertical_left_rv40_nodown_c(uint8_t *src,
const uint8_t *topright,
ptrdiff_t stride)
{
LOAD_LEFT_EDGE LOAD_LEFT_EDGE
pred4x4_vertical_left_rv40(src, topright, stride, l0, l1, l2, l3, l3); pred4x4_vertical_left_rv40(src, topright, stride, l0, l1, l2, l3, l3);
} }
static void pred4x4_vertical_left_vp8_c(uint8_t *src, const uint8_t *topright, int stride){ static void pred4x4_vertical_left_vp8_c(uint8_t *src, const uint8_t *topright,
ptrdiff_t stride)
{
LOAD_TOP_EDGE LOAD_TOP_EDGE
LOAD_TOP_RIGHT_EDGE LOAD_TOP_RIGHT_EDGE
@ -191,7 +212,9 @@ static void pred4x4_vertical_left_vp8_c(uint8_t *src, const uint8_t *topright, i
src[3+3*stride]=(t5 + 2*t6 + t7 + 2)>>2; src[3+3*stride]=(t5 + 2*t6 + t7 + 2)>>2;
} }
static void pred4x4_horizontal_up_rv40_c(uint8_t *src, const uint8_t *topright, int stride){ static void pred4x4_horizontal_up_rv40_c(uint8_t *src, const uint8_t *topright,
ptrdiff_t stride)
{
LOAD_LEFT_EDGE LOAD_LEFT_EDGE
LOAD_DOWN_LEFT_EDGE LOAD_DOWN_LEFT_EDGE
LOAD_TOP_EDGE LOAD_TOP_EDGE
@ -215,7 +238,10 @@ static void pred4x4_horizontal_up_rv40_c(uint8_t *src, const uint8_t *topright,
src[3+3*stride]=(l4 + 2*l5 + l6 + 2)>>2; src[3+3*stride]=(l4 + 2*l5 + l6 + 2)>>2;
} }
static void pred4x4_horizontal_up_rv40_nodown_c(uint8_t *src, const uint8_t *topright, int stride){ static void pred4x4_horizontal_up_rv40_nodown_c(uint8_t *src,
const uint8_t *topright,
ptrdiff_t stride)
{
LOAD_LEFT_EDGE LOAD_LEFT_EDGE
LOAD_TOP_EDGE LOAD_TOP_EDGE
LOAD_TOP_RIGHT_EDGE LOAD_TOP_RIGHT_EDGE
@ -238,7 +264,9 @@ static void pred4x4_horizontal_up_rv40_nodown_c(uint8_t *src, const uint8_t *top
src[3+3*stride]=l3; src[3+3*stride]=l3;
} }
static void pred4x4_tm_vp8_c(uint8_t *src, const uint8_t *topright, int stride){ static void pred4x4_tm_vp8_c(uint8_t *src, const uint8_t *topright,
ptrdiff_t stride)
{
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride]; uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride];
uint8_t *top = src-stride; uint8_t *top = src-stride;
int y; int y;
@ -253,15 +281,18 @@ static void pred4x4_tm_vp8_c(uint8_t *src, const uint8_t *topright, int stride){
} }
} }
static void pred16x16_plane_svq3_c(uint8_t *src, int stride){ static void pred16x16_plane_svq3_c(uint8_t *src, ptrdiff_t stride)
{
pred16x16_plane_compat_8_c(src, stride, 1, 0); pred16x16_plane_compat_8_c(src, stride, 1, 0);
} }
static void pred16x16_plane_rv40_c(uint8_t *src, int stride){ static void pred16x16_plane_rv40_c(uint8_t *src, ptrdiff_t stride)
{
pred16x16_plane_compat_8_c(src, stride, 0, 1); pred16x16_plane_compat_8_c(src, stride, 0, 1);
} }
static void pred16x16_tm_vp8_c(uint8_t *src, int stride){ static void pred16x16_tm_vp8_c(uint8_t *src, ptrdiff_t stride)
{
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride]; uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride];
uint8_t *top = src-stride; uint8_t *top = src-stride;
int y; int y;
@ -288,7 +319,8 @@ static void pred16x16_tm_vp8_c(uint8_t *src, int stride){
} }
} }
static void pred8x8_left_dc_rv40_c(uint8_t *src, int stride){ static void pred8x8_left_dc_rv40_c(uint8_t *src, ptrdiff_t stride)
{
int i; int i;
unsigned dc0; unsigned dc0;
@ -303,7 +335,8 @@ static void pred8x8_left_dc_rv40_c(uint8_t *src, int stride){
} }
} }
static void pred8x8_top_dc_rv40_c(uint8_t *src, int stride){ static void pred8x8_top_dc_rv40_c(uint8_t *src, ptrdiff_t stride)
{
int i; int i;
unsigned dc0; unsigned dc0;
@ -318,7 +351,8 @@ static void pred8x8_top_dc_rv40_c(uint8_t *src, int stride){
} }
} }
static void pred8x8_dc_rv40_c(uint8_t *src, int stride){ static void pred8x8_dc_rv40_c(uint8_t *src, ptrdiff_t stride)
{
int i; int i;
unsigned dc0 = 0; unsigned dc0 = 0;
@ -339,7 +373,8 @@ static void pred8x8_dc_rv40_c(uint8_t *src, int stride){
} }
} }
static void pred8x8_tm_vp8_c(uint8_t *src, int stride){ static void pred8x8_tm_vp8_c(uint8_t *src, ptrdiff_t stride)
{
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride]; uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride];
uint8_t *top = src-stride; uint8_t *top = src-stride;
int y; int y;
@ -361,7 +396,9 @@ static void pred8x8_tm_vp8_c(uint8_t *src, int stride){
/** /**
* Set the intra prediction function pointers. * Set the intra prediction function pointers.
*/ */
void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc){ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth,
const int chroma_format_idc)
{
// MpegEncContext * const s = &h->s; // MpegEncContext * const s = &h->s;
#undef FUNC #undef FUNC

View File

@ -90,21 +90,23 @@
* Context for storing H.264 prediction functions * Context for storing H.264 prediction functions
*/ */
typedef struct H264PredContext { typedef struct H264PredContext {
void(*pred4x4[9 + 3 + 3])(uint8_t *src, const uint8_t *topright, int stride); //FIXME move to dsp? void(*pred4x4[9 + 3 + 3])(uint8_t *src, const uint8_t *topright,
void(*pred8x8l[9 + 3])(uint8_t *src, int topleft, int topright, int stride); ptrdiff_t stride);
void(*pred8x8[4 + 3 + 4])(uint8_t *src, int stride); void(*pred8x8l[9 + 3])(uint8_t *src, int topleft, int topright,
void(*pred16x16[4 + 3 + 2])(uint8_t *src, int stride); ptrdiff_t stride);
void(*pred8x8[4 + 3 + 4])(uint8_t *src, ptrdiff_t stride);
void(*pred16x16[4 + 3 + 2])(uint8_t *src, ptrdiff_t stride);
void(*pred4x4_add[2])(uint8_t *pix /*align 4*/, void(*pred4x4_add[2])(uint8_t *pix /*align 4*/,
const DCTELEM *block /*align 16*/, int stride); const DCTELEM *block /*align 16*/, ptrdiff_t stride);
void(*pred8x8l_add[2])(uint8_t *pix /*align 8*/, void(*pred8x8l_add[2])(uint8_t *pix /*align 8*/,
const DCTELEM *block /*align 16*/, int stride); const DCTELEM *block /*align 16*/, ptrdiff_t stride);
void(*pred8x8_add[3])(uint8_t *pix /*align 8*/, void(*pred8x8_add[3])(uint8_t *pix /*align 8*/,
const int *block_offset, const int *block_offset,
const DCTELEM *block /*align 16*/, int stride); const DCTELEM *block /*align 16*/, ptrdiff_t stride);
void(*pred16x16_add[3])(uint8_t *pix /*align 16*/, void(*pred16x16_add[3])(uint8_t *pix /*align 16*/,
const int *block_offset, const int *block_offset,
const DCTELEM *block /*align 16*/, int stride); const DCTELEM *block /*align 16*/, ptrdiff_t stride);
} H264PredContext; } H264PredContext;
void ff_h264_pred_init(H264PredContext *h, int codec_id, void ff_h264_pred_init(H264PredContext *h, int codec_id,

View File

@ -29,7 +29,9 @@
#include "bit_depth_template.c" #include "bit_depth_template.c"
static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright, int _stride){ static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright,
ptrdiff_t _stride)
{
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
int stride = _stride/sizeof(pixel); int stride = _stride/sizeof(pixel);
const pixel4 a= AV_RN4PA(src-stride); const pixel4 a= AV_RN4PA(src-stride);
@ -40,7 +42,9 @@ static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright, int
AV_WN4PA(src+3*stride, a); AV_WN4PA(src+3*stride, a);
} }
static void FUNCC(pred4x4_horizontal)(uint8_t *_src, const uint8_t *topright, int _stride){ static void FUNCC(pred4x4_horizontal)(uint8_t *_src, const uint8_t *topright,
ptrdiff_t _stride)
{
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
int stride = _stride/sizeof(pixel); int stride = _stride/sizeof(pixel);
AV_WN4PA(src+0*stride, PIXEL_SPLAT_X4(src[-1+0*stride])); AV_WN4PA(src+0*stride, PIXEL_SPLAT_X4(src[-1+0*stride]));
@ -49,7 +53,9 @@ static void FUNCC(pred4x4_horizontal)(uint8_t *_src, const uint8_t *topright, in
AV_WN4PA(src+3*stride, PIXEL_SPLAT_X4(src[-1+3*stride])); AV_WN4PA(src+3*stride, PIXEL_SPLAT_X4(src[-1+3*stride]));
} }
static void FUNCC(pred4x4_dc)(uint8_t *_src, const uint8_t *topright, int _stride){ static void FUNCC(pred4x4_dc)(uint8_t *_src, const uint8_t *topright,
ptrdiff_t _stride)
{
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
int stride = _stride/sizeof(pixel); int stride = _stride/sizeof(pixel);
const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
@ -62,7 +68,9 @@ static void FUNCC(pred4x4_dc)(uint8_t *_src, const uint8_t *topright, int _strid
AV_WN4PA(src+3*stride, a); AV_WN4PA(src+3*stride, a);
} }
static void FUNCC(pred4x4_left_dc)(uint8_t *_src, const uint8_t *topright, int _stride){ static void FUNCC(pred4x4_left_dc)(uint8_t *_src, const uint8_t *topright,
ptrdiff_t _stride)
{
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
int stride = _stride/sizeof(pixel); int stride = _stride/sizeof(pixel);
const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2; const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
@ -74,7 +82,9 @@ static void FUNCC(pred4x4_left_dc)(uint8_t *_src, const uint8_t *topright, int _
AV_WN4PA(src+3*stride, a); AV_WN4PA(src+3*stride, a);
} }
static void FUNCC(pred4x4_top_dc)(uint8_t *_src, const uint8_t *topright, int _stride){ static void FUNCC(pred4x4_top_dc)(uint8_t *_src, const uint8_t *topright,
ptrdiff_t _stride)
{
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
int stride = _stride/sizeof(pixel); int stride = _stride/sizeof(pixel);
const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2; const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
@ -86,7 +96,9 @@ static void FUNCC(pred4x4_top_dc)(uint8_t *_src, const uint8_t *topright, int _s
AV_WN4PA(src+3*stride, a); AV_WN4PA(src+3*stride, a);
} }
static void FUNCC(pred4x4_128_dc)(uint8_t *_src, const uint8_t *topright, int _stride){ static void FUNCC(pred4x4_128_dc)(uint8_t *_src, const uint8_t *topright,
ptrdiff_t _stride)
{
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
int stride = _stride/sizeof(pixel); int stride = _stride/sizeof(pixel);
const pixel4 a = PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1)); const pixel4 a = PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1));
@ -97,7 +109,9 @@ static void FUNCC(pred4x4_128_dc)(uint8_t *_src, const uint8_t *topright, int _s
AV_WN4PA(src+3*stride, a); AV_WN4PA(src+3*stride, a);
} }
static void FUNCC(pred4x4_127_dc)(uint8_t *_src, const uint8_t *topright, int _stride){ static void FUNCC(pred4x4_127_dc)(uint8_t *_src, const uint8_t *topright,
ptrdiff_t _stride)
{
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
int stride = _stride/sizeof(pixel); int stride = _stride/sizeof(pixel);
const pixel4 a = PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))-1); const pixel4 a = PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))-1);
@ -108,7 +122,9 @@ static void FUNCC(pred4x4_127_dc)(uint8_t *_src, const uint8_t *topright, int _s
AV_WN4PA(src+3*stride, a); AV_WN4PA(src+3*stride, a);
} }
static void FUNCC(pred4x4_129_dc)(uint8_t *_src, const uint8_t *topright, int _stride){ static void FUNCC(pred4x4_129_dc)(uint8_t *_src, const uint8_t *topright,
ptrdiff_t _stride)
{
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
int stride = _stride/sizeof(pixel); int stride = _stride/sizeof(pixel);
const pixel4 a = PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))+1); const pixel4 a = PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))+1);
@ -144,7 +160,9 @@ static void FUNCC(pred4x4_129_dc)(uint8_t *_src, const uint8_t *topright, int _s
const unsigned av_unused t2 = src[ 2-1*stride];\ const unsigned av_unused t2 = src[ 2-1*stride];\
const unsigned av_unused t3 = src[ 3-1*stride];\ const unsigned av_unused t3 = src[ 3-1*stride];\
static void FUNCC(pred4x4_down_right)(uint8_t *_src, const uint8_t *topright, int _stride){ static void FUNCC(pred4x4_down_right)(uint8_t *_src, const uint8_t *topright,
ptrdiff_t _stride)
{
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
int stride = _stride/sizeof(pixel); int stride = _stride/sizeof(pixel);
const int lt= src[-1-1*stride]; const int lt= src[-1-1*stride];
@ -169,7 +187,9 @@ static void FUNCC(pred4x4_down_right)(uint8_t *_src, const uint8_t *topright, in
src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2; src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
} }
static void FUNCC(pred4x4_down_left)(uint8_t *_src, const uint8_t *_topright, int _stride){ static void FUNCC(pred4x4_down_left)(uint8_t *_src, const uint8_t *_topright,
ptrdiff_t _stride)
{
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
const pixel *topright = (const pixel*)_topright; const pixel *topright = (const pixel*)_topright;
int stride = _stride/sizeof(pixel); int stride = _stride/sizeof(pixel);
@ -195,7 +215,10 @@ static void FUNCC(pred4x4_down_left)(uint8_t *_src, const uint8_t *_topright, in
src[3+3*stride]=(t6 + 3*t7 + 2)>>2; src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
} }
static void FUNCC(pred4x4_vertical_right)(uint8_t *_src, const uint8_t *topright, int _stride){ static void FUNCC(pred4x4_vertical_right)(uint8_t *_src,
const uint8_t *topright,
ptrdiff_t _stride)
{
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
int stride = _stride/sizeof(pixel); int stride = _stride/sizeof(pixel);
const int lt= src[-1-1*stride]; const int lt= src[-1-1*stride];
@ -220,7 +243,10 @@ static void FUNCC(pred4x4_vertical_right)(uint8_t *_src, const uint8_t *topright
src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2; src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
} }
static void FUNCC(pred4x4_vertical_left)(uint8_t *_src, const uint8_t *_topright, int _stride){ static void FUNCC(pred4x4_vertical_left)(uint8_t *_src,
const uint8_t *_topright,
ptrdiff_t _stride)
{
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
const pixel *topright = (const pixel*)_topright; const pixel *topright = (const pixel*)_topright;
int stride = _stride/sizeof(pixel); int stride = _stride/sizeof(pixel);
@ -245,7 +271,9 @@ static void FUNCC(pred4x4_vertical_left)(uint8_t *_src, const uint8_t *_topright
src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2; src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
} }
static void FUNCC(pred4x4_horizontal_up)(uint8_t *_src, const uint8_t *topright, int _stride){ static void FUNCC(pred4x4_horizontal_up)(uint8_t *_src, const uint8_t *topright,
ptrdiff_t _stride)
{
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
int stride = _stride/sizeof(pixel); int stride = _stride/sizeof(pixel);
LOAD_LEFT_EDGE LOAD_LEFT_EDGE
@ -268,7 +296,10 @@ static void FUNCC(pred4x4_horizontal_up)(uint8_t *_src, const uint8_t *topright,
src[3+3*stride]=l3; src[3+3*stride]=l3;
} }
static void FUNCC(pred4x4_horizontal_down)(uint8_t *_src, const uint8_t *topright, int _stride){ static void FUNCC(pred4x4_horizontal_down)(uint8_t *_src,
const uint8_t *topright,
ptrdiff_t _stride)
{
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
int stride = _stride/sizeof(pixel); int stride = _stride/sizeof(pixel);
const int lt= src[-1-1*stride]; const int lt= src[-1-1*stride];
@ -293,7 +324,8 @@ static void FUNCC(pred4x4_horizontal_down)(uint8_t *_src, const uint8_t *toprigh
src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2; src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
} }
static void FUNCC(pred16x16_vertical)(uint8_t *_src, int _stride){ static void FUNCC(pred16x16_vertical)(uint8_t *_src, ptrdiff_t _stride)
{
int i; int i;
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
int stride = _stride/sizeof(pixel); int stride = _stride/sizeof(pixel);
@ -310,7 +342,8 @@ static void FUNCC(pred16x16_vertical)(uint8_t *_src, int _stride){
} }
} }
static void FUNCC(pred16x16_horizontal)(uint8_t *_src, int stride){ static void FUNCC(pred16x16_horizontal)(uint8_t *_src, ptrdiff_t stride)
{
int i; int i;
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
stride /= sizeof(pixel); stride /= sizeof(pixel);
@ -334,7 +367,8 @@ static void FUNCC(pred16x16_horizontal)(uint8_t *_src, int stride){
src += stride;\ src += stride;\
} }
static void FUNCC(pred16x16_dc)(uint8_t *_src, int stride){ static void FUNCC(pred16x16_dc)(uint8_t *_src, ptrdiff_t stride)
{
int i, dc=0; int i, dc=0;
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
pixel4 dcsplat; pixel4 dcsplat;
@ -352,7 +386,8 @@ static void FUNCC(pred16x16_dc)(uint8_t *_src, int stride){
PREDICT_16x16_DC(dcsplat); PREDICT_16x16_DC(dcsplat);
} }
static void FUNCC(pred16x16_left_dc)(uint8_t *_src, int stride){ static void FUNCC(pred16x16_left_dc)(uint8_t *_src, ptrdiff_t stride)
{
int i, dc=0; int i, dc=0;
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
pixel4 dcsplat; pixel4 dcsplat;
@ -366,7 +401,8 @@ static void FUNCC(pred16x16_left_dc)(uint8_t *_src, int stride){
PREDICT_16x16_DC(dcsplat); PREDICT_16x16_DC(dcsplat);
} }
static void FUNCC(pred16x16_top_dc)(uint8_t *_src, int stride){ static void FUNCC(pred16x16_top_dc)(uint8_t *_src, ptrdiff_t stride)
{
int i, dc=0; int i, dc=0;
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
pixel4 dcsplat; pixel4 dcsplat;
@ -381,7 +417,8 @@ static void FUNCC(pred16x16_top_dc)(uint8_t *_src, int stride){
} }
#define PRED16x16_X(n, v) \ #define PRED16x16_X(n, v) \
static void FUNCC(pred16x16_##n##_dc)(uint8_t *_src, int stride){\ static void FUNCC(pred16x16_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\
{\
int i;\ int i;\
pixel *src = (pixel*)_src;\ pixel *src = (pixel*)_src;\
stride /= sizeof(pixel);\ stride /= sizeof(pixel);\
@ -392,7 +429,11 @@ PRED16x16_X(127, (1<<(BIT_DEPTH-1))-1)
PRED16x16_X(128, (1<<(BIT_DEPTH-1))+0) PRED16x16_X(128, (1<<(BIT_DEPTH-1))+0)
PRED16x16_X(129, (1<<(BIT_DEPTH-1))+1) PRED16x16_X(129, (1<<(BIT_DEPTH-1))+1)
static inline void FUNCC(pred16x16_plane_compat)(uint8_t *_src, int _stride, const int svq3, const int rv40){ static inline void FUNCC(pred16x16_plane_compat)(uint8_t *_src,
ptrdiff_t _stride,
const int svq3,
const int rv40)
{
int i, j, k; int i, j, k;
int a; int a;
INIT_CLIP INIT_CLIP
@ -437,11 +478,13 @@ static inline void FUNCC(pred16x16_plane_compat)(uint8_t *_src, int _stride, con
} }
} }
static void FUNCC(pred16x16_plane)(uint8_t *src, int stride){ static void FUNCC(pred16x16_plane)(uint8_t *src, ptrdiff_t stride)
{
FUNCC(pred16x16_plane_compat)(src, stride, 0, 0); FUNCC(pred16x16_plane_compat)(src, stride, 0, 0);
} }
static void FUNCC(pred8x8_vertical)(uint8_t *_src, int _stride){ static void FUNCC(pred8x8_vertical)(uint8_t *_src, ptrdiff_t _stride)
{
int i; int i;
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
int stride = _stride/sizeof(pixel); int stride = _stride/sizeof(pixel);
@ -454,7 +497,8 @@ static void FUNCC(pred8x8_vertical)(uint8_t *_src, int _stride){
} }
} }
static void FUNCC(pred8x16_vertical)(uint8_t *_src, int _stride){ static void FUNCC(pred8x16_vertical)(uint8_t *_src, ptrdiff_t _stride)
{
int i; int i;
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
int stride = _stride>>(sizeof(pixel)-1); int stride = _stride>>(sizeof(pixel)-1);
@ -467,7 +511,8 @@ static void FUNCC(pred8x16_vertical)(uint8_t *_src, int _stride){
} }
} }
static void FUNCC(pred8x8_horizontal)(uint8_t *_src, int stride){ static void FUNCC(pred8x8_horizontal)(uint8_t *_src, ptrdiff_t stride)
{
int i; int i;
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
stride /= sizeof(pixel); stride /= sizeof(pixel);
@ -479,7 +524,8 @@ static void FUNCC(pred8x8_horizontal)(uint8_t *_src, int stride){
} }
} }
static void FUNCC(pred8x16_horizontal)(uint8_t *_src, int stride){ static void FUNCC(pred8x16_horizontal)(uint8_t *_src, ptrdiff_t stride)
{
int i; int i;
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
stride >>= sizeof(pixel)-1; stride >>= sizeof(pixel)-1;
@ -491,7 +537,8 @@ static void FUNCC(pred8x16_horizontal)(uint8_t *_src, int stride){
} }
#define PRED8x8_X(n, v)\ #define PRED8x8_X(n, v)\
static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, int stride){\ static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\
{\
int i;\ int i;\
const pixel4 a = PIXEL_SPLAT_X4(v);\ const pixel4 a = PIXEL_SPLAT_X4(v);\
pixel *src = (pixel*)_src;\ pixel *src = (pixel*)_src;\
@ -506,12 +553,14 @@ PRED8x8_X(127, (1<<(BIT_DEPTH-1))-1)
PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0) PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0)
PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1) PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1)
static void FUNCC(pred8x16_128_dc)(uint8_t *_src, int stride){ static void FUNCC(pred8x16_128_dc)(uint8_t *_src, ptrdiff_t stride)
{
FUNCC(pred8x8_128_dc)(_src, stride); FUNCC(pred8x8_128_dc)(_src, stride);
FUNCC(pred8x8_128_dc)(_src+8*stride, stride); FUNCC(pred8x8_128_dc)(_src+8*stride, stride);
} }
static void FUNCC(pred8x8_left_dc)(uint8_t *_src, int stride){ static void FUNCC(pred8x8_left_dc)(uint8_t *_src, ptrdiff_t stride)
{
int i; int i;
int dc0, dc2; int dc0, dc2;
pixel4 dc0splat, dc2splat; pixel4 dc0splat, dc2splat;
@ -536,12 +585,14 @@ static void FUNCC(pred8x8_left_dc)(uint8_t *_src, int stride){
} }
} }
static void FUNCC(pred8x16_left_dc)(uint8_t *_src, int stride){ static void FUNCC(pred8x16_left_dc)(uint8_t *_src, ptrdiff_t stride)
{
FUNCC(pred8x8_left_dc)(_src, stride); FUNCC(pred8x8_left_dc)(_src, stride);
FUNCC(pred8x8_left_dc)(_src+8*stride, stride); FUNCC(pred8x8_left_dc)(_src+8*stride, stride);
} }
static void FUNCC(pred8x8_top_dc)(uint8_t *_src, int stride){ static void FUNCC(pred8x8_top_dc)(uint8_t *_src, ptrdiff_t stride)
{
int i; int i;
int dc0, dc1; int dc0, dc1;
pixel4 dc0splat, dc1splat; pixel4 dc0splat, dc1splat;
@ -566,7 +617,8 @@ static void FUNCC(pred8x8_top_dc)(uint8_t *_src, int stride){
} }
} }
static void FUNCC(pred8x16_top_dc)(uint8_t *_src, int stride){ static void FUNCC(pred8x16_top_dc)(uint8_t *_src, ptrdiff_t stride)
{
int i; int i;
int dc0, dc1; int dc0, dc1;
pixel4 dc0splat, dc1splat; pixel4 dc0splat, dc1splat;
@ -587,7 +639,8 @@ static void FUNCC(pred8x16_top_dc)(uint8_t *_src, int stride){
} }
} }
static void FUNCC(pred8x8_dc)(uint8_t *_src, int stride){ static void FUNCC(pred8x8_dc)(uint8_t *_src, ptrdiff_t stride)
{
int i; int i;
int dc0, dc1, dc2; int dc0, dc1, dc2;
pixel4 dc0splat, dc1splat, dc2splat, dc3splat; pixel4 dc0splat, dc1splat, dc2splat, dc3splat;
@ -615,7 +668,8 @@ static void FUNCC(pred8x8_dc)(uint8_t *_src, int stride){
} }
} }
static void FUNCC(pred8x16_dc)(uint8_t *_src, int stride){ static void FUNCC(pred8x16_dc)(uint8_t *_src, ptrdiff_t stride)
{
int i; int i;
int dc0, dc1, dc2, dc3, dc4; int dc0, dc1, dc2, dc3, dc4;
pixel4 dc0splat, dc1splat, dc2splat, dc3splat, dc4splat, dc5splat, dc6splat, dc7splat; pixel4 dc0splat, dc1splat, dc2splat, dc3splat, dc4splat, dc5splat, dc6splat, dc7splat;
@ -657,51 +711,60 @@ static void FUNCC(pred8x16_dc)(uint8_t *_src, int stride){
} }
} }
static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, int stride){ static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, ptrdiff_t stride)
{
FUNCC(pred8x8_top_dc)(src, stride); FUNCC(pred8x8_top_dc)(src, stride);
FUNCC(pred4x4_dc)(src, NULL, stride); FUNCC(pred4x4_dc)(src, NULL, stride);
} }
static void FUNC(pred8x16_mad_cow_dc_l0t)(uint8_t *src, int stride){ static void FUNC(pred8x16_mad_cow_dc_l0t)(uint8_t *src, ptrdiff_t stride)
{
FUNCC(pred8x16_top_dc)(src, stride); FUNCC(pred8x16_top_dc)(src, stride);
FUNCC(pred4x4_dc)(src, NULL, stride); FUNCC(pred4x4_dc)(src, NULL, stride);
} }
static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, int stride){ static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, ptrdiff_t stride)
{
FUNCC(pred8x8_dc)(src, stride); FUNCC(pred8x8_dc)(src, stride);
FUNCC(pred4x4_top_dc)(src, NULL, stride); FUNCC(pred4x4_top_dc)(src, NULL, stride);
} }
static void FUNC(pred8x16_mad_cow_dc_0lt)(uint8_t *src, int stride){ static void FUNC(pred8x16_mad_cow_dc_0lt)(uint8_t *src, ptrdiff_t stride)
{
FUNCC(pred8x16_dc)(src, stride); FUNCC(pred8x16_dc)(src, stride);
FUNCC(pred4x4_top_dc)(src, NULL, stride); FUNCC(pred4x4_top_dc)(src, NULL, stride);
} }
static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, int stride){ static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, ptrdiff_t stride)
{
FUNCC(pred8x8_left_dc)(src, stride); FUNCC(pred8x8_left_dc)(src, stride);
FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride); FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride);
FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride); FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
} }
static void FUNC(pred8x16_mad_cow_dc_l00)(uint8_t *src, int stride){ static void FUNC(pred8x16_mad_cow_dc_l00)(uint8_t *src, ptrdiff_t stride)
{
FUNCC(pred8x16_left_dc)(src, stride); FUNCC(pred8x16_left_dc)(src, stride);
FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride); FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride);
FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride); FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
} }
static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, int stride){ static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, ptrdiff_t stride)
{
FUNCC(pred8x8_left_dc)(src, stride); FUNCC(pred8x8_left_dc)(src, stride);
FUNCC(pred4x4_128_dc)(src , NULL, stride); FUNCC(pred4x4_128_dc)(src , NULL, stride);
FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride); FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
} }
static void FUNC(pred8x16_mad_cow_dc_0l0)(uint8_t *src, int stride){ static void FUNC(pred8x16_mad_cow_dc_0l0)(uint8_t *src, ptrdiff_t stride)
{
FUNCC(pred8x16_left_dc)(src, stride); FUNCC(pred8x16_left_dc)(src, stride);
FUNCC(pred4x4_128_dc)(src , NULL, stride); FUNCC(pred4x4_128_dc)(src , NULL, stride);
FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride); FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
} }
static void FUNCC(pred8x8_plane)(uint8_t *_src, int _stride){ static void FUNCC(pred8x8_plane)(uint8_t *_src, ptrdiff_t _stride)
{
int j, k; int j, k;
int a; int a;
INIT_CLIP INIT_CLIP
@ -736,7 +799,8 @@ static void FUNCC(pred8x8_plane)(uint8_t *_src, int _stride){
} }
} }
static void FUNCC(pred8x16_plane)(uint8_t *_src, int _stride){ static void FUNCC(pred8x16_plane)(uint8_t *_src, ptrdiff_t _stride)
{
int j, k; int j, k;
int a; int a;
INIT_CLIP INIT_CLIP
@ -815,14 +879,16 @@ static void FUNCC(pred8x16_plane)(uint8_t *_src, int _stride){
src += stride; \ src += stride; \
} }
static void FUNCC(pred8x8l_128_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride) static void FUNCC(pred8x8l_128_dc)(uint8_t *_src, int has_topleft,
int has_topright, ptrdiff_t _stride)
{ {
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
int stride = _stride/sizeof(pixel); int stride = _stride/sizeof(pixel);
PREDICT_8x8_DC(PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1))); PREDICT_8x8_DC(PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1)));
} }
static void FUNCC(pred8x8l_left_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride) static void FUNCC(pred8x8l_left_dc)(uint8_t *_src, int has_topleft,
int has_topright, ptrdiff_t _stride)
{ {
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
int stride = _stride/sizeof(pixel); int stride = _stride/sizeof(pixel);
@ -831,7 +897,8 @@ static void FUNCC(pred8x8l_left_dc)(uint8_t *_src, int has_topleft, int has_topr
const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3); const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3);
PREDICT_8x8_DC(dc); PREDICT_8x8_DC(dc);
} }
static void FUNCC(pred8x8l_top_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride) static void FUNCC(pred8x8l_top_dc)(uint8_t *_src, int has_topleft,
int has_topright, ptrdiff_t _stride)
{ {
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
int stride = _stride/sizeof(pixel); int stride = _stride/sizeof(pixel);
@ -840,7 +907,8 @@ static void FUNCC(pred8x8l_top_dc)(uint8_t *_src, int has_topleft, int has_topri
const pixel4 dc = PIXEL_SPLAT_X4((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3); const pixel4 dc = PIXEL_SPLAT_X4((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3);
PREDICT_8x8_DC(dc); PREDICT_8x8_DC(dc);
} }
static void FUNCC(pred8x8l_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride) static void FUNCC(pred8x8l_dc)(uint8_t *_src, int has_topleft,
int has_topright, ptrdiff_t _stride)
{ {
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
int stride = _stride/sizeof(pixel); int stride = _stride/sizeof(pixel);
@ -851,7 +919,8 @@ static void FUNCC(pred8x8l_dc)(uint8_t *_src, int has_topleft, int has_topright,
+t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4); +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4);
PREDICT_8x8_DC(dc); PREDICT_8x8_DC(dc);
} }
static void FUNCC(pred8x8l_horizontal)(uint8_t *_src, int has_topleft, int has_topright, int _stride) static void FUNCC(pred8x8l_horizontal)(uint8_t *_src, int has_topleft,
int has_topright, ptrdiff_t _stride)
{ {
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
int stride = _stride/sizeof(pixel); int stride = _stride/sizeof(pixel);
@ -864,7 +933,8 @@ static void FUNCC(pred8x8l_horizontal)(uint8_t *_src, int has_topleft, int has_t
ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7); ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
#undef ROW #undef ROW
} }
static void FUNCC(pred8x8l_vertical)(uint8_t *_src, int has_topleft, int has_topright, int _stride) static void FUNCC(pred8x8l_vertical)(uint8_t *_src, int has_topleft,
int has_topright, ptrdiff_t _stride)
{ {
int y; int y;
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
@ -887,7 +957,8 @@ static void FUNCC(pred8x8l_vertical)(uint8_t *_src, int has_topleft, int has_top
AV_WN4PA(((pixel4*)(src+y*stride))+1, b); AV_WN4PA(((pixel4*)(src+y*stride))+1, b);
} }
} }
static void FUNCC(pred8x8l_down_left)(uint8_t *_src, int has_topleft, int has_topright, int _stride) static void FUNCC(pred8x8l_down_left)(uint8_t *_src, int has_topleft,
int has_topright, ptrdiff_t _stride)
{ {
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
int stride = _stride/sizeof(pixel); int stride = _stride/sizeof(pixel);
@ -909,7 +980,8 @@ static void FUNCC(pred8x8l_down_left)(uint8_t *_src, int has_topleft, int has_to
SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2; SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
SRC(7,7)= (t14 + 3*t15 + 2) >> 2; SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
} }
static void FUNCC(pred8x8l_down_right)(uint8_t *_src, int has_topleft, int has_topright, int _stride) static void FUNCC(pred8x8l_down_right)(uint8_t *_src, int has_topleft,
int has_topright, ptrdiff_t _stride)
{ {
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
int stride = _stride/sizeof(pixel); int stride = _stride/sizeof(pixel);
@ -932,7 +1004,8 @@ static void FUNCC(pred8x8l_down_right)(uint8_t *_src, int has_topleft, int has_t
SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2; SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2; SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
} }
static void FUNCC(pred8x8l_vertical_right)(uint8_t *_src, int has_topleft, int has_topright, int _stride) static void FUNCC(pred8x8l_vertical_right)(uint8_t *_src, int has_topleft,
int has_topright, ptrdiff_t _stride)
{ {
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
int stride = _stride/sizeof(pixel); int stride = _stride/sizeof(pixel);
@ -962,7 +1035,8 @@ static void FUNCC(pred8x8l_vertical_right)(uint8_t *_src, int has_topleft, int h
SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2; SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
SRC(7,0)= (t6 + t7 + 1) >> 1; SRC(7,0)= (t6 + t7 + 1) >> 1;
} }
static void FUNCC(pred8x8l_horizontal_down)(uint8_t *_src, int has_topleft, int has_topright, int _stride) static void FUNCC(pred8x8l_horizontal_down)(uint8_t *_src, int has_topleft,
int has_topright, ptrdiff_t _stride)
{ {
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
int stride = _stride/sizeof(pixel); int stride = _stride/sizeof(pixel);
@ -992,7 +1066,8 @@ static void FUNCC(pred8x8l_horizontal_down)(uint8_t *_src, int has_topleft, int
SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2; SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2; SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
} }
static void FUNCC(pred8x8l_vertical_left)(uint8_t *_src, int has_topleft, int has_topright, int _stride) static void FUNCC(pred8x8l_vertical_left)(uint8_t *_src, int has_topleft,
int has_topright, ptrdiff_t _stride)
{ {
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
int stride = _stride/sizeof(pixel); int stride = _stride/sizeof(pixel);
@ -1021,7 +1096,8 @@ static void FUNCC(pred8x8l_vertical_left)(uint8_t *_src, int has_topleft, int ha
SRC(7,6)= (t10 + t11 + 1) >> 1; SRC(7,6)= (t10 + t11 + 1) >> 1;
SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2; SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
} }
static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft, int has_topright, int _stride) static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft,
int has_topright, ptrdiff_t _stride)
{ {
pixel *src = (pixel*)_src; pixel *src = (pixel*)_src;
int stride = _stride/sizeof(pixel); int stride = _stride/sizeof(pixel);
@ -1055,7 +1131,9 @@ static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft, int ha
#undef PL #undef PL
#undef SRC #undef SRC
static void FUNCC(pred4x4_vertical_add)(uint8_t *_pix, const DCTELEM *_block, int stride){ static void FUNCC(pred4x4_vertical_add)(uint8_t *_pix, const DCTELEM *_block,
ptrdiff_t stride)
{
int i; int i;
pixel *pix = (pixel*)_pix; pixel *pix = (pixel*)_pix;
const dctcoef *block = (const dctcoef*)_block; const dctcoef *block = (const dctcoef*)_block;
@ -1072,7 +1150,9 @@ static void FUNCC(pred4x4_vertical_add)(uint8_t *_pix, const DCTELEM *_block, in
} }
} }
static void FUNCC(pred4x4_horizontal_add)(uint8_t *_pix, const DCTELEM *_block, int stride){ static void FUNCC(pred4x4_horizontal_add)(uint8_t *_pix, const DCTELEM *_block,
ptrdiff_t stride)
{
int i; int i;
pixel *pix = (pixel*)_pix; pixel *pix = (pixel*)_pix;
const dctcoef *block = (const dctcoef*)_block; const dctcoef *block = (const dctcoef*)_block;
@ -1088,7 +1168,9 @@ static void FUNCC(pred4x4_horizontal_add)(uint8_t *_pix, const DCTELEM *_block,
} }
} }
static void FUNCC(pred8x8l_vertical_add)(uint8_t *_pix, const DCTELEM *_block, int stride){ static void FUNCC(pred8x8l_vertical_add)(uint8_t *_pix, const DCTELEM *_block,
ptrdiff_t stride)
{
int i; int i;
pixel *pix = (pixel*)_pix; pixel *pix = (pixel*)_pix;
const dctcoef *block = (const dctcoef*)_block; const dctcoef *block = (const dctcoef*)_block;
@ -1109,7 +1191,9 @@ static void FUNCC(pred8x8l_vertical_add)(uint8_t *_pix, const DCTELEM *_block, i
} }
} }
static void FUNCC(pred8x8l_horizontal_add)(uint8_t *_pix, const DCTELEM *_block, int stride){ static void FUNCC(pred8x8l_horizontal_add)(uint8_t *_pix, const DCTELEM *_block,
ptrdiff_t stride)
{
int i; int i;
pixel *pix = (pixel*)_pix; pixel *pix = (pixel*)_pix;
const dctcoef *block = (const dctcoef*)_block; const dctcoef *block = (const dctcoef*)_block;
@ -1129,25 +1213,36 @@ static void FUNCC(pred8x8l_horizontal_add)(uint8_t *_pix, const DCTELEM *_block,
} }
} }
static void FUNCC(pred16x16_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ static void FUNCC(pred16x16_vertical_add)(uint8_t *pix, const int *block_offset,
const DCTELEM *block,
ptrdiff_t stride)
{
int i; int i;
for(i=0; i<16; i++) for(i=0; i<16; i++)
FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
} }
static void FUNCC(pred16x16_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ static void FUNCC(pred16x16_horizontal_add)(uint8_t *pix,
const int *block_offset,
const DCTELEM *block,
ptrdiff_t stride)
{
int i; int i;
for(i=0; i<16; i++) for(i=0; i<16; i++)
FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
} }
static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset,
const DCTELEM *block, ptrdiff_t stride)
{
int i; int i;
for(i=0; i<4; i++) for(i=0; i<4; i++)
FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
} }
static void FUNCC(pred8x16_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ static void FUNCC(pred8x16_vertical_add)(uint8_t *pix, const int *block_offset,
const DCTELEM *block, ptrdiff_t stride)
{
int i; int i;
for(i=0; i<4; i++) for(i=0; i<4; i++)
FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
@ -1155,13 +1250,19 @@ static void FUNCC(pred8x16_vertical_add)(uint8_t *pix, const int *block_offset,
FUNCC(pred4x4_vertical_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride); FUNCC(pred4x4_vertical_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride);
} }
static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset,
const DCTELEM *block,
ptrdiff_t stride)
{
int i; int i;
for(i=0; i<4; i++) for(i=0; i<4; i++)
FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
} }
static void FUNCC(pred8x16_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ static void FUNCC(pred8x16_horizontal_add)(uint8_t *pix,
const int *block_offset,
const DCTELEM *block, ptrdiff_t stride)
{
int i; int i;
for(i=0; i<4; i++) for(i=0; i<4; i++)
FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);

View File

@ -23,7 +23,9 @@
#include "libavcodec/h264pred.h" #include "libavcodec/h264pred.h"
#define PRED4x4(TYPE, DEPTH, OPT) \ #define PRED4x4(TYPE, DEPTH, OPT) \
void ff_pred4x4_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, const uint8_t *topright, int stride); void ff_pred4x4_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
const uint8_t *topright, \
ptrdiff_t stride);
PRED4x4(dc, 10, mmx2) PRED4x4(dc, 10, mmx2)
PRED4x4(down_left, 10, sse2) PRED4x4(down_left, 10, sse2)
@ -42,7 +44,8 @@ PRED4x4(horizontal_down, 10, ssse3)
PRED4x4(horizontal_down, 10, avx) PRED4x4(horizontal_down, 10, avx)
#define PRED8x8(TYPE, DEPTH, OPT) \ #define PRED8x8(TYPE, DEPTH, OPT) \
void ff_pred8x8_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, int stride); void ff_pred8x8_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
ptrdiff_t stride);
PRED8x8(dc, 10, mmx2) PRED8x8(dc, 10, mmx2)
PRED8x8(dc, 10, sse2) PRED8x8(dc, 10, sse2)
@ -52,7 +55,10 @@ PRED8x8(vertical, 10, sse2)
PRED8x8(horizontal, 10, sse2) PRED8x8(horizontal, 10, sse2)
#define PRED8x8L(TYPE, DEPTH, OPT)\ #define PRED8x8L(TYPE, DEPTH, OPT)\
void ff_pred8x8l_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, int has_topleft, int has_topright, int stride); void ff_pred8x8l_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
int has_topleft, \
int has_topright, \
ptrdiff_t stride);
PRED8x8L(dc, 10, sse2) PRED8x8L(dc, 10, sse2)
PRED8x8L(dc, 10, avx) PRED8x8L(dc, 10, avx)
@ -79,7 +85,8 @@ PRED8x8L(horizontal_up, 10, ssse3)
PRED8x8L(horizontal_up, 10, avx) PRED8x8L(horizontal_up, 10, avx)
#define PRED16x16(TYPE, DEPTH, OPT)\ #define PRED16x16(TYPE, DEPTH, OPT)\
void ff_pred16x16_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, int stride); void ff_pred16x16_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
ptrdiff_t stride);
PRED16x16(dc, 10, mmx2) PRED16x16(dc, 10, mmx2)
PRED16x16(dc, 10, sse2) PRED16x16(dc, 10, sse2)