mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
pp: rework the way templating is done.
This refactoring simplifies the usage of the template: defining the profile and including the template is all that is required. It should now be easier to add more instruction sets. The HAVE_* flags are replaced with TEMPLATE_PP_* settings to avoid messing them up. See the top comment in postprocess_template.c for details.
This commit is contained in:
parent
6535d81d87
commit
375cd3f2ec
@ -538,85 +538,37 @@ static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride,
|
||||
//Note: we have C, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one
|
||||
//Plain C versions
|
||||
//we always compile C for testing which needs bitexactness
|
||||
#define COMPILE_C
|
||||
#define TEMPLATE_PP_C 1
|
||||
#include "postprocess_template.c"
|
||||
|
||||
#if HAVE_ALTIVEC
|
||||
#define COMPILE_ALTIVEC
|
||||
#endif //HAVE_ALTIVEC
|
||||
|
||||
#if ARCH_X86 && HAVE_INLINE_ASM
|
||||
|
||||
#if (HAVE_MMX_INLINE && !HAVE_AMD3DNOW_INLINE && !HAVE_MMXEXT_INLINE) || CONFIG_RUNTIME_CPUDETECT
|
||||
#define COMPILE_MMX
|
||||
#endif
|
||||
|
||||
#if HAVE_MMXEXT_INLINE || CONFIG_RUNTIME_CPUDETECT
|
||||
#define COMPILE_MMX2
|
||||
#endif
|
||||
|
||||
#if (HAVE_AMD3DNOW_INLINE && !HAVE_MMXEXT_INLINE) || CONFIG_RUNTIME_CPUDETECT
|
||||
#define COMPILE_3DNOW
|
||||
#endif
|
||||
#endif /* ARCH_X86 */
|
||||
|
||||
#undef HAVE_MMX_INLINE
|
||||
#define HAVE_MMX_INLINE 0
|
||||
#undef HAVE_MMXEXT_INLINE
|
||||
#define HAVE_MMXEXT_INLINE 0
|
||||
#undef HAVE_AMD3DNOW_INLINE
|
||||
#define HAVE_AMD3DNOW_INLINE 0
|
||||
#undef HAVE_ALTIVEC
|
||||
#define HAVE_ALTIVEC 0
|
||||
|
||||
#ifdef COMPILE_C
|
||||
#define RENAME(a) a ## _C
|
||||
#include "postprocess_template.c"
|
||||
#endif
|
||||
|
||||
#ifdef COMPILE_ALTIVEC
|
||||
#undef RENAME
|
||||
#undef HAVE_ALTIVEC
|
||||
#define HAVE_ALTIVEC 1
|
||||
#define RENAME(a) a ## _altivec
|
||||
# define TEMPLATE_PP_ALTIVEC 1
|
||||
# include "postprocess_altivec_template.c"
|
||||
# include "postprocess_template.c"
|
||||
#endif
|
||||
|
||||
//MMX versions
|
||||
#ifdef COMPILE_MMX
|
||||
#undef RENAME
|
||||
#undef HAVE_MMX_INLINE
|
||||
#define HAVE_MMX_INLINE 1
|
||||
#define RENAME(a) a ## _MMX
|
||||
#if ARCH_X86 && HAVE_INLINE_ASM
|
||||
# if CONFIG_RUNTIME_CPUDETECT
|
||||
# define TEMPLATE_PP_MMX 1
|
||||
# include "postprocess_template.c"
|
||||
# define TEMPLATE_PP_MMXEXT 1
|
||||
# include "postprocess_template.c"
|
||||
# define TEMPLATE_PP_3DNOW 1
|
||||
# include "postprocess_template.c"
|
||||
# else
|
||||
# if HAVE_MMXEXT_INLINE
|
||||
# define TEMPLATE_PP_MMXEXT 1
|
||||
# include "postprocess_template.c"
|
||||
# elif HAVE_AMD3DNOW_INLINE
|
||||
# define TEMPLATE_PP_3DNOW 1
|
||||
# include "postprocess_template.c"
|
||||
# elif HAVE_MMX_INLINE
|
||||
# define TEMPLATE_PP_MMX 1
|
||||
# include "postprocess_template.c"
|
||||
# endif
|
||||
|
||||
//MMX2 versions
|
||||
#ifdef COMPILE_MMX2
|
||||
#undef RENAME
|
||||
#undef HAVE_MMX_INLINE
|
||||
#undef HAVE_MMXEXT_INLINE
|
||||
#define HAVE_MMX_INLINE 1
|
||||
#define HAVE_MMXEXT_INLINE 1
|
||||
#define RENAME(a) a ## _MMX2
|
||||
#include "postprocess_template.c"
|
||||
# endif
|
||||
|
||||
//3DNOW versions
|
||||
#ifdef COMPILE_3DNOW
|
||||
#undef RENAME
|
||||
#undef HAVE_MMX_INLINE
|
||||
#undef HAVE_MMXEXT_INLINE
|
||||
#undef HAVE_AMD3DNOW_INLINE
|
||||
#define HAVE_MMX_INLINE 1
|
||||
#define HAVE_MMXEXT_INLINE 0
|
||||
#define HAVE_AMD3DNOW_INLINE 1
|
||||
#define RENAME(a) a ## _3DNow
|
||||
#include "postprocess_template.c"
|
||||
#endif
|
||||
|
||||
// minor note: the HAVE_xyz is messed up after that line so do not use it.
|
||||
|
||||
static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
|
||||
const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
|
||||
{
|
||||
|
@ -825,7 +825,7 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
|
||||
#define doHorizDefFilter_altivec(a...) doHorizDefFilter_C(a)
|
||||
#define do_a_deblock_altivec(a...) do_a_deblock_C(a)
|
||||
|
||||
static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
|
||||
static inline void tempNoiseReducer_altivec(uint8_t *src, int stride,
|
||||
uint8_t *tempBlurred, uint32_t *tempBlurredPast, int *maxNoise)
|
||||
{
|
||||
const vector signed char neg1 = vec_splat_s8(-1);
|
||||
|
@ -25,37 +25,76 @@
|
||||
|
||||
#include "libavutil/x86/asm.h"
|
||||
|
||||
/* A single TEMPLATE_PP_* should be defined (to 1) when this template is
|
||||
* included. The following macros will define its dependencies to 1 as well
|
||||
* (like MMX2 depending on MMX), and will define to 0 all the others. Every
|
||||
* TEMPLATE_PP_* needs to be undefined at the end. */
|
||||
|
||||
#ifdef TEMPLATE_PP_C
|
||||
# define RENAME(a) a ## _C
|
||||
#else
|
||||
# define TEMPLATE_PP_C 0
|
||||
#endif
|
||||
|
||||
#ifdef TEMPLATE_PP_ALTIVEC
|
||||
# define RENAME(a) a ## _altivec
|
||||
#else
|
||||
# define TEMPLATE_PP_ALTIVEC 0
|
||||
#endif
|
||||
|
||||
#ifdef TEMPLATE_PP_MMX
|
||||
# define RENAME(a) a ## _MMX
|
||||
#else
|
||||
# define TEMPLATE_PP_MMX 0
|
||||
#endif
|
||||
|
||||
#ifdef TEMPLATE_PP_MMXEXT
|
||||
# undef TEMPLATE_PP_MMX
|
||||
# define TEMPLATE_PP_MMX 1
|
||||
# define RENAME(a) a ## _MMX2
|
||||
#else
|
||||
# define TEMPLATE_PP_MMXEXT 0
|
||||
#endif
|
||||
|
||||
#ifdef TEMPLATE_PP_3DNOW
|
||||
# undef TEMPLATE_PP_MMX
|
||||
# define TEMPLATE_PP_MMX 1
|
||||
# define RENAME(a) a ## _3DNow
|
||||
#else
|
||||
# define TEMPLATE_PP_3DNOW 0
|
||||
#endif
|
||||
|
||||
#undef REAL_PAVGB
|
||||
#undef PAVGB
|
||||
#undef PMINUB
|
||||
#undef PMAXUB
|
||||
|
||||
#if HAVE_MMXEXT_INLINE
|
||||
#if TEMPLATE_PP_MMXEXT
|
||||
#define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
|
||||
#elif HAVE_AMD3DNOW_INLINE
|
||||
#elif TEMPLATE_PP_3DNOW
|
||||
#define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
|
||||
#endif
|
||||
#define PAVGB(a,b) REAL_PAVGB(a,b)
|
||||
|
||||
#if HAVE_MMXEXT_INLINE
|
||||
#if TEMPLATE_PP_MMXEXT
|
||||
#define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t"
|
||||
#elif HAVE_MMX_INLINE
|
||||
#elif TEMPLATE_PP_MMX
|
||||
#define PMINUB(b,a,t) \
|
||||
"movq " #a ", " #t " \n\t"\
|
||||
"psubusb " #b ", " #t " \n\t"\
|
||||
"psubb " #t ", " #a " \n\t"
|
||||
#endif
|
||||
|
||||
#if HAVE_MMXEXT_INLINE
|
||||
#if TEMPLATE_PP_MMXEXT
|
||||
#define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t"
|
||||
#elif HAVE_MMX_INLINE
|
||||
#elif TEMPLATE_PP_MMX
|
||||
#define PMAXUB(a,b) \
|
||||
"psubusb " #a ", " #b " \n\t"\
|
||||
"paddb " #a ", " #b " \n\t"
|
||||
#endif
|
||||
|
||||
//FIXME? |255-0| = 1 (should not be a problem ...)
|
||||
#if HAVE_MMX_INLINE
|
||||
#if TEMPLATE_PP_MMX
|
||||
/**
|
||||
* Check if the middle 8x8 Block in the given 8x16 block is flat
|
||||
*/
|
||||
@ -135,7 +174,7 @@ static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){
|
||||
"psubusb %%mm3, %%mm4 \n\t"
|
||||
|
||||
" \n\t"
|
||||
#if HAVE_MMXEXT_INLINE
|
||||
#if TEMPLATE_PP_MMXEXT
|
||||
"pxor %%mm7, %%mm7 \n\t"
|
||||
"psadbw %%mm7, %%mm0 \n\t"
|
||||
#else
|
||||
@ -169,16 +208,16 @@ static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
#endif //HAVE_MMX_INLINE
|
||||
#endif //TEMPLATE_PP_MMX
|
||||
|
||||
/**
|
||||
* Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle)
|
||||
* using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16
|
||||
*/
|
||||
#if !HAVE_ALTIVEC
|
||||
#if !TEMPLATE_PP_ALTIVEC
|
||||
static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
|
||||
{
|
||||
#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
||||
#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
|
||||
src+= stride*3;
|
||||
__asm__ volatile( //"movv %0 %1 %2\n\t"
|
||||
"movq %2, %%mm0 \n\t" // QP,..., QP
|
||||
@ -305,7 +344,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
|
||||
: "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb)
|
||||
: "%"REG_a, "%"REG_c
|
||||
);
|
||||
#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
||||
#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
|
||||
const int l1= stride;
|
||||
const int l2= stride + l1;
|
||||
const int l3= stride + l2;
|
||||
@ -344,9 +383,9 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
|
||||
|
||||
src++;
|
||||
}
|
||||
#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
||||
#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
|
||||
}
|
||||
#endif //HAVE_ALTIVEC
|
||||
#endif //TEMPLATE_PP_ALTIVEC
|
||||
|
||||
/**
|
||||
* Experimental Filter 1
|
||||
@ -357,7 +396,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
|
||||
*/
|
||||
static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
|
||||
{
|
||||
#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
||||
#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
|
||||
src+= stride*3;
|
||||
|
||||
__asm__ volatile(
|
||||
@ -443,7 +482,7 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
|
||||
: "r" (src), "r" ((x86_reg)stride), "m" (co->pQPb)
|
||||
: "%"REG_a, "%"REG_c
|
||||
);
|
||||
#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
||||
#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
|
||||
|
||||
const int l1= stride;
|
||||
const int l2= stride + l1;
|
||||
@ -477,13 +516,13 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
|
||||
}
|
||||
src++;
|
||||
}
|
||||
#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
||||
#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
|
||||
}
|
||||
|
||||
#if !HAVE_ALTIVEC
|
||||
#if !TEMPLATE_PP_ALTIVEC
|
||||
static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext *c)
|
||||
{
|
||||
#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
||||
#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
|
||||
/*
|
||||
uint8_t tmp[16];
|
||||
const int l1= stride;
|
||||
@ -764,7 +803,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
|
||||
}
|
||||
}
|
||||
*/
|
||||
#elif HAVE_MMX_INLINE
|
||||
#elif TEMPLATE_PP_MMX
|
||||
DECLARE_ALIGNED(8, uint64_t, tmp)[4]; // make space for 4 8-byte vars
|
||||
src+= stride*4;
|
||||
__asm__ volatile(
|
||||
@ -872,7 +911,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
|
||||
"movq (%3), %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3
|
||||
"movq 8(%3), %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3
|
||||
|
||||
#if HAVE_MMXEXT_INLINE
|
||||
#if TEMPLATE_PP_MMXEXT
|
||||
"movq %%mm7, %%mm6 \n\t" // 0
|
||||
"psubw %%mm0, %%mm6 \n\t"
|
||||
"pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7|
|
||||
@ -904,7 +943,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
|
||||
"psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3|
|
||||
#endif
|
||||
|
||||
#if HAVE_MMXEXT_INLINE
|
||||
#if TEMPLATE_PP_MMXEXT
|
||||
"pminsw %%mm2, %%mm0 \n\t"
|
||||
"pminsw %%mm3, %%mm1 \n\t"
|
||||
#else
|
||||
@ -968,7 +1007,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
|
||||
"pand %%mm2, %%mm4 \n\t"
|
||||
"pand %%mm3, %%mm5 \n\t"
|
||||
|
||||
#if HAVE_MMXEXT_INLINE
|
||||
#if TEMPLATE_PP_MMXEXT
|
||||
"pminsw %%mm0, %%mm4 \n\t"
|
||||
"pminsw %%mm1, %%mm5 \n\t"
|
||||
#else
|
||||
@ -995,7 +1034,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
|
||||
: "r" ((x86_reg)stride), "m" (c->pQPb), "r"(tmp)
|
||||
: "%"REG_a
|
||||
);
|
||||
#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
||||
#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
|
||||
const int l1= stride;
|
||||
const int l2= stride + l1;
|
||||
const int l3= stride + l2;
|
||||
@ -1033,14 +1072,14 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
|
||||
}
|
||||
src++;
|
||||
}
|
||||
#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
||||
#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
|
||||
}
|
||||
#endif //HAVE_ALTIVEC
|
||||
#endif //TEMPLATE_PP_ALTIVEC
|
||||
|
||||
#if !HAVE_ALTIVEC
|
||||
#if !TEMPLATE_PP_ALTIVEC
|
||||
static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
|
||||
{
|
||||
#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
||||
#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
|
||||
DECLARE_ALIGNED(8, uint64_t, tmp)[3];
|
||||
__asm__ volatile(
|
||||
"pxor %%mm6, %%mm6 \n\t"
|
||||
@ -1060,7 +1099,7 @@ static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
|
||||
|
||||
#undef REAL_FIND_MIN_MAX
|
||||
#undef FIND_MIN_MAX
|
||||
#if HAVE_MMXEXT_INLINE
|
||||
#if TEMPLATE_PP_MMXEXT
|
||||
#define REAL_FIND_MIN_MAX(addr)\
|
||||
"movq " #addr ", %%mm0 \n\t"\
|
||||
"pminub %%mm0, %%mm7 \n\t"\
|
||||
@ -1087,7 +1126,7 @@ FIND_MIN_MAX((%0, %1, 8))
|
||||
|
||||
"movq %%mm7, %%mm4 \n\t"
|
||||
"psrlq $8, %%mm7 \n\t"
|
||||
#if HAVE_MMXEXT_INLINE
|
||||
#if TEMPLATE_PP_MMXEXT
|
||||
"pminub %%mm4, %%mm7 \n\t" // min of pixels
|
||||
"pshufw $0xF9, %%mm7, %%mm4 \n\t"
|
||||
"pminub %%mm4, %%mm7 \n\t" // min of pixels
|
||||
@ -1112,7 +1151,7 @@ FIND_MIN_MAX((%0, %1, 8))
|
||||
|
||||
"movq %%mm6, %%mm4 \n\t"
|
||||
"psrlq $8, %%mm6 \n\t"
|
||||
#if HAVE_MMXEXT_INLINE
|
||||
#if TEMPLATE_PP_MMXEXT
|
||||
"pmaxub %%mm4, %%mm6 \n\t" // max of pixels
|
||||
"pshufw $0xF9, %%mm6, %%mm4 \n\t"
|
||||
"pmaxub %%mm4, %%mm6 \n\t"
|
||||
@ -1266,7 +1305,7 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,
|
||||
: : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2), "q"(tmp)
|
||||
: "%"REG_a, "%"REG_d
|
||||
);
|
||||
#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
||||
#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
|
||||
int y;
|
||||
int min=255;
|
||||
int max=0;
|
||||
@ -1383,9 +1422,9 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,
|
||||
// src[0] = src[7]=src[stride*7]=src[stride*7 + 7]=255;
|
||||
}
|
||||
#endif
|
||||
#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
||||
#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
|
||||
}
|
||||
#endif //HAVE_ALTIVEC
|
||||
#endif //TEMPLATE_PP_ALTIVEC
|
||||
|
||||
/**
|
||||
* Deinterlace the given block by linearly interpolating every second line.
|
||||
@ -1395,7 +1434,7 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,
|
||||
*/
|
||||
static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int stride)
|
||||
{
|
||||
#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
||||
#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
|
||||
src+= 4*stride;
|
||||
__asm__ volatile(
|
||||
"lea (%0, %1), %%"REG_a" \n\t"
|
||||
@ -1448,7 +1487,7 @@ static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int strid
|
||||
*/
|
||||
static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride)
|
||||
{
|
||||
#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
||||
#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
|
||||
src+= stride*3;
|
||||
__asm__ volatile(
|
||||
"lea (%0, %1), %%"REG_a" \n\t"
|
||||
@ -1490,7 +1529,7 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc,
|
||||
: : "r" (src), "r" ((x86_reg)stride)
|
||||
: "%"REG_a, "%"REG_d, "%"REG_c
|
||||
);
|
||||
#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
||||
#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
|
||||
int x;
|
||||
src+= stride*3;
|
||||
for(x=0; x<8; x++){
|
||||
@ -1500,7 +1539,7 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc,
|
||||
src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4);
|
||||
src++;
|
||||
}
|
||||
#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
||||
#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1512,7 +1551,7 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc,
|
||||
*/
|
||||
static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp)
|
||||
{
|
||||
#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
||||
#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
|
||||
src+= stride*4;
|
||||
__asm__ volatile(
|
||||
"lea (%0, %1), %%"REG_a" \n\t"
|
||||
@ -1561,7 +1600,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
|
||||
: : "r" (src), "r" ((x86_reg)stride), "r"(tmp)
|
||||
: "%"REG_a, "%"REG_d
|
||||
);
|
||||
#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
||||
#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
|
||||
int x;
|
||||
src+= stride*4;
|
||||
for(x=0; x<8; x++){
|
||||
@ -1579,7 +1618,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
|
||||
|
||||
src++;
|
||||
}
|
||||
#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
||||
#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1591,7 +1630,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
|
||||
*/
|
||||
static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2)
|
||||
{
|
||||
#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
||||
#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
|
||||
src+= stride*4;
|
||||
__asm__ volatile(
|
||||
"lea (%0, %1), %%"REG_a" \n\t"
|
||||
@ -1651,7 +1690,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
|
||||
: : "r" (src), "r" ((x86_reg)stride), "r"(tmp), "r"(tmp2)
|
||||
: "%"REG_a, "%"REG_d
|
||||
);
|
||||
#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
||||
#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
|
||||
int x;
|
||||
src+= stride*4;
|
||||
for(x=0; x<8; x++){
|
||||
@ -1680,7 +1719,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
|
||||
|
||||
src++;
|
||||
}
|
||||
#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
||||
#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1692,7 +1731,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
|
||||
*/
|
||||
static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uint8_t *tmp)
|
||||
{
|
||||
#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
||||
#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
|
||||
src+= 4*stride;
|
||||
__asm__ volatile(
|
||||
"lea (%0, %1), %%"REG_a" \n\t"
|
||||
@ -1739,7 +1778,7 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin
|
||||
: : "r" (src), "r" ((x86_reg)stride), "r" (tmp)
|
||||
: "%"REG_a, "%"REG_d
|
||||
);
|
||||
#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
||||
#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
|
||||
int a, b, c, x;
|
||||
src+= 4*stride;
|
||||
|
||||
@ -1782,7 +1821,7 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin
|
||||
src += 4;
|
||||
tmp += 4;
|
||||
}
|
||||
#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
||||
#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1793,9 +1832,9 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin
|
||||
*/
|
||||
static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride)
|
||||
{
|
||||
#if HAVE_MMX_INLINE
|
||||
#if TEMPLATE_PP_MMX
|
||||
src+= 4*stride;
|
||||
#if HAVE_MMXEXT_INLINE
|
||||
#if TEMPLATE_PP_MMXEXT
|
||||
__asm__ volatile(
|
||||
"lea (%0, %1), %%"REG_a" \n\t"
|
||||
"lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
|
||||
@ -1885,8 +1924,8 @@ MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8))
|
||||
: : "r" (src), "r" ((x86_reg)stride)
|
||||
: "%"REG_a, "%"REG_d
|
||||
);
|
||||
#endif //HAVE_MMXEXT_INLINE
|
||||
#else //HAVE_MMX_INLINE
|
||||
#endif //TEMPLATE_PP_MMXEXT
|
||||
#else //TEMPLATE_PP_MMX
|
||||
int x, y;
|
||||
src+= 4*stride;
|
||||
// FIXME - there should be a way to do a few columns in parallel like w/mmx
|
||||
@ -1905,10 +1944,10 @@ MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8))
|
||||
}
|
||||
src++;
|
||||
}
|
||||
#endif //HAVE_MMX_INLINE
|
||||
#endif //TEMPLATE_PP_MMX
|
||||
}
|
||||
|
||||
#if HAVE_MMX_INLINE
|
||||
#if TEMPLATE_PP_MMX
|
||||
/**
|
||||
* Transpose and shift the given 8x8 Block into dst1 and dst2.
|
||||
*/
|
||||
@ -2073,10 +2112,10 @@ static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, uint8_t *src)
|
||||
: "%"REG_a, "%"REG_d
|
||||
);
|
||||
}
|
||||
#endif //HAVE_MMX_INLINE
|
||||
#endif //TEMPLATE_PP_MMX
|
||||
//static long test=0;
|
||||
|
||||
#if !HAVE_ALTIVEC
|
||||
#if !TEMPLATE_PP_ALTIVEC
|
||||
static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
|
||||
uint8_t *tempBlurred, uint32_t *tempBlurredPast, int *maxNoise)
|
||||
{
|
||||
@ -2087,7 +2126,7 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
|
||||
|
||||
#define FAST_L2_DIFF
|
||||
//#define L1_DIFF //u should change the thresholds too if u try that one
|
||||
#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
||||
#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
|
||||
__asm__ volatile(
|
||||
"lea (%2, %2, 2), %%"REG_a" \n\t" // 3*stride
|
||||
"lea (%2, %2, 4), %%"REG_d" \n\t" // 5*stride
|
||||
@ -2375,7 +2414,7 @@ L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc))
|
||||
:: "r" (src), "r" (tempBlurred), "r"((x86_reg)stride), "m" (tempBlurredPast)
|
||||
: "%"REG_a, "%"REG_d, "%"REG_c, "memory"
|
||||
);
|
||||
#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
||||
#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
|
||||
{
|
||||
int y;
|
||||
int d=0;
|
||||
@ -2458,11 +2497,11 @@ Switch between
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
|
||||
#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
|
||||
}
|
||||
#endif //HAVE_ALTIVEC
|
||||
#endif //TEMPLATE_PP_ALTIVEC
|
||||
|
||||
#if HAVE_MMX_INLINE
|
||||
#if TEMPLATE_PP_MMX
|
||||
/**
|
||||
* accurate deblock filter
|
||||
*/
|
||||
@ -2865,7 +2904,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
|
||||
"movq (%4), %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3
|
||||
"movq 8(%4), %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3
|
||||
|
||||
#if HAVE_MMXEXT_INLINE
|
||||
#if TEMPLATE_PP_MMXEXT
|
||||
"movq %%mm7, %%mm6 \n\t" // 0
|
||||
"psubw %%mm0, %%mm6 \n\t"
|
||||
"pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7|
|
||||
@ -2897,7 +2936,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
|
||||
"psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3|
|
||||
#endif
|
||||
|
||||
#if HAVE_MMXEXT_INLINE
|
||||
#if TEMPLATE_PP_MMXEXT
|
||||
"pminsw %%mm2, %%mm0 \n\t"
|
||||
"pminsw %%mm3, %%mm1 \n\t"
|
||||
#else
|
||||
@ -2961,7 +3000,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
|
||||
"pand %%mm2, %%mm4 \n\t"
|
||||
"pand %%mm3, %%mm5 \n\t"
|
||||
|
||||
#if HAVE_MMXEXT_INLINE
|
||||
#if TEMPLATE_PP_MMXEXT
|
||||
"pminsw %%mm0, %%mm4 \n\t"
|
||||
"pminsw %%mm1, %%mm5 \n\t"
|
||||
#else
|
||||
@ -2998,7 +3037,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
|
||||
}
|
||||
} */
|
||||
}
|
||||
#endif //HAVE_MMX_INLINE
|
||||
#endif //TEMPLATE_PP_MMX
|
||||
|
||||
static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
|
||||
const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c);
|
||||
@ -3013,18 +3052,18 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
|
||||
static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t src[], int srcStride,
|
||||
int levelFix, int64_t *packedOffsetAndScale)
|
||||
{
|
||||
#if !HAVE_MMX_INLINE
|
||||
#if !TEMPLATE_PP_MMX
|
||||
int i;
|
||||
#endif
|
||||
if(levelFix){
|
||||
#if HAVE_MMX_INLINE
|
||||
#if TEMPLATE_PP_MMX
|
||||
__asm__ volatile(
|
||||
"movq (%%"REG_a"), %%mm2 \n\t" // packedYOffset
|
||||
"movq 8(%%"REG_a"), %%mm3 \n\t" // packedYScale
|
||||
"lea (%2,%4), %%"REG_a" \n\t"
|
||||
"lea (%3,%5), %%"REG_d" \n\t"
|
||||
"pxor %%mm4, %%mm4 \n\t"
|
||||
#if HAVE_MMXEXT_INLINE
|
||||
#if TEMPLATE_PP_MMXEXT
|
||||
#define REAL_SCALED_CPY(src1, src2, dst1, dst2) \
|
||||
"movq " #src1 ", %%mm0 \n\t"\
|
||||
"movq " #src1 ", %%mm5 \n\t"\
|
||||
@ -3047,7 +3086,7 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t
|
||||
"movq %%mm0, " #dst1 " \n\t"\
|
||||
"movq %%mm1, " #dst2 " \n\t"\
|
||||
|
||||
#else //HAVE_MMXEXT_INLINE
|
||||
#else //TEMPLATE_PP_MMXEXT
|
||||
#define REAL_SCALED_CPY(src1, src2, dst1, dst2) \
|
||||
"movq " #src1 ", %%mm0 \n\t"\
|
||||
"movq " #src1 ", %%mm5 \n\t"\
|
||||
@ -3074,7 +3113,7 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t
|
||||
"movq %%mm0, " #dst1 " \n\t"\
|
||||
"movq %%mm1, " #dst2 " \n\t"\
|
||||
|
||||
#endif //HAVE_MMXEXT_INLINE
|
||||
#endif //TEMPLATE_PP_MMXEXT
|
||||
#define SCALED_CPY(src1, src2, dst1, dst2)\
|
||||
REAL_SCALED_CPY(src1, src2, dst1, dst2)
|
||||
|
||||
@ -3094,13 +3133,13 @@ SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2))
|
||||
"r" ((x86_reg)dstStride)
|
||||
: "%"REG_d
|
||||
);
|
||||
#else //HAVE_MMX_INLINE
|
||||
#else //TEMPLATE_PP_MMX
|
||||
for(i=0; i<8; i++)
|
||||
memcpy( &(dst[dstStride*i]),
|
||||
&(src[srcStride*i]), BLOCK_SIZE);
|
||||
#endif //HAVE_MMX_INLINE
|
||||
#endif //TEMPLATE_PP_MMX
|
||||
}else{
|
||||
#if HAVE_MMX_INLINE
|
||||
#if TEMPLATE_PP_MMX
|
||||
__asm__ volatile(
|
||||
"lea (%0,%2), %%"REG_a" \n\t"
|
||||
"lea (%1,%3), %%"REG_d" \n\t"
|
||||
@ -3127,11 +3166,11 @@ SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2))
|
||||
"r" ((x86_reg)dstStride)
|
||||
: "%"REG_a, "%"REG_d
|
||||
);
|
||||
#else //HAVE_MMX_INLINE
|
||||
#else //TEMPLATE_PP_MMX
|
||||
for(i=0; i<8; i++)
|
||||
memcpy( &(dst[dstStride*i]),
|
||||
&(src[srcStride*i]), BLOCK_SIZE);
|
||||
#endif //HAVE_MMX_INLINE
|
||||
#endif //TEMPLATE_PP_MMX
|
||||
}
|
||||
}
|
||||
|
||||
@ -3140,7 +3179,7 @@ SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2))
|
||||
*/
|
||||
static inline void RENAME(duplicate)(uint8_t src[], int stride)
|
||||
{
|
||||
#if HAVE_MMX_INLINE
|
||||
#if TEMPLATE_PP_MMX
|
||||
__asm__ volatile(
|
||||
"movq (%0), %%mm0 \n\t"
|
||||
"add %1, %0 \n\t"
|
||||
@ -3168,8 +3207,8 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
|
||||
{
|
||||
DECLARE_ALIGNED(8, PPContext, c)= *c2; //copy to stack for faster access
|
||||
int x,y;
|
||||
#ifdef COMPILE_TIME_MODE
|
||||
const int mode= COMPILE_TIME_MODE;
|
||||
#ifdef TEMPLATE_PP_TIME_MODE
|
||||
const int mode= TEMPLATE_PP_TIME_MODE;
|
||||
#else
|
||||
const int mode= isColor ? c.ppMode.chromMode : c.ppMode.lumMode;
|
||||
#endif
|
||||
@ -3177,7 +3216,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
|
||||
int QPCorrecture= 256*256;
|
||||
|
||||
int copyAhead;
|
||||
#if HAVE_MMX_INLINE
|
||||
#if TEMPLATE_PP_MMX
|
||||
int i;
|
||||
#endif
|
||||
|
||||
@ -3190,7 +3229,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
|
||||
uint8_t * const tempDst= dstStride > 0 ? c.tempDst : c.tempDst - 23*dstStride;
|
||||
//const int mbWidth= isColor ? (width+7)>>3 : (width+15)>>4;
|
||||
|
||||
#if HAVE_MMX_INLINE
|
||||
#if TEMPLATE_PP_MMX
|
||||
for(i=0; i<57; i++){
|
||||
int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1;
|
||||
int threshold= offset*2 + 1;
|
||||
@ -3248,7 +3287,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
|
||||
|
||||
scale= (double)(c.ppMode.maxAllowedY - c.ppMode.minAllowedY) / (double)(white-black);
|
||||
|
||||
#if HAVE_MMXEXT_INLINE
|
||||
#if TEMPLATE_PP_MMXEXT
|
||||
c.packedYScale= (uint16_t)(scale*256.0 + 0.5);
|
||||
c.packedYOffset= (((black*c.packedYScale)>>8) - c.ppMode.minAllowedY) & 0xFFFF;
|
||||
#else
|
||||
@ -3281,7 +3320,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
|
||||
// with the L1 Cache of the P4 ... or only a few blocks at a time or something
|
||||
for(x=0; x<width; x+=BLOCK_SIZE){
|
||||
|
||||
#if HAVE_MMXEXT_INLINE
|
||||
#if TEMPLATE_PP_MMXEXT
|
||||
/*
|
||||
prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32);
|
||||
prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32);
|
||||
@ -3308,7 +3347,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
|
||||
: "%"REG_a, "%"REG_d
|
||||
);
|
||||
|
||||
#elif HAVE_AMD3DNOW_INLINE
|
||||
#elif TEMPLATE_PP_3DNOW
|
||||
//FIXME check if this is faster on a 3dnow chip or if it is faster without the prefetch or ...
|
||||
/* prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32);
|
||||
prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32);
|
||||
@ -3354,7 +3393,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
|
||||
//1% speedup if these are here instead of the inner loop
|
||||
const uint8_t *srcBlock= &(src[y*srcStride]);
|
||||
uint8_t *dstBlock= &(dst[y*dstStride]);
|
||||
#if HAVE_MMX_INLINE
|
||||
#if TEMPLATE_PP_MMX
|
||||
uint8_t *tempBlock1= c.tempBlocks;
|
||||
uint8_t *tempBlock2= c.tempBlocks + 8;
|
||||
#endif
|
||||
@ -3390,7 +3429,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
|
||||
// with the L1 Cache of the P4 ... or only a few blocks at a time or something
|
||||
for(x=0; x<width; x+=BLOCK_SIZE){
|
||||
const int stride= dstStride;
|
||||
#if HAVE_MMX_INLINE
|
||||
#if TEMPLATE_PP_MMX
|
||||
uint8_t *tmpXchg;
|
||||
#endif
|
||||
if(isColor){
|
||||
@ -3404,7 +3443,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
|
||||
yHistogram[ srcBlock[srcStride*12 + 4] ]++;
|
||||
}
|
||||
c.QP= QP;
|
||||
#if HAVE_MMX_INLINE
|
||||
#if TEMPLATE_PP_MMX
|
||||
__asm__ volatile(
|
||||
"movd %1, %%mm7 \n\t"
|
||||
"packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
|
||||
@ -3417,7 +3456,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
|
||||
#endif
|
||||
|
||||
|
||||
#if HAVE_MMXEXT_INLINE
|
||||
#if TEMPLATE_PP_MMXEXT
|
||||
/*
|
||||
prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32);
|
||||
prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32);
|
||||
@ -3444,7 +3483,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
|
||||
: "%"REG_a, "%"REG_d
|
||||
);
|
||||
|
||||
#elif HAVE_AMD3DNOW_INLINE
|
||||
#elif TEMPLATE_PP_3DNOW
|
||||
//FIXME check if this is faster on a 3dnow chip or if it is faster without the prefetch or ...
|
||||
/* prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32);
|
||||
prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32);
|
||||
@ -3488,12 +3527,12 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
|
||||
}
|
||||
}
|
||||
|
||||
#if HAVE_MMX_INLINE
|
||||
#if TEMPLATE_PP_MMX
|
||||
RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride);
|
||||
#endif
|
||||
/* check if we have a previous block to deblock it with dstBlock */
|
||||
if(x - 8 >= 0){
|
||||
#if HAVE_MMX_INLINE
|
||||
#if TEMPLATE_PP_MMX
|
||||
if(mode & H_X1_FILTER)
|
||||
RENAME(vertX1Filter)(tempBlock1, 16, &c);
|
||||
else if(mode & H_DEBLOCK){
|
||||
@ -3514,7 +3553,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
|
||||
if(mode & H_X1_FILTER)
|
||||
horizX1Filter(dstBlock-4, stride, QP);
|
||||
else if(mode & H_DEBLOCK){
|
||||
#if HAVE_ALTIVEC
|
||||
#if TEMPLATE_PP_ALTIVEC
|
||||
DECLARE_ALIGNED(16, unsigned char, tempBlock)[272];
|
||||
int t;
|
||||
transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride);
|
||||
@ -3539,7 +3578,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
|
||||
}else if(mode & H_A_DEBLOCK){
|
||||
RENAME(do_a_deblock)(dstBlock-8, 1, stride, &c);
|
||||
}
|
||||
#endif //HAVE_MMX_INLINE
|
||||
#endif //TEMPLATE_PP_MMX
|
||||
if(mode & DERING){
|
||||
//FIXME filter first line
|
||||
if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, &c);
|
||||
@ -3557,7 +3596,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
|
||||
dstBlock+=8;
|
||||
srcBlock+=8;
|
||||
|
||||
#if HAVE_MMX_INLINE
|
||||
#if TEMPLATE_PP_MMX
|
||||
tmpXchg= tempBlock1;
|
||||
tempBlock1= tempBlock2;
|
||||
tempBlock2 = tmpXchg;
|
||||
@ -3597,9 +3636,9 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
|
||||
+ dstBlock[x +14*dstStride] + dstBlock[x +15*dstStride];
|
||||
}*/
|
||||
}
|
||||
#if HAVE_AMD3DNOW_INLINE
|
||||
#if TEMPLATE_PP_3DNOW
|
||||
__asm__ volatile("femms");
|
||||
#elif HAVE_MMX_INLINE
|
||||
#elif TEMPLATE_PP_MMX
|
||||
__asm__ volatile("emms");
|
||||
#endif
|
||||
|
||||
@ -3629,3 +3668,10 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
|
||||
*c2= c; //copy local context back
|
||||
|
||||
}
|
||||
|
||||
#undef RENAME
|
||||
#undef TEMPLATE_PP_C
|
||||
#undef TEMPLATE_PP_ALTIVEC
|
||||
#undef TEMPLATE_PP_MMX
|
||||
#undef TEMPLATE_PP_MMXEXT
|
||||
#undef TEMPLATE_PP_3DNOW
|
||||
|
Loading…
Reference in New Issue
Block a user