1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2024-11-21 10:55:51 +02:00

libavcodec/mips: Fix build errors reported by clang

Clang is stricter than GCC about the types of asm operands: float or
double variables must use constraint 'f', and integer variables must
use constraint 'r'.

Signed-off-by: Jin Bo <jinbo@loongson.cn>
Reviewed-by: yinshiyou-hf@loongson.cn
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
This commit is contained in:
Jin Bo 2021-06-01 14:22:09 +08:00 committed by Michael Niedermayer
parent 864d1ef2fc
commit fd5fd48659
12 changed files with 536 additions and 403 deletions

View File

@ -19,50 +19,49 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/ */
#include "config.h" #include "libavutil/intfloat.h"
#include "libavutil/mem_internal.h"
#include "constants.h" #include "constants.h"
DECLARE_ALIGNED(8, const uint64_t, ff_pw_1) = {0x0001000100010001ULL}; const union av_intfloat64 ff_pw_1 = {0x0001000100010001ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_2) = {0x0002000200020002ULL}; const union av_intfloat64 ff_pw_2 = {0x0002000200020002ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_3) = {0x0003000300030003ULL}; const union av_intfloat64 ff_pw_3 = {0x0003000300030003ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_4) = {0x0004000400040004ULL}; const union av_intfloat64 ff_pw_4 = {0x0004000400040004ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_5) = {0x0005000500050005ULL}; const union av_intfloat64 ff_pw_5 = {0x0005000500050005ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_6) = {0x0006000600060006ULL}; const union av_intfloat64 ff_pw_6 = {0x0006000600060006ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_8) = {0x0008000800080008ULL}; const union av_intfloat64 ff_pw_8 = {0x0008000800080008ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_9) = {0x0009000900090009ULL}; const union av_intfloat64 ff_pw_9 = {0x0009000900090009ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_10) = {0x000A000A000A000AULL}; const union av_intfloat64 ff_pw_10 = {0x000A000A000A000AULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_12) = {0x000C000C000C000CULL}; const union av_intfloat64 ff_pw_12 = {0x000C000C000C000CULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_15) = {0x000F000F000F000FULL}; const union av_intfloat64 ff_pw_15 = {0x000F000F000F000FULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_16) = {0x0010001000100010ULL}; const union av_intfloat64 ff_pw_16 = {0x0010001000100010ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_17) = {0x0011001100110011ULL}; const union av_intfloat64 ff_pw_17 = {0x0011001100110011ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_18) = {0x0012001200120012ULL}; const union av_intfloat64 ff_pw_18 = {0x0012001200120012ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_20) = {0x0014001400140014ULL}; const union av_intfloat64 ff_pw_20 = {0x0014001400140014ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_22) = {0x0016001600160016ULL}; const union av_intfloat64 ff_pw_22 = {0x0016001600160016ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_28) = {0x001C001C001C001CULL}; const union av_intfloat64 ff_pw_28 = {0x001C001C001C001CULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_32) = {0x0020002000200020ULL}; const union av_intfloat64 ff_pw_32 = {0x0020002000200020ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_53) = {0x0035003500350035ULL}; const union av_intfloat64 ff_pw_53 = {0x0035003500350035ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_64) = {0x0040004000400040ULL}; const union av_intfloat64 ff_pw_64 = {0x0040004000400040ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = {0x0080008000800080ULL}; const union av_intfloat64 ff_pw_128 = {0x0080008000800080ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_512) = {0x0200020002000200ULL}; const union av_intfloat64 ff_pw_512 = {0x0200020002000200ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_m8tom5) = {0xFFFBFFFAFFF9FFF8ULL}; const union av_intfloat64 ff_pw_m8tom5 = {0xFFFBFFFAFFF9FFF8ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_m4tom1) = {0xFFFFFFFEFFFDFFFCULL}; const union av_intfloat64 ff_pw_m4tom1 = {0xFFFFFFFEFFFDFFFCULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_1to4) = {0x0004000300020001ULL}; const union av_intfloat64 ff_pw_1to4 = {0x0004000300020001ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_5to8) = {0x0008000700060005ULL}; const union av_intfloat64 ff_pw_5to8 = {0x0008000700060005ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_0to3) = {0x0003000200010000ULL}; const union av_intfloat64 ff_pw_0to3 = {0x0003000200010000ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_4to7) = {0x0007000600050004ULL}; const union av_intfloat64 ff_pw_4to7 = {0x0007000600050004ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_8tob) = {0x000b000a00090008ULL}; const union av_intfloat64 ff_pw_8tob = {0x000b000a00090008ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_ctof) = {0x000f000e000d000cULL}; const union av_intfloat64 ff_pw_ctof = {0x000f000e000d000cULL};
const union av_intfloat64 ff_pw_32_1 = {0x0000000100000001ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pb_1) = {0x0101010101010101ULL}; const union av_intfloat64 ff_pw_32_4 = {0x0000000400000004ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pb_3) = {0x0303030303030303ULL}; const union av_intfloat64 ff_pw_32_64 = {0x0000004000000040ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pb_80) = {0x8080808080808080ULL}; const union av_intfloat64 ff_pb_1 = {0x0101010101010101ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pb_A1) = {0xA1A1A1A1A1A1A1A1ULL}; const union av_intfloat64 ff_pb_3 = {0x0303030303030303ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pb_FE) = {0xFEFEFEFEFEFEFEFEULL}; const union av_intfloat64 ff_pb_80 = {0x8080808080808080ULL};
const union av_intfloat64 ff_pb_A1 = {0xA1A1A1A1A1A1A1A1ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_rnd) = {0x0004000400040004ULL}; const union av_intfloat64 ff_pb_FE = {0xFEFEFEFEFEFEFEFEULL};
DECLARE_ALIGNED(8, const uint64_t, ff_rnd2) = {0x0040004000400040ULL}; const union av_intfloat64 ff_rnd = {0x0004000400040004ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_rnd3) = {0x0020002000200020ULL}; const union av_intfloat64 ff_rnd2 = {0x0040004000400040ULL};
const union av_intfloat64 ff_rnd3 = {0x0020002000200020ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_wm1010) = {0xFFFF0000FFFF0000ULL}; const union av_intfloat64 ff_wm1010 = {0xFFFF0000FFFF0000ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_d40000) = {0x0000000000040000ULL}; const union av_intfloat64 ff_d40000 = {0x0000000000040000ULL};

View File

@ -22,50 +22,48 @@
#ifndef AVCODEC_MIPS_CONSTANTS_H #ifndef AVCODEC_MIPS_CONSTANTS_H
#define AVCODEC_MIPS_CONSTANTS_H #define AVCODEC_MIPS_CONSTANTS_H
#include <stdint.h> extern const union av_intfloat64 ff_pw_1;
extern const union av_intfloat64 ff_pw_2;
extern const uint64_t ff_pw_1; extern const union av_intfloat64 ff_pw_3;
extern const uint64_t ff_pw_2; extern const union av_intfloat64 ff_pw_4;
extern const uint64_t ff_pw_3; extern const union av_intfloat64 ff_pw_5;
extern const uint64_t ff_pw_4; extern const union av_intfloat64 ff_pw_6;
extern const uint64_t ff_pw_5; extern const union av_intfloat64 ff_pw_8;
extern const uint64_t ff_pw_6; extern const union av_intfloat64 ff_pw_9;
extern const uint64_t ff_pw_8; extern const union av_intfloat64 ff_pw_10;
extern const uint64_t ff_pw_9; extern const union av_intfloat64 ff_pw_12;
extern const uint64_t ff_pw_10; extern const union av_intfloat64 ff_pw_15;
extern const uint64_t ff_pw_12; extern const union av_intfloat64 ff_pw_16;
extern const uint64_t ff_pw_15; extern const union av_intfloat64 ff_pw_17;
extern const uint64_t ff_pw_16; extern const union av_intfloat64 ff_pw_18;
extern const uint64_t ff_pw_17; extern const union av_intfloat64 ff_pw_20;
extern const uint64_t ff_pw_18; extern const union av_intfloat64 ff_pw_22;
extern const uint64_t ff_pw_20; extern const union av_intfloat64 ff_pw_28;
extern const uint64_t ff_pw_22; extern const union av_intfloat64 ff_pw_32;
extern const uint64_t ff_pw_28; extern const union av_intfloat64 ff_pw_53;
extern const uint64_t ff_pw_32; extern const union av_intfloat64 ff_pw_64;
extern const uint64_t ff_pw_53; extern const union av_intfloat64 ff_pw_128;
extern const uint64_t ff_pw_64; extern const union av_intfloat64 ff_pw_512;
extern const uint64_t ff_pw_128; extern const union av_intfloat64 ff_pw_m8tom5;
extern const uint64_t ff_pw_512; extern const union av_intfloat64 ff_pw_m4tom1;
extern const uint64_t ff_pw_m8tom5; extern const union av_intfloat64 ff_pw_1to4;
extern const uint64_t ff_pw_m4tom1; extern const union av_intfloat64 ff_pw_5to8;
extern const uint64_t ff_pw_1to4; extern const union av_intfloat64 ff_pw_0to3;
extern const uint64_t ff_pw_5to8; extern const union av_intfloat64 ff_pw_4to7;
extern const uint64_t ff_pw_0to3; extern const union av_intfloat64 ff_pw_8tob;
extern const uint64_t ff_pw_4to7; extern const union av_intfloat64 ff_pw_ctof;
extern const uint64_t ff_pw_8tob; extern const union av_intfloat64 ff_pw_32_1;
extern const uint64_t ff_pw_ctof; extern const union av_intfloat64 ff_pw_32_4;
extern const union av_intfloat64 ff_pw_32_64;
extern const uint64_t ff_pb_1; extern const union av_intfloat64 ff_pb_1;
extern const uint64_t ff_pb_3; extern const union av_intfloat64 ff_pb_3;
extern const uint64_t ff_pb_80; extern const union av_intfloat64 ff_pb_80;
extern const uint64_t ff_pb_A1; extern const union av_intfloat64 ff_pb_A1;
extern const uint64_t ff_pb_FE; extern const union av_intfloat64 ff_pb_FE;
extern const union av_intfloat64 ff_rnd;
extern const uint64_t ff_rnd; extern const union av_intfloat64 ff_rnd2;
extern const uint64_t ff_rnd2; extern const union av_intfloat64 ff_rnd3;
extern const uint64_t ff_rnd3; extern const union av_intfloat64 ff_wm1010;
extern const union av_intfloat64 ff_d40000;
extern const uint64_t ff_wm1010;
extern const uint64_t ff_d40000;
#endif /* AVCODEC_MIPS_CONSTANTS_H */ #endif /* AVCODEC_MIPS_CONSTANTS_H */

View File

@ -29,12 +29,12 @@
void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride, void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
int h, int x, int y) int h, int x, int y)
{ {
int A = 64, B, C, D, E;
double ftmp[12]; double ftmp[12];
uint64_t tmp[1]; union mmi_intfloat64 A, B, C, D, E;
A.i = 64;
if (!(x || y)) { if (!(x || y)) {
/* x=0, y=0, A=64 */ /* x=0, y=0, A.i=64 */
__asm__ volatile ( __asm__ volatile (
"1: \n\t" "1: \n\t"
MMI_ULDC1(%[ftmp0], %[src], 0x00) MMI_ULDC1(%[ftmp0], %[src], 0x00)
@ -66,14 +66,13 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
); );
} else if (x && y) { } else if (x && y) {
/* x!=0, y!=0 */ /* x!=0, y!=0 */
D = x * y; D.i = x * y;
B = (x << 3) - D; B.i = (x << 3) - D.i;
C = (y << 3) - D; C.i = (y << 3) - D.i;
A = 64 - D - B - C; A.i = 64 - D.i - B.i - C.i;
__asm__ volatile ( __asm__ volatile (
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"dli %[tmp0], 0x06 \n\t"
"pshufh %[A], %[A], %[ftmp0] \n\t" "pshufh %[A], %[A], %[ftmp0] \n\t"
"pshufh %[B], %[B], %[ftmp0] \n\t" "pshufh %[B], %[B], %[ftmp0] \n\t"
"mtc1 %[tmp0], %[ftmp9] \n\t" "mtc1 %[tmp0], %[ftmp9] \n\t"
@ -158,22 +157,21 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
[tmp0]"=&r"(tmp[0]),
[dst]"+&r"(dst), [src]"+&r"(src), [dst]"+&r"(dst), [src]"+&r"(src),
[h]"+&r"(h) [h]"+&r"(h)
: [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32), : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32.f),
[A]"f"(A), [B]"f"(B), [A]"f"(A.f), [B]"f"(B.f),
[C]"f"(C), [D]"f"(D) [C]"f"(C.f), [D]"f"(D.f),
[tmp0]"r"(0x06)
: "memory" : "memory"
); );
} else if (x) { } else if (x) {
/* x!=0, y==0 */ /* x!=0, y==0 */
E = x << 3; E.i = x << 3;
A = 64 - E; A.i = 64 - E.i;
__asm__ volatile ( __asm__ volatile (
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"dli %[tmp0], 0x06 \n\t"
"pshufh %[A], %[A], %[ftmp0] \n\t" "pshufh %[A], %[A], %[ftmp0] \n\t"
"pshufh %[E], %[E], %[ftmp0] \n\t" "pshufh %[E], %[E], %[ftmp0] \n\t"
"mtc1 %[tmp0], %[ftmp7] \n\t" "mtc1 %[tmp0], %[ftmp7] \n\t"
@ -207,22 +205,20 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
[tmp0]"=&r"(tmp[0]),
[dst]"+&r"(dst), [src]"+&r"(src), [dst]"+&r"(dst), [src]"+&r"(src),
[h]"+&r"(h) [h]"+&r"(h)
: [stride]"r"((mips_reg)stride), : [stride]"r"((mips_reg)stride),
[ff_pw_32]"f"(ff_pw_32), [ff_pw_32]"f"(ff_pw_32.f), [tmp0]"r"(0x06),
[A]"f"(A), [E]"f"(E) [A]"f"(A.f), [E]"f"(E.f)
: "memory" : "memory"
); );
} else { } else {
/* x==0, y!=0 */ /* x==0, y!=0 */
E = y << 3; E.i = y << 3;
A = 64 - E; A.i = 64 - E.i;
__asm__ volatile ( __asm__ volatile (
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"dli %[tmp0], 0x06 \n\t"
"pshufh %[A], %[A], %[ftmp0] \n\t" "pshufh %[A], %[A], %[ftmp0] \n\t"
"pshufh %[E], %[E], %[ftmp0] \n\t" "pshufh %[E], %[E], %[ftmp0] \n\t"
"mtc1 %[tmp0], %[ftmp7] \n\t" "mtc1 %[tmp0], %[ftmp7] \n\t"
@ -276,12 +272,12 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
[ftmp8]"=&f"(ftmp[8]), [tmp0]"=&r"(tmp[0]), [ftmp8]"=&f"(ftmp[8]),
[dst]"+&r"(dst), [src]"+&r"(src), [dst]"+&r"(dst), [src]"+&r"(src),
[h]"+&r"(h) [h]"+&r"(h)
: [stride]"r"((mips_reg)stride), : [stride]"r"((mips_reg)stride),
[ff_pw_32]"f"(ff_pw_32), [ff_pw_32]"f"(ff_pw_32.f), [A]"f"(A.f),
[A]"f"(A), [E]"f"(E) [E]"f"(E.f), [tmp0]"r"(0x06)
: "memory" : "memory"
); );
} }
@ -290,12 +286,12 @@ void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride, void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
int h, int x, int y) int h, int x, int y)
{ {
int A = 64, B, C, D, E;
double ftmp[10]; double ftmp[10];
uint64_t tmp[1]; union mmi_intfloat64 A, B, C, D, E;
A.i = 64;
if(!(x || y)){ if(!(x || y)){
/* x=0, y=0, A=64 */ /* x=0, y=0, A.i=64 */
__asm__ volatile ( __asm__ volatile (
"1: \n\t" "1: \n\t"
MMI_ULDC1(%[ftmp0], %[src], 0x00) MMI_ULDC1(%[ftmp0], %[src], 0x00)
@ -323,13 +319,12 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
); );
} else if (x && y) { } else if (x && y) {
/* x!=0, y!=0 */ /* x!=0, y!=0 */
D = x * y; D.i = x * y;
B = (x << 3) - D; B.i = (x << 3) - D.i;
C = (y << 3) - D; C.i = (y << 3) - D.i;
A = 64 - D - B - C; A.i = 64 - D.i - B.i - C.i;
__asm__ volatile ( __asm__ volatile (
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"dli %[tmp0], 0x06 \n\t"
"pshufh %[A], %[A], %[ftmp0] \n\t" "pshufh %[A], %[A], %[ftmp0] \n\t"
"pshufh %[B], %[B], %[ftmp0] \n\t" "pshufh %[B], %[B], %[ftmp0] \n\t"
"mtc1 %[tmp0], %[ftmp9] \n\t" "mtc1 %[tmp0], %[ftmp9] \n\t"
@ -383,21 +378,20 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
[tmp0]"=&r"(tmp[0]),
[dst]"+&r"(dst), [src]"+&r"(src), [dst]"+&r"(dst), [src]"+&r"(src),
[h]"+&r"(h) [h]"+&r"(h)
: [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32), : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32.f),
[A]"f"(A), [B]"f"(B), [A]"f"(A.f), [B]"f"(B.f),
[C]"f"(C), [D]"f"(D) [C]"f"(C.f), [D]"f"(D.f),
[tmp0]"r"(0x06)
: "memory" : "memory"
); );
} else if (x) { } else if (x) {
/* x!=0, y==0 */ /* x!=0, y==0 */
E = x << 3; E.i = x << 3;
A = 64 - E; A.i = 64 - E.i;
__asm__ volatile ( __asm__ volatile (
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"dli %[tmp0], 0x06 \n\t"
"pshufh %[A], %[A], %[ftmp0] \n\t" "pshufh %[A], %[A], %[ftmp0] \n\t"
"pshufh %[E], %[E], %[ftmp0] \n\t" "pshufh %[E], %[E], %[ftmp0] \n\t"
"mtc1 %[tmp0], %[ftmp7] \n\t" "mtc1 %[tmp0], %[ftmp7] \n\t"
@ -433,21 +427,19 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
[tmp0]"=&r"(tmp[0]),
[dst]"+&r"(dst), [src]"+&r"(src), [dst]"+&r"(dst), [src]"+&r"(src),
[h]"+&r"(h) [h]"+&r"(h)
: [stride]"r"((mips_reg)stride), : [stride]"r"((mips_reg)stride),
[ff_pw_32]"f"(ff_pw_32), [ff_pw_32]"f"(ff_pw_32.f), [tmp0]"r"(0x06),
[A]"f"(A), [E]"f"(E) [A]"f"(A.f), [E]"f"(E.f)
: "memory" : "memory"
); );
} else { } else {
/* x==0, y!=0 */ /* x==0, y!=0 */
E = y << 3; E.i = y << 3;
A = 64 - E; A.i = 64 - E.i;
__asm__ volatile ( __asm__ volatile (
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"dli %[tmp0], 0x06 \n\t"
"pshufh %[A], %[A], %[ftmp0] \n\t" "pshufh %[A], %[A], %[ftmp0] \n\t"
"pshufh %[E], %[E], %[ftmp0] \n\t" "pshufh %[E], %[E], %[ftmp0] \n\t"
"mtc1 %[tmp0], %[ftmp7] \n\t" "mtc1 %[tmp0], %[ftmp7] \n\t"
@ -469,8 +461,8 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
"pmullh %[ftmp6], %[ftmp6], %[E] \n\t" "pmullh %[ftmp6], %[ftmp6], %[E] \n\t"
"paddh %[ftmp2], %[ftmp4], %[ftmp6] \n\t" "paddh %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
"paddh %[ftmp1], %[ftmp1], %[ff_pw_32] \n\t" "paddh %[ftmp1], %[ftmp1], %[ff_pw_32] \n\t"
"paddh %[ftmp2], %[ftmp2], %[ff_pw_32] \n\t" "paddh %[ftmp2], %[ftmp2], %[ff_pw_32] \n\t"
"psrlh %[ftmp1], %[ftmp1], %[ftmp7] \n\t" "psrlh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
"psrlh %[ftmp2], %[ftmp2], %[ftmp7] \n\t" "psrlh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
"packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
@ -483,12 +475,11 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
[tmp0]"=&r"(tmp[0]),
[dst]"+&r"(dst), [src]"+&r"(src), [dst]"+&r"(dst), [src]"+&r"(src),
[h]"+&r"(h) [h]"+&r"(h)
: [stride]"r"((mips_reg)stride), : [stride]"r"((mips_reg)stride),
[ff_pw_32]"f"(ff_pw_32), [ff_pw_32]"f"(ff_pw_32.f), [tmp0]"r"(0x06),
[A]"f"(A), [E]"f"(E) [A]"f"(A.f), [E]"f"(E.f)
: "memory" : "memory"
); );
} }
@ -497,20 +488,19 @@ void ff_avg_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride, void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
int h, int x, int y) int h, int x, int y)
{ {
const int A = (8 - x) * (8 - y);
const int B = x * (8 - y);
const int C = (8 - x) * y;
const int D = x * y;
const int E = B + C;
double ftmp[8]; double ftmp[8];
uint64_t tmp[1];
mips_reg addr[1]; mips_reg addr[1];
union mmi_intfloat64 A, B, C, D, E;
DECLARE_VAR_LOW32; DECLARE_VAR_LOW32;
A.i = (8 - x) * (8 - y);
B.i = x * (8 - y);
C.i = (8 - x) * y;
D.i = x * y;
E.i = B.i + C.i;
if (D) { if (D.i) {
__asm__ volatile ( __asm__ volatile (
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"dli %[tmp0], 0x06 \n\t"
"pshufh %[A], %[A], %[ftmp0] \n\t" "pshufh %[A], %[A], %[ftmp0] \n\t"
"pshufh %[B], %[B], %[ftmp0] \n\t" "pshufh %[B], %[B], %[ftmp0] \n\t"
"mtc1 %[tmp0], %[ftmp7] \n\t" "mtc1 %[tmp0], %[ftmp7] \n\t"
@ -547,20 +537,19 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
[tmp0]"=&r"(tmp[0]),
RESTRICT_ASM_LOW32 RESTRICT_ASM_LOW32
[dst]"+&r"(dst), [src]"+&r"(src), [dst]"+&r"(dst), [src]"+&r"(src),
[h]"+&r"(h) [h]"+&r"(h)
: [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32), : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32.f),
[A]"f"(A), [B]"f"(B), [A]"f"(A.f), [B]"f"(B.f),
[C]"f"(C), [D]"f"(D) [C]"f"(C.f), [D]"f"(D.f),
[tmp0]"r"(0x06)
: "memory" : "memory"
); );
} else if (E) { } else if (E.i) {
const int step = C ? stride : 1; const int step = C.i ? stride : 1;
__asm__ volatile ( __asm__ volatile (
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"dli %[tmp0], 0x06 \n\t"
"pshufh %[A], %[A], %[ftmp0] \n\t" "pshufh %[A], %[A], %[ftmp0] \n\t"
"pshufh %[E], %[E], %[ftmp0] \n\t" "pshufh %[E], %[E], %[ftmp0] \n\t"
"mtc1 %[tmp0], %[ftmp5] \n\t" "mtc1 %[tmp0], %[ftmp5] \n\t"
@ -585,14 +574,13 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
: [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
[tmp0]"=&r"(tmp[0]),
RESTRICT_ASM_LOW32 RESTRICT_ASM_LOW32
[addr0]"=&r"(addr[0]), [addr0]"=&r"(addr[0]),
[dst]"+&r"(dst), [src]"+&r"(src), [dst]"+&r"(dst), [src]"+&r"(src),
[h]"+&r"(h) [h]"+&r"(h)
: [stride]"r"((mips_reg)stride),[step]"r"((mips_reg)step), : [stride]"r"((mips_reg)stride),[step]"r"((mips_reg)step),
[ff_pw_32]"f"(ff_pw_32), [ff_pw_32]"f"(ff_pw_32.f), [tmp0]"r"(0x06),
[A]"f"(A), [E]"f"(E) [A]"f"(A.f), [E]"f"(E.f)
: "memory" : "memory"
); );
} else { } else {
@ -621,20 +609,19 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride, void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
int h, int x, int y) int h, int x, int y)
{ {
const int A = (8 - x) *(8 - y);
const int B = x * (8 - y);
const int C = (8 - x) * y;
const int D = x * y;
const int E = B + C;
double ftmp[8]; double ftmp[8];
uint64_t tmp[1];
mips_reg addr[1]; mips_reg addr[1];
union mmi_intfloat64 A, B, C, D, E;
DECLARE_VAR_LOW32; DECLARE_VAR_LOW32;
A.i = (8 - x) *(8 - y);
B.i = x * (8 - y);
C.i = (8 - x) * y;
D.i = x * y;
E.i = B.i + C.i;
if (D) { if (D.i) {
__asm__ volatile ( __asm__ volatile (
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"dli %[tmp0], 0x06 \n\t"
"pshufh %[A], %[A], %[ftmp0] \n\t" "pshufh %[A], %[A], %[ftmp0] \n\t"
"pshufh %[B], %[B], %[ftmp0] \n\t" "pshufh %[B], %[B], %[ftmp0] \n\t"
"mtc1 %[tmp0], %[ftmp7] \n\t" "mtc1 %[tmp0], %[ftmp7] \n\t"
@ -673,20 +660,19 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
[ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
[tmp0]"=&r"(tmp[0]),
RESTRICT_ASM_LOW32 RESTRICT_ASM_LOW32
[dst]"+&r"(dst), [src]"+&r"(src), [dst]"+&r"(dst), [src]"+&r"(src),
[h]"+&r"(h) [h]"+&r"(h)
: [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32), : [stride]"r"((mips_reg)stride),[ff_pw_32]"f"(ff_pw_32.f),
[A]"f"(A), [B]"f"(B), [A]"f"(A.f), [B]"f"(B.f),
[C]"f"(C), [D]"f"(D) [C]"f"(C.f), [D]"f"(D.f),
[tmp0]"r"(0x06)
: "memory" : "memory"
); );
} else if (E) { } else if (E.i) {
const int step = C ? stride : 1; const int step = C.i ? stride : 1;
__asm__ volatile ( __asm__ volatile (
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
"dli %[tmp0], 0x06 \n\t"
"pshufh %[A], %[A], %[ftmp0] \n\t" "pshufh %[A], %[A], %[ftmp0] \n\t"
"pshufh %[E], %[E], %[ftmp0] \n\t" "pshufh %[E], %[E], %[ftmp0] \n\t"
"mtc1 %[tmp0], %[ftmp5] \n\t" "mtc1 %[tmp0], %[ftmp5] \n\t"
@ -713,14 +699,13 @@ void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
: [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
[ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
[tmp0]"=&r"(tmp[0]),
RESTRICT_ASM_LOW32 RESTRICT_ASM_LOW32
[addr0]"=&r"(addr[0]), [addr0]"=&r"(addr[0]),
[dst]"+&r"(dst), [src]"+&r"(src), [dst]"+&r"(dst), [src]"+&r"(src),
[h]"+&r"(h) [h]"+&r"(h)
: [stride]"r"((mips_reg)stride),[step]"r"((mips_reg)step), : [stride]"r"((mips_reg)stride),[step]"r"((mips_reg)step),
[ff_pw_32]"f"(ff_pw_32), [ff_pw_32]"f"(ff_pw_32.f), [tmp0]"r"(0x06),
[A]"f"(A), [E]"f"(E) [A]"f"(A.f), [E]"f"(E.f)
: "memory" : "memory"
); );
} else { } else {

View File

@ -162,7 +162,7 @@ void ff_h264_idct_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
RESTRICT_ASM_ADDRT RESTRICT_ASM_ADDRT
[tmp0]"=&r"(tmp[0]) [tmp0]"=&r"(tmp[0])
: [dst]"r"(dst), [block]"r"(block), : [dst]"r"(dst), [block]"r"(block),
[stride]"r"((mips_reg)stride), [ff_pw_32]"f"(ff_pw_32) [stride]"r"((mips_reg)stride), [ff_pw_32]"f"(ff_pw_32.f)
: "memory" : "memory"
); );
@ -1078,7 +1078,7 @@ void ff_h264_luma_dc_dequant_idct_8_mmi(int16_t *output, int16_t *input,
RESTRICT_ASM_ALL64 RESTRICT_ASM_ALL64
[output]"+&r"(output), [input]"+&r"(input), [output]"+&r"(output), [input]"+&r"(input),
[qmul]"+&r"(qmul) [qmul]"+&r"(qmul)
: [ff_pw_1]"f"(ff_pw_1) : [ff_pw_1]"f"(ff_pw_1.f)
: "memory" : "memory"
); );
} }
@ -1556,8 +1556,8 @@ void ff_deblock_v8_luma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int bet
[addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]) [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1])
: [pix]"r"(pix), [stride]"r"((mips_reg)stride), : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
[alpha]"r"((mips_reg)alpha), [beta]"r"((mips_reg)beta), [alpha]"r"((mips_reg)alpha), [beta]"r"((mips_reg)beta),
[tc0]"r"(tc0), [ff_pb_1]"f"(ff_pb_1), [tc0]"r"(tc0), [ff_pb_1]"f"(ff_pb_1.f),
[ff_pb_3]"f"(ff_pb_3), [ff_pb_A1]"f"(ff_pb_A1) [ff_pb_3]"f"(ff_pb_3.f), [ff_pb_A1]"f"(ff_pb_A1.f)
: "memory" : "memory"
); );
} }
@ -1866,8 +1866,8 @@ void ff_deblock_v_chroma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
[addr0]"=&r"(addr[0]) [addr0]"=&r"(addr[0])
: [pix]"r"(pix), [stride]"r"((mips_reg)stride), : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
[alpha]"r"(alpha), [beta]"r"(beta), [alpha]"r"(alpha), [beta]"r"(beta),
[tc0]"r"(tc0), [ff_pb_1]"f"(ff_pb_1), [tc0]"r"(tc0), [ff_pb_1]"f"(ff_pb_1.f),
[ff_pb_3]"f"(ff_pb_3), [ff_pb_A1]"f"(ff_pb_A1) [ff_pb_3]"f"(ff_pb_3.f), [ff_pb_A1]"f"(ff_pb_A1.f)
: "memory" : "memory"
); );
} }
@ -1945,7 +1945,7 @@ void ff_deblock_v_chroma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
[addr0]"=&r"(addr[0]) [addr0]"=&r"(addr[0])
: [pix]"r"(pix), [stride]"r"((mips_reg)stride), : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
[alpha]"r"(alpha), [beta]"r"(beta), [alpha]"r"(alpha), [beta]"r"(beta),
[ff_pb_1]"f"(ff_pb_1) [ff_pb_1]"f"(ff_pb_1.f)
: "memory" : "memory"
); );
} }
@ -2084,8 +2084,8 @@ void ff_deblock_h_chroma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int be
[pix]"+&r"(pix) [pix]"+&r"(pix)
: [alpha]"r"(alpha), [beta]"r"(beta), : [alpha]"r"(alpha), [beta]"r"(beta),
[stride]"r"((mips_reg)stride), [tc0]"r"(tc0), [stride]"r"((mips_reg)stride), [tc0]"r"(tc0),
[ff_pb_1]"f"(ff_pb_1), [ff_pb_3]"f"(ff_pb_3), [ff_pb_1]"f"(ff_pb_1.f), [ff_pb_3]"f"(ff_pb_3.f),
[ff_pb_A1]"f"(ff_pb_A1) [ff_pb_A1]"f"(ff_pb_A1.f)
: "memory" : "memory"
); );
} }
@ -2218,7 +2218,7 @@ void ff_deblock_h_chroma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
[addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]), [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
[pix]"+&r"(pix) [pix]"+&r"(pix)
: [alpha]"r"(alpha), [beta]"r"(beta), : [alpha]"r"(alpha), [beta]"r"(beta),
[stride]"r"((mips_reg)stride), [ff_pb_1]"f"(ff_pb_1) [stride]"r"((mips_reg)stride), [ff_pb_1]"f"(ff_pb_1.f)
: "memory" : "memory"
); );
} }

View File

@ -155,9 +155,9 @@ void ff_pred16x16_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
void ff_pred8x8l_top_dc_8_mmi(uint8_t *src, int has_topleft, void ff_pred8x8l_top_dc_8_mmi(uint8_t *src, int has_topleft,
int has_topright, ptrdiff_t stride) int has_topright, ptrdiff_t stride)
{ {
uint32_t dc;
double ftmp[11]; double ftmp[11];
mips_reg tmp[3]; mips_reg tmp[3];
union av_intfloat64 dc;
DECLARE_VAR_ALL64; DECLARE_VAR_ALL64;
DECLARE_VAR_ADDRT; DECLARE_VAR_ADDRT;
@ -209,12 +209,12 @@ void ff_pred8x8l_top_dc_8_mmi(uint8_t *src, int has_topleft,
[ftmp10]"=&f"(ftmp[10]), [ftmp10]"=&f"(ftmp[10]),
[tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]), [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
RESTRICT_ASM_ALL64 RESTRICT_ASM_ALL64
[dc]"=r"(dc) [dc]"=r"(dc.i)
: [srcA]"r"((mips_reg)(src-stride-1)), : [srcA]"r"((mips_reg)(src-stride-1)),
[src0]"r"((mips_reg)(src-stride)), [src0]"r"((mips_reg)(src-stride)),
[src1]"r"((mips_reg)(src-stride+1)), [src1]"r"((mips_reg)(src-stride+1)),
[has_topleft]"r"(has_topleft), [has_topright]"r"(has_topright), [has_topleft]"r"(has_topleft), [has_topright]"r"(has_topright),
[ff_pb_1]"r"(ff_pb_1), [ff_pw_2]"f"(ff_pw_2) [ff_pb_1]"r"(ff_pb_1.i), [ff_pw_2]"f"(ff_pw_2.f)
: "memory" : "memory"
); );
@ -238,7 +238,7 @@ void ff_pred8x8l_top_dc_8_mmi(uint8_t *src, int has_topleft,
RESTRICT_ASM_ALL64 RESTRICT_ASM_ALL64
RESTRICT_ASM_ADDRT RESTRICT_ASM_ADDRT
[src]"+&r"(src) [src]"+&r"(src)
: [dc]"f"(dc), [stride]"r"((mips_reg)stride) : [dc]"f"(dc.f), [stride]"r"((mips_reg)stride)
: "memory" : "memory"
); );
} }
@ -246,9 +246,10 @@ void ff_pred8x8l_top_dc_8_mmi(uint8_t *src, int has_topleft,
void ff_pred8x8l_dc_8_mmi(uint8_t *src, int has_topleft, int has_topright, void ff_pred8x8l_dc_8_mmi(uint8_t *src, int has_topleft, int has_topright,
ptrdiff_t stride) ptrdiff_t stride)
{ {
uint32_t dc, dc1, dc2; uint32_t dc1, dc2;
double ftmp[14]; double ftmp[14];
mips_reg tmp[1]; mips_reg tmp[1];
union av_intfloat64 dc;
const int l0 = ((has_topleft ? src[-1+-1*stride] : src[-1+0*stride]) + 2*src[-1+0*stride] + src[-1+1*stride] + 2) >> 2; const int l0 = ((has_topleft ? src[-1+-1*stride] : src[-1+0*stride]) + 2*src[-1+0*stride] + src[-1+1*stride] + 2) >> 2;
const int l1 = (src[-1+0*stride] + 2*src[-1+1*stride] + src[-1+2*stride] + 2) >> 2; const int l1 = (src[-1+0*stride] + 2*src[-1+1*stride] + src[-1+2*stride] + 2) >> 2;
@ -322,7 +323,7 @@ void ff_pred8x8l_dc_8_mmi(uint8_t *src, int has_topleft, int has_topright,
); );
dc1 = l0+l1+l2+l3+l4+l5+l6+l7; dc1 = l0+l1+l2+l3+l4+l5+l6+l7;
dc = ((dc1+dc2+8)>>4)*0x01010101U; dc.i = ((dc1+dc2+8)>>4)*0x01010101U;
__asm__ volatile ( __asm__ volatile (
"dli %[tmp0], 0x02 \n\t" "dli %[tmp0], 0x02 \n\t"
@ -344,7 +345,7 @@ void ff_pred8x8l_dc_8_mmi(uint8_t *src, int has_topleft, int has_topright,
RESTRICT_ASM_ALL64 RESTRICT_ASM_ALL64
RESTRICT_ASM_ADDRT RESTRICT_ASM_ADDRT
[src]"+&r"(src) [src]"+&r"(src)
: [dc]"f"(dc), [stride]"r"((mips_reg)stride) : [dc]"f"(dc.f), [stride]"r"((mips_reg)stride)
: "memory" : "memory"
); );
} }
@ -965,10 +966,10 @@ static inline void pred16x16_plane_compat_mmi(uint8_t *src, int stride,
[addr0]"=&r"(addr[0]) [addr0]"=&r"(addr[0])
: [src]"r"(src), [stride]"r"((mips_reg)stride), : [src]"r"(src), [stride]"r"((mips_reg)stride),
[svq3]"r"(svq3), [rv40]"r"(rv40), [svq3]"r"(svq3), [rv40]"r"(rv40),
[ff_pw_m8tom5]"f"(ff_pw_m8tom5), [ff_pw_m4tom1]"f"(ff_pw_m4tom1), [ff_pw_m8tom5]"f"(ff_pw_m8tom5.f),[ff_pw_m4tom1]"f"(ff_pw_m4tom1.f),
[ff_pw_1to4]"f"(ff_pw_1to4), [ff_pw_5to8]"f"(ff_pw_5to8), [ff_pw_1to4]"f"(ff_pw_1to4.f), [ff_pw_5to8]"f"(ff_pw_5to8.f),
[ff_pw_0to3]"f"(ff_pw_0to3), [ff_pw_4to7]"r"(ff_pw_4to7), [ff_pw_0to3]"f"(ff_pw_0to3.f), [ff_pw_4to7]"r"(ff_pw_4to7.i),
[ff_pw_8tob]"r"(ff_pw_8tob), [ff_pw_ctof]"r"(ff_pw_ctof) [ff_pw_8tob]"r"(ff_pw_8tob.i), [ff_pw_ctof]"r"(ff_pw_ctof.i)
: "memory" : "memory"
); );
} }

View File

@ -155,8 +155,8 @@ static void put_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
[dst]"+&r"(dst), [src]"+&r"(src) [dst]"+&r"(dst), [src]"+&r"(src)
: [dstStride]"r"((mips_reg)dstStride), : [dstStride]"r"((mips_reg)dstStride),
[srcStride]"r"((mips_reg)srcStride), [srcStride]"r"((mips_reg)srcStride),
[ff_pw_20]"f"(ff_pw_20), [ff_pw_5]"f"(ff_pw_5), [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f),
[ff_pw_16]"f"(ff_pw_16) [ff_pw_16]"f"(ff_pw_16.f)
: "memory" : "memory"
); );
} }
@ -225,8 +225,8 @@ static void put_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
[dst]"+&r"(dst), [src]"+&r"(src) [dst]"+&r"(dst), [src]"+&r"(src)
: [dstStride]"r"((mips_reg)dstStride), : [dstStride]"r"((mips_reg)dstStride),
[srcStride]"r"((mips_reg)srcStride), [srcStride]"r"((mips_reg)srcStride),
[ff_pw_20]"f"(ff_pw_20), [ff_pw_5]"f"(ff_pw_5), [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f),
[ff_pw_16]"f"(ff_pw_16) [ff_pw_16]"f"(ff_pw_16.f)
: "memory" : "memory"
); );
} }
@ -293,8 +293,8 @@ static void avg_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
[dst]"+&r"(dst), [src]"+&r"(src) [dst]"+&r"(dst), [src]"+&r"(src)
: [dstStride]"r"((mips_reg)dstStride), : [dstStride]"r"((mips_reg)dstStride),
[srcStride]"r"((mips_reg)srcStride), [srcStride]"r"((mips_reg)srcStride),
[ff_pw_20]"f"(ff_pw_20), [ff_pw_5]"f"(ff_pw_5), [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f),
[ff_pw_16]"f"(ff_pw_16) [ff_pw_16]"f"(ff_pw_16.f)
: "memory" : "memory"
); );
} }
@ -365,8 +365,8 @@ static void avg_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
[dst]"+&r"(dst), [src]"+&r"(src) [dst]"+&r"(dst), [src]"+&r"(src)
: [dstStride]"r"((mips_reg)dstStride), : [dstStride]"r"((mips_reg)dstStride),
[srcStride]"r"((mips_reg)srcStride), [srcStride]"r"((mips_reg)srcStride),
[ff_pw_20]"f"(ff_pw_20), [ff_pw_5]"f"(ff_pw_5), [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f),
[ff_pw_16]"f"(ff_pw_16) [ff_pw_16]"f"(ff_pw_16.f)
: "memory" : "memory"
); );
} }
@ -486,7 +486,7 @@ static void put_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
[dst]"+&r"(dst), [src]"+&r"(src) [dst]"+&r"(dst), [src]"+&r"(src)
: [dstStride]"r"((mips_reg)dstStride), : [dstStride]"r"((mips_reg)dstStride),
[srcStride]"r"((mips_reg)srcStride), [srcStride]"r"((mips_reg)srcStride),
[ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16) [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
: "memory" : "memory"
); );
} }
@ -780,7 +780,7 @@ static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
[h]"+&r"(h) [h]"+&r"(h)
: [dstStride]"r"((mips_reg)dstStride), : [dstStride]"r"((mips_reg)dstStride),
[srcStride]"r"((mips_reg)srcStride), [srcStride]"r"((mips_reg)srcStride),
[ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16) [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
: "memory" : "memory"
); );
@ -909,7 +909,7 @@ static void avg_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
[src]"+&r"(src), [dst]"+&r"(dst) [src]"+&r"(src), [dst]"+&r"(dst)
: [dstStride]"r"((mips_reg)dstStride), : [dstStride]"r"((mips_reg)dstStride),
[srcStride]"r"((mips_reg)srcStride), [srcStride]"r"((mips_reg)srcStride),
[ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16) [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
: "memory" : "memory"
); );
} }
@ -1235,7 +1235,7 @@ static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
[h]"+&r"(h) [h]"+&r"(h)
: [dstStride]"r"((mips_reg)dstStride), : [dstStride]"r"((mips_reg)dstStride),
[srcStride]"r"((mips_reg)srcStride), [srcStride]"r"((mips_reg)srcStride),
[ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16) [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
: "memory" : "memory"
); );
@ -1306,7 +1306,7 @@ static void put_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
[tmp]"+&r"(tmp), [src]"+&r"(src) [tmp]"+&r"(tmp), [src]"+&r"(src)
: [tmpStride]"r"(8), : [tmpStride]"r"(8),
[srcStride]"r"((mips_reg)srcStride), [srcStride]"r"((mips_reg)srcStride),
[ff_pw_20]"f"(ff_pw_20), [ff_pw_5]"f"(ff_pw_5) [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f)
: "memory" : "memory"
); );
@ -1567,7 +1567,7 @@ static void put_h264_qpel8or16_hv1_lowpass_mmi(int16_t *tmp,
[src]"+&r"(src) [src]"+&r"(src)
: [tmp]"r"(tmp), [size]"r"(size), : [tmp]"r"(tmp), [size]"r"(size),
[srcStride]"r"((mips_reg)srcStride), [srcStride]"r"((mips_reg)srcStride),
[ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16) [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
: "memory" : "memory"
); );
@ -1742,7 +1742,7 @@ static void put_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
[src2]"+&r"(src2), [h]"+&r"(h) [src2]"+&r"(src2), [h]"+&r"(h)
: [src2Stride]"r"((mips_reg)src2Stride), : [src2Stride]"r"((mips_reg)src2Stride),
[dstStride]"r"((mips_reg)dstStride), [dstStride]"r"((mips_reg)dstStride),
[ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16) [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
: "memory" : "memory"
); );
} }
@ -1870,7 +1870,7 @@ static void avg_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
[tmp]"+&r"(tmp), [src]"+&r"(src) [tmp]"+&r"(tmp), [src]"+&r"(src)
: [tmpStride]"r"(8), : [tmpStride]"r"(8),
[srcStride]"r"((mips_reg)srcStride), [srcStride]"r"((mips_reg)srcStride),
[ff_pw_20]"f"(ff_pw_20), [ff_pw_5]"f"(ff_pw_5) [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f)
: "memory" : "memory"
); );
@ -2065,7 +2065,7 @@ static void avg_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
[src2]"+&r"(src2) [src2]"+&r"(src2)
: [dstStride]"r"((mips_reg)dstStride), : [dstStride]"r"((mips_reg)dstStride),
[src2Stride]"r"((mips_reg)src2Stride), [src2Stride]"r"((mips_reg)src2Stride),
[ff_pw_5]"f"(ff_pw_5), [ff_pw_16]"f"(ff_pw_16) [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
: "memory" : "memory"
); );
} }

View File

@ -32,7 +32,7 @@ void ff_hevc_put_hevc_qpel_h##w##_8_mmi(int16_t *dst, uint8_t *_src, \
int x, y; \ int x, y; \
pixel *src = (pixel*)_src - 3; \ pixel *src = (pixel*)_src - 3; \
ptrdiff_t srcstride = _srcstride / sizeof(pixel); \ ptrdiff_t srcstride = _srcstride / sizeof(pixel); \
uint64_t ftmp[15]; \ double ftmp[15]; \
uint64_t rtmp[1]; \ uint64_t rtmp[1]; \
const int8_t *filter = ff_hevc_qpel_filters[mx - 1]; \ const int8_t *filter = ff_hevc_qpel_filters[mx - 1]; \
\ \
@ -132,7 +132,7 @@ void ff_hevc_put_hevc_qpel_hv##w##_8_mmi(int16_t *dst, uint8_t *_src, \
ptrdiff_t srcstride = _srcstride / sizeof(pixel); \ ptrdiff_t srcstride = _srcstride / sizeof(pixel); \
int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; \ int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; \
int16_t *tmp = tmp_array; \ int16_t *tmp = tmp_array; \
uint64_t ftmp[15]; \ double ftmp[15]; \
uint64_t rtmp[1]; \ uint64_t rtmp[1]; \
\ \
src -= (QPEL_EXTRA_BEFORE * srcstride + 3); \ src -= (QPEL_EXTRA_BEFORE * srcstride + 3); \
@ -329,10 +329,12 @@ void ff_hevc_put_hevc_qpel_bi_h##w##_8_mmi(uint8_t *_dst, \
pixel *dst = (pixel *)_dst; \ pixel *dst = (pixel *)_dst; \
ptrdiff_t dststride = _dststride / sizeof(pixel); \ ptrdiff_t dststride = _dststride / sizeof(pixel); \
const int8_t *filter = ff_hevc_qpel_filters[mx - 1]; \ const int8_t *filter = ff_hevc_qpel_filters[mx - 1]; \
uint64_t ftmp[20]; \ double ftmp[20]; \
uint64_t rtmp[1]; \ uint64_t rtmp[1]; \
int shift = 7; \ union av_intfloat64 shift; \
int offset = 64; \ union av_intfloat64 offset; \
shift.i = 7; \
offset.i = 64; \
\ \
x = width >> 2; \ x = width >> 2; \
y = height; \ y = height; \
@ -430,9 +432,9 @@ void ff_hevc_put_hevc_qpel_bi_h##w##_8_mmi(uint8_t *_dst, \
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), \ [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), \
[ftmp12]"=&f"(ftmp[12]), [src2]"+&r"(src2), \ [ftmp12]"=&f"(ftmp[12]), [src2]"+&r"(src2), \
[dst]"+&r"(dst), [src]"+&r"(src), [y]"+&r"(y), [x]"=&r"(x), \ [dst]"+&r"(dst), [src]"+&r"(src), [y]"+&r"(y), [x]"=&r"(x), \
[offset]"+&f"(offset), [rtmp0]"=&r"(rtmp[0]) \ [offset]"+&f"(offset.f), [rtmp0]"=&r"(rtmp[0]) \
: [src_stride]"r"(srcstride), [dst_stride]"r"(dststride), \ : [src_stride]"r"(srcstride), [dst_stride]"r"(dststride), \
[filter]"r"(filter), [shift]"f"(shift) \ [filter]"r"(filter), [shift]"f"(shift.f) \
: "memory" \ : "memory" \
); \ ); \
} }
@ -463,10 +465,12 @@ void ff_hevc_put_hevc_qpel_bi_hv##w##_8_mmi(uint8_t *_dst, \
ptrdiff_t dststride = _dststride / sizeof(pixel); \ ptrdiff_t dststride = _dststride / sizeof(pixel); \
int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; \ int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; \
int16_t *tmp = tmp_array; \ int16_t *tmp = tmp_array; \
uint64_t ftmp[20]; \ double ftmp[20]; \
uint64_t rtmp[1]; \ uint64_t rtmp[1]; \
int shift = 7; \ union av_intfloat64 shift; \
int offset = 64; \ union av_intfloat64 offset; \
shift.i = 7; \
offset.i = 64; \
\ \
src -= (QPEL_EXTRA_BEFORE * srcstride + 3); \ src -= (QPEL_EXTRA_BEFORE * srcstride + 3); \
filter = ff_hevc_qpel_filters[mx - 1]; \ filter = ff_hevc_qpel_filters[mx - 1]; \
@ -659,9 +663,9 @@ void ff_hevc_put_hevc_qpel_bi_hv##w##_8_mmi(uint8_t *_dst, \
[ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]), \ [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]), \
[ftmp14]"=&f"(ftmp[14]), [src2]"+&r"(src2), \ [ftmp14]"=&f"(ftmp[14]), [src2]"+&r"(src2), \
[dst]"+&r"(dst), [tmp]"+&r"(tmp), [y]"+&r"(y), [x]"=&r"(x), \ [dst]"+&r"(dst), [tmp]"+&r"(tmp), [y]"+&r"(y), [x]"=&r"(x), \
[offset]"+&f"(offset), [rtmp0]"=&r"(rtmp[0]) \ [offset]"+&f"(offset.f), [rtmp0]"=&r"(rtmp[0]) \
: [filter]"r"(filter), [stride]"r"(dststride), \ : [filter]"r"(filter), [stride]"r"(dststride), \
[shift]"f"(shift) \ [shift]"f"(shift.f) \
: "memory" \ : "memory" \
); \ ); \
} }
@ -692,10 +696,12 @@ void ff_hevc_put_hevc_epel_bi_hv##w##_8_mmi(uint8_t *_dst, \
const int8_t *filter = ff_hevc_epel_filters[mx - 1]; \ const int8_t *filter = ff_hevc_epel_filters[mx - 1]; \
int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE]; \ int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE]; \
int16_t *tmp = tmp_array; \ int16_t *tmp = tmp_array; \
uint64_t ftmp[12]; \ double ftmp[12]; \
uint64_t rtmp[1]; \ uint64_t rtmp[1]; \
int shift = 7; \ union av_intfloat64 shift; \
int offset = 64; \ union av_intfloat64 offset; \
shift.i = 7; \
offset.i = 64; \
\ \
src -= (EPEL_EXTRA_BEFORE * srcstride + 1); \ src -= (EPEL_EXTRA_BEFORE * srcstride + 1); \
x = width >> 2; \ x = width >> 2; \
@ -847,9 +853,9 @@ void ff_hevc_put_hevc_epel_bi_hv##w##_8_mmi(uint8_t *_dst, \
[ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), \ [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), \
[ftmp10]"=&f"(ftmp[10]), [src2]"+&r"(src2), \ [ftmp10]"=&f"(ftmp[10]), [src2]"+&r"(src2), \
[dst]"+&r"(dst), [tmp]"+&r"(tmp), [y]"+&r"(y), [x]"=&r"(x), \ [dst]"+&r"(dst), [tmp]"+&r"(tmp), [y]"+&r"(y), [x]"=&r"(x), \
[offset]"+&f"(offset), [rtmp0]"=&r"(rtmp[0]) \ [offset]"+&f"(offset.f), [rtmp0]"=&r"(rtmp[0]) \
: [filter]"r"(filter), [stride]"r"(dststride), \ : [filter]"r"(filter), [stride]"r"(dststride), \
[shift]"f"(shift) \ [shift]"f"(shift.f) \
: "memory" \ : "memory" \
); \ ); \
} }
@ -875,9 +881,10 @@ void ff_hevc_put_hevc_pel_bi_pixels##w##_8_mmi(uint8_t *_dst, \
ptrdiff_t srcstride = _srcstride / sizeof(pixel); \ ptrdiff_t srcstride = _srcstride / sizeof(pixel); \
pixel *dst = (pixel *)_dst; \ pixel *dst = (pixel *)_dst; \
ptrdiff_t dststride = _dststride / sizeof(pixel); \ ptrdiff_t dststride = _dststride / sizeof(pixel); \
uint64_t ftmp[12]; \ double ftmp[12]; \
uint64_t rtmp[1]; \ uint64_t rtmp[1]; \
int shift = 7; \ union av_intfloat64 shift; \
shift.i = 7; \
\ \
y = height; \ y = height; \
x = width >> 3; \ x = width >> 3; \
@ -959,7 +966,7 @@ void ff_hevc_put_hevc_pel_bi_pixels##w##_8_mmi(uint8_t *_dst, \
[ftmp10]"=&f"(ftmp[10]), [offset]"=&f"(ftmp[11]), \ [ftmp10]"=&f"(ftmp[10]), [offset]"=&f"(ftmp[11]), \
[src2]"+&r"(src2), [dst]"+&r"(dst), [src]"+&r"(src), \ [src2]"+&r"(src2), [dst]"+&r"(dst), [src]"+&r"(src), \
[x]"+&r"(x), [y]"+&r"(y), [rtmp0]"=&r"(rtmp[0]) \ [x]"+&r"(x), [y]"+&r"(y), [rtmp0]"=&r"(rtmp[0]) \
: [dststride]"r"(dststride), [shift]"f"(shift), \ : [dststride]"r"(dststride), [shift]"f"(shift.f), \
[srcstride]"r"(srcstride) \ [srcstride]"r"(srcstride) \
: "memory" \ : "memory" \
); \ ); \
@ -989,10 +996,12 @@ void ff_hevc_put_hevc_qpel_uni_hv##w##_8_mmi(uint8_t *_dst, \
ptrdiff_t dststride = _dststride / sizeof(pixel); \ ptrdiff_t dststride = _dststride / sizeof(pixel); \
int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; \ int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; \
int16_t *tmp = tmp_array; \ int16_t *tmp = tmp_array; \
uint64_t ftmp[20]; \ double ftmp[20]; \
uint64_t rtmp[1]; \ uint64_t rtmp[1]; \
int shift = 6; \ union av_intfloat64 shift; \
int offset = 32; \ union av_intfloat64 offset; \
shift.i = 6; \
offset.i = 32; \
\ \
src -= (QPEL_EXTRA_BEFORE * srcstride + 3); \ src -= (QPEL_EXTRA_BEFORE * srcstride + 3); \
filter = ff_hevc_qpel_filters[mx - 1]; \ filter = ff_hevc_qpel_filters[mx - 1]; \
@ -1166,9 +1175,9 @@ void ff_hevc_put_hevc_qpel_uni_hv##w##_8_mmi(uint8_t *_dst, \
[ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]), \ [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]), \
[ftmp14]"=&f"(ftmp[14]), \ [ftmp14]"=&f"(ftmp[14]), \
[dst]"+&r"(dst), [tmp]"+&r"(tmp), [y]"+&r"(y), [x]"=&r"(x), \ [dst]"+&r"(dst), [tmp]"+&r"(tmp), [y]"+&r"(y), [x]"=&r"(x), \
[offset]"+&f"(offset), [rtmp0]"=&r"(rtmp[0]) \ [offset]"+&f"(offset.f), [rtmp0]"=&r"(rtmp[0]) \
: [filter]"r"(filter), [stride]"r"(dststride), \ : [filter]"r"(filter), [stride]"r"(dststride), \
[shift]"f"(shift) \ [shift]"f"(shift.f) \
: "memory" \ : "memory" \
); \ ); \
} }

View File

@ -142,7 +142,7 @@ void ff_put_signed_pixels_clamped_mmi(const int16_t *block,
[pixels]"+&r"(pixels) [pixels]"+&r"(pixels)
: [block]"r"(block), : [block]"r"(block),
[line_size]"r"((mips_reg)line_size), [line_size]"r"((mips_reg)line_size),
[ff_pb_80]"f"(ff_pb_80) [ff_pb_80]"f"(ff_pb_80.f)
: "memory" : "memory"
); );
} }

View File

@ -28,12 +28,13 @@
void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block, void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block,
int n, int qscale) int n, int qscale)
{ {
int64_t level, qmul, qadd, nCoeffs; int64_t level, nCoeffs;
double ftmp[6]; double ftmp[6];
mips_reg addr[1]; mips_reg addr[1];
union mmi_intfloat64 qmul_u, qadd_u;
DECLARE_VAR_ALL64; DECLARE_VAR_ALL64;
qmul = qscale << 1; qmul_u.i = qscale << 1;
av_assert2(s->block_last_index[n]>=0 || s->h263_aic); av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
if (!s->h263_aic) { if (!s->h263_aic) {
@ -41,9 +42,9 @@ void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block,
level = block[0] * s->y_dc_scale; level = block[0] * s->y_dc_scale;
else else
level = block[0] * s->c_dc_scale; level = block[0] * s->c_dc_scale;
qadd = (qscale-1) | 1; qadd_u.i = (qscale-1) | 1;
} else { } else {
qadd = 0; qadd_u.i = 0;
level = block[0]; level = block[0];
} }
@ -93,7 +94,7 @@ void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block,
[addr0]"=&r"(addr[0]) [addr0]"=&r"(addr[0])
: [block]"r"((mips_reg)(block+nCoeffs)), : [block]"r"((mips_reg)(block+nCoeffs)),
[nCoeffs]"r"((mips_reg)(2*(-nCoeffs))), [nCoeffs]"r"((mips_reg)(2*(-nCoeffs))),
[qmul]"f"(qmul), [qadd]"f"(qadd) [qmul]"f"(qmul_u.f), [qadd]"f"(qadd_u.f)
: "memory" : "memory"
); );
@ -103,13 +104,14 @@ void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block,
void ff_dct_unquantize_h263_inter_mmi(MpegEncContext *s, int16_t *block, void ff_dct_unquantize_h263_inter_mmi(MpegEncContext *s, int16_t *block,
int n, int qscale) int n, int qscale)
{ {
int64_t qmul, qadd, nCoeffs; int64_t nCoeffs;
double ftmp[6]; double ftmp[6];
mips_reg addr[1]; mips_reg addr[1];
union mmi_intfloat64 qmul_u, qadd_u;
DECLARE_VAR_ALL64; DECLARE_VAR_ALL64;
qmul = qscale << 1; qmul_u.i = qscale << 1;
qadd = (qscale - 1) | 1; qadd_u.i = (qscale - 1) | 1;
av_assert2(s->block_last_index[n]>=0 || s->h263_aic); av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]]; nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]];
@ -153,7 +155,7 @@ void ff_dct_unquantize_h263_inter_mmi(MpegEncContext *s, int16_t *block,
[addr0]"=&r"(addr[0]) [addr0]"=&r"(addr[0])
: [block]"r"((mips_reg)(block+nCoeffs)), : [block]"r"((mips_reg)(block+nCoeffs)),
[nCoeffs]"r"((mips_reg)(2*(-nCoeffs))), [nCoeffs]"r"((mips_reg)(2*(-nCoeffs))),
[qmul]"f"(qmul), [qadd]"f"(qadd) [qmul]"f"(qmul_u.f), [qadd]"f"(qadd_u.f)
: "memory" : "memory"
); );
} }

View File

@ -129,9 +129,11 @@ void ff_vc1_inv_trans_8x8_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
double ftmp[9]; double ftmp[9];
mips_reg addr[1]; mips_reg addr[1];
int count; int count;
union mmi_intfloat64 dc_u;
dc = (3 * dc + 1) >> 1; dc = (3 * dc + 1) >> 1;
dc = (3 * dc + 16) >> 5; dc = (3 * dc + 16) >> 5;
dc_u.i = dc;
__asm__ volatile( __asm__ volatile(
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
@ -189,7 +191,7 @@ void ff_vc1_inv_trans_8x8_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
[addr0]"=&r"(addr[0]), [addr0]"=&r"(addr[0]),
[count]"=&r"(count), [dest]"+&r"(dest) [count]"=&r"(count), [dest]"+&r"(dest)
: [linesize]"r"((mips_reg)linesize), : [linesize]"r"((mips_reg)linesize),
[dc]"f"(dc) [dc]"f"(dc_u.f)
: "memory" : "memory"
); );
} }
@ -198,9 +200,6 @@ void ff_vc1_inv_trans_8x8_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
void ff_vc1_inv_trans_8x8_mmi(int16_t block[64]) void ff_vc1_inv_trans_8x8_mmi(int16_t block[64])
{ {
DECLARE_ALIGNED(16, int16_t, temp[64]); DECLARE_ALIGNED(16, int16_t, temp[64]);
DECLARE_ALIGNED(8, const uint64_t, ff_pw_1_local) = {0x0000000100000001ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_4_local) = {0x0000000400000004ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_64_local)= {0x0000004000000040ULL};
double ftmp[23]; double ftmp[23];
uint64_t tmp[1]; uint64_t tmp[1];
@ -407,8 +406,8 @@ void ff_vc1_inv_trans_8x8_mmi(int16_t block[64])
[ftmp20]"=&f"(ftmp[20]), [ftmp21]"=&f"(ftmp[21]), [ftmp20]"=&f"(ftmp[20]), [ftmp21]"=&f"(ftmp[21]),
[ftmp22]"=&f"(ftmp[22]), [ftmp22]"=&f"(ftmp[22]),
[tmp0]"=&r"(tmp[0]) [tmp0]"=&r"(tmp[0])
: [ff_pw_1]"f"(ff_pw_1_local), [ff_pw_64]"f"(ff_pw_64_local), : [ff_pw_1]"f"(ff_pw_32_1.f), [ff_pw_64]"f"(ff_pw_32_64.f),
[ff_pw_4]"f"(ff_pw_4_local), [block]"r"(block), [ff_pw_4]"f"(ff_pw_32_4.f), [block]"r"(block),
[temp]"r"(temp) [temp]"r"(temp)
: "memory" : "memory"
); );
@ -420,9 +419,11 @@ void ff_vc1_inv_trans_8x4_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
{ {
int dc = block[0]; int dc = block[0];
double ftmp[9]; double ftmp[9];
union mmi_intfloat64 dc_u;
dc = ( 3 * dc + 1) >> 1; dc = ( 3 * dc + 1) >> 1;
dc = (17 * dc + 64) >> 7; dc = (17 * dc + 64) >> 7;
dc_u.i = dc;
__asm__ volatile( __asm__ volatile(
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
@ -467,7 +468,7 @@ void ff_vc1_inv_trans_8x4_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
[ftmp8]"=&f"(ftmp[8]) [ftmp8]"=&f"(ftmp[8])
: [dest0]"r"(dest+0*linesize), [dest1]"r"(dest+1*linesize), : [dest0]"r"(dest+0*linesize), [dest1]"r"(dest+1*linesize),
[dest2]"r"(dest+2*linesize), [dest3]"r"(dest+3*linesize), [dest2]"r"(dest+2*linesize), [dest3]"r"(dest+3*linesize),
[dc]"f"(dc) [dc]"f"(dc_u.f)
: "memory" : "memory"
); );
} }
@ -480,8 +481,6 @@ void ff_vc1_inv_trans_8x4_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
double ftmp[16]; double ftmp[16];
uint32_t tmp[1]; uint32_t tmp[1];
int16_t count = 4; int16_t count = 4;
DECLARE_ALIGNED(16, const uint64_t, ff_pw_4_local) = {0x0000000400000004ULL};
DECLARE_ALIGNED(16, const uint64_t, ff_pw_64_local)= {0x0000004000000040ULL};
int16_t coeff[64] = {12, 16, 16, 15, 12, 9, 6, 4, int16_t coeff[64] = {12, 16, 16, 15, 12, 9, 6, 4,
12, 15, 6, -4, -12, -16, -16, -9, 12, 15, 6, -4, -12, -16, -16, -9,
12, 9, -6, -16, -12, 4, 16, 15, 12, 9, -6, -16, -12, 4, 16, 15,
@ -591,7 +590,7 @@ void ff_vc1_inv_trans_8x4_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
[ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]), [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),
[ftmp14]"=&f"(ftmp[14]), [tmp0]"=&r"(tmp[0]), [ftmp14]"=&f"(ftmp[14]), [tmp0]"=&r"(tmp[0]),
[src]"+&r"(src), [dst]"+&r"(dst), [count]"+&r"(count) [src]"+&r"(src), [dst]"+&r"(dst), [count]"+&r"(count)
: [ff_pw_4]"f"(ff_pw_4_local), [coeff]"r"(coeff) : [ff_pw_4]"f"(ff_pw_32_4.f), [coeff]"r"(coeff)
: "memory" : "memory"
); );
@ -859,7 +858,7 @@ void ff_vc1_inv_trans_8x4_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
[ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]), [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),
[ftmp14]"=&f"(ftmp[14]), [ftmp15]"=&f"(ftmp[15]), [ftmp14]"=&f"(ftmp[14]), [ftmp15]"=&f"(ftmp[15]),
[tmp0]"=&r"(tmp[0]) [tmp0]"=&r"(tmp[0])
: [ff_pw_64]"f"(ff_pw_64_local), : [ff_pw_64]"f"(ff_pw_32_64.f),
[src]"r"(src), [dest]"r"(dest), [linesize]"r"(linesize) [src]"r"(src), [dest]"r"(dest), [linesize]"r"(linesize)
:"memory" :"memory"
); );
@ -871,10 +870,12 @@ void ff_vc1_inv_trans_4x8_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
{ {
int dc = block[0]; int dc = block[0];
double ftmp[9]; double ftmp[9];
union mmi_intfloat64 dc_u;
DECLARE_VAR_LOW32; DECLARE_VAR_LOW32;
dc = (17 * dc + 4) >> 3; dc = (17 * dc + 4) >> 3;
dc = (12 * dc + 64) >> 7; dc = (12 * dc + 64) >> 7;
dc_u.i = dc;
__asm__ volatile( __asm__ volatile(
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
@ -934,7 +935,7 @@ void ff_vc1_inv_trans_4x8_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
[dest2]"r"(dest+2*linesize), [dest3]"r"(dest+3*linesize), [dest2]"r"(dest+2*linesize), [dest3]"r"(dest+3*linesize),
[dest4]"r"(dest+4*linesize), [dest5]"r"(dest+5*linesize), [dest4]"r"(dest+4*linesize), [dest5]"r"(dest+5*linesize),
[dest6]"r"(dest+6*linesize), [dest7]"r"(dest+7*linesize), [dest6]"r"(dest+6*linesize), [dest7]"r"(dest+7*linesize),
[dc]"f"(dc) [dc]"f"(dc_u.f)
: "memory" : "memory"
); );
} }
@ -945,14 +946,11 @@ void ff_vc1_inv_trans_4x8_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
int16_t *src = block; int16_t *src = block;
int16_t *dst = block; int16_t *dst = block;
double ftmp[23]; double ftmp[23];
uint32_t count = 8, tmp[1]; uint64_t count = 8, tmp[1];
int16_t coeff[16] = {17, 22, 17, 10, int16_t coeff[16] = {17, 22, 17, 10,
17, 10,-17,-22, 17, 10,-17,-22,
17,-10,-17, 22, 17,-10,-17, 22,
17,-22, 17,-10}; 17,-22, 17,-10};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_1_local) = {0x0000000100000001ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_4_local) = {0x0000000400000004ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_64_local)= {0x0000004000000040ULL};
// 1st loop // 1st loop
__asm__ volatile ( __asm__ volatile (
@ -998,7 +996,7 @@ void ff_vc1_inv_trans_4x8_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
[tmp0]"=&r"(tmp[0]), [count]"+&r"(count), [tmp0]"=&r"(tmp[0]), [count]"+&r"(count),
[src]"+&r"(src), [dst]"+&r"(dst) [src]"+&r"(src), [dst]"+&r"(dst)
: [ff_pw_4]"f"(ff_pw_4_local), [coeff]"r"(coeff) : [ff_pw_4]"f"(ff_pw_32_4.f), [coeff]"r"(coeff)
: "memory" : "memory"
); );
@ -1115,7 +1113,7 @@ void ff_vc1_inv_trans_4x8_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
[ftmp20]"=&f"(ftmp[20]), [ftmp21]"=&f"(ftmp[21]), [ftmp20]"=&f"(ftmp[20]), [ftmp21]"=&f"(ftmp[21]),
[ftmp22]"=&f"(ftmp[22]), [ftmp22]"=&f"(ftmp[22]),
[tmp0]"=&r"(tmp[0]) [tmp0]"=&r"(tmp[0])
: [ff_pw_1]"f"(ff_pw_1_local), [ff_pw_64]"f"(ff_pw_64_local), : [ff_pw_1]"f"(ff_pw_32_1.f), [ff_pw_64]"f"(ff_pw_32_64.f),
[src]"r"(src), [dest]"r"(dest), [linesize]"r"(linesize) [src]"r"(src), [dest]"r"(dest), [linesize]"r"(linesize)
: "memory" : "memory"
); );
@ -1127,10 +1125,12 @@ void ff_vc1_inv_trans_4x4_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
{ {
int dc = block[0]; int dc = block[0];
double ftmp[5]; double ftmp[5];
union mmi_intfloat64 dc_u;
DECLARE_VAR_LOW32; DECLARE_VAR_LOW32;
dc = (17 * dc + 4) >> 3; dc = (17 * dc + 4) >> 3;
dc = (17 * dc + 64) >> 7; dc = (17 * dc + 64) >> 7;
dc_u.i = dc;
__asm__ volatile( __asm__ volatile(
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
@ -1166,7 +1166,7 @@ void ff_vc1_inv_trans_4x4_dc_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *blo
[ftmp4]"=&f"(ftmp[4]) [ftmp4]"=&f"(ftmp[4])
: [dest0]"r"(dest+0*linesize), [dest1]"r"(dest+1*linesize), : [dest0]"r"(dest+0*linesize), [dest1]"r"(dest+1*linesize),
[dest2]"r"(dest+2*linesize), [dest3]"r"(dest+3*linesize), [dest2]"r"(dest+2*linesize), [dest3]"r"(dest+3*linesize),
[dc]"f"(dc) [dc]"f"(dc_u.f)
: "memory" : "memory"
); );
} }
@ -1181,8 +1181,6 @@ void ff_vc1_inv_trans_4x4_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
17, 10,-17,-22, 17, 10,-17,-22,
17,-10,-17, 22, 17,-10,-17, 22,
17,-22, 17,-10}; 17,-22, 17,-10};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_4_local) = {0x0000000400000004ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_64_local)= {0x0000004000000040ULL};
// 1st loop // 1st loop
__asm__ volatile ( __asm__ volatile (
@ -1226,7 +1224,7 @@ void ff_vc1_inv_trans_4x4_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
[ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
[tmp0]"=&r"(tmp[0]), [count]"+&r"(count), [tmp0]"=&r"(tmp[0]), [count]"+&r"(count),
[src]"+&r"(src), [dst]"+&r"(dst) [src]"+&r"(src), [dst]"+&r"(dst)
: [ff_pw_4]"f"(ff_pw_4_local), [coeff]"r"(coeff) : [ff_pw_4]"f"(ff_pw_32_4.f), [coeff]"r"(coeff)
: "memory" : "memory"
); );
@ -1370,7 +1368,7 @@ void ff_vc1_inv_trans_4x4_mmi(uint8_t *dest, ptrdiff_t linesize, int16_t *block)
[ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]), [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),
[ftmp14]"=&f"(ftmp[14]), [ftmp15]"=&f"(ftmp[15]), [ftmp14]"=&f"(ftmp[14]), [ftmp15]"=&f"(ftmp[15]),
[tmp0]"=&r"(tmp[0]) [tmp0]"=&r"(tmp[0])
: [ff_pw_64]"f"(ff_pw_64_local), : [ff_pw_64]"f"(ff_pw_32_64.f),
[src]"r"(src), [dest]"r"(dest), [linesize]"r"(linesize) [src]"r"(src), [dest]"r"(dest), [linesize]"r"(linesize)
:"memory" :"memory"
); );
@ -1660,14 +1658,15 @@ static void vc1_put_ver_16b_shift2_mmi(int16_t *dst,
const uint8_t *src, mips_reg stride, const uint8_t *src, mips_reg stride,
int rnd, int64_t shift) int rnd, int64_t shift)
{ {
union mmi_intfloat64 shift_u;
DECLARE_VAR_LOW32; DECLARE_VAR_LOW32;
DECLARE_VAR_ADDRT; DECLARE_VAR_ADDRT;
shift_u.i = shift;
__asm__ volatile( __asm__ volatile(
"pxor $f0, $f0, $f0 \n\t" "pxor $f0, $f0, $f0 \n\t"
"li $8, 0x03 \n\t" "li $8, 0x03 \n\t"
LOAD_ROUNDER_MMI("%[rnd]") LOAD_ROUNDER_MMI("%[rnd]")
"ldc1 $f12, %[ff_pw_9] \n\t"
"1: \n\t" "1: \n\t"
MMI_ULWC1($f4, %[src], 0x00) MMI_ULWC1($f4, %[src], 0x00)
PTR_ADDU "%[src], %[src], %[stride] \n\t" PTR_ADDU "%[src], %[src], %[stride] \n\t"
@ -1689,9 +1688,9 @@ static void vc1_put_ver_16b_shift2_mmi(int16_t *dst,
: RESTRICT_ASM_LOW32 RESTRICT_ASM_ADDRT : RESTRICT_ASM_LOW32 RESTRICT_ASM_ADDRT
[src]"+r"(src), [dst]"+r"(dst) [src]"+r"(src), [dst]"+r"(dst)
: [stride]"r"(stride), [stride1]"r"(-2*stride), : [stride]"r"(stride), [stride1]"r"(-2*stride),
[shift]"f"(shift), [rnd]"m"(rnd), [shift]"f"(shift_u.f), [rnd]"m"(rnd),
[stride2]"r"(9*stride-4), [ff_pw_9]"m"(ff_pw_9) [stride2]"r"(9*stride-4)
: "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10",
"$f14", "$f16", "memory" "$f14", "$f16", "memory"
); );
} }
@ -1713,8 +1712,6 @@ static void OPNAME ## vc1_hor_16b_shift2_mmi(uint8_t *dst, mips_reg stride, \
\ \
__asm__ volatile( \ __asm__ volatile( \
LOAD_ROUNDER_MMI("%[rnd]") \ LOAD_ROUNDER_MMI("%[rnd]") \
"ldc1 $f12, %[ff_pw_128] \n\t" \
"ldc1 $f10, %[ff_pw_9] \n\t" \
"1: \n\t" \ "1: \n\t" \
MMI_ULDC1($f2, %[src], 0x00) \ MMI_ULDC1($f2, %[src], 0x00) \
MMI_ULDC1($f4, %[src], 0x08) \ MMI_ULDC1($f4, %[src], 0x08) \
@ -1728,16 +1725,16 @@ static void OPNAME ## vc1_hor_16b_shift2_mmi(uint8_t *dst, mips_reg stride, \
"paddh $f6, $f6, $f0 \n\t" \ "paddh $f6, $f6, $f0 \n\t" \
MMI_ULDC1($f0, %[src], 0x0b) \ MMI_ULDC1($f0, %[src], 0x0b) \
"paddh $f8, $f8, $f0 \n\t" \ "paddh $f8, $f8, $f0 \n\t" \
"pmullh $f6, $f6, $f10 \n\t" \ "pmullh $f6, $f6, %[ff_pw_9] \n\t" \
"pmullh $f8, $f8, $f10 \n\t" \ "pmullh $f8, $f8, %[ff_pw_9] \n\t" \
"psubh $f6, $f6, $f2 \n\t" \ "psubh $f6, $f6, $f2 \n\t" \
"psubh $f8, $f8, $f4 \n\t" \ "psubh $f8, $f8, $f4 \n\t" \
"li $8, 0x07 \n\t" \ "li $8, 0x07 \n\t" \
"mtc1 $8, $f16 \n\t" \ "mtc1 $8, $f16 \n\t" \
NORMALIZE_MMI("$f16") \ NORMALIZE_MMI("$f16") \
/* Remove bias */ \ /* Remove bias */ \
"paddh $f6, $f6, $f12 \n\t" \ "paddh $f6, $f6, %[ff_pw_128] \n\t" \
"paddh $f8, $f8, $f12 \n\t" \ "paddh $f8, $f8, %[ff_pw_128] \n\t" \
TRANSFER_DO_PACK(OP) \ TRANSFER_DO_PACK(OP) \
"addiu %[h], %[h], -0x01 \n\t" \ "addiu %[h], %[h], -0x01 \n\t" \
PTR_ADDIU "%[src], %[src], 0x18 \n\t" \ PTR_ADDIU "%[src], %[src], 0x18 \n\t" \
@ -1747,8 +1744,8 @@ static void OPNAME ## vc1_hor_16b_shift2_mmi(uint8_t *dst, mips_reg stride, \
[h]"+r"(h), \ [h]"+r"(h), \
[src]"+r"(src), [dst]"+r"(dst) \ [src]"+r"(src), [dst]"+r"(dst) \
: [stride]"r"(stride), [rnd]"m"(rnd), \ : [stride]"r"(stride), [rnd]"m"(rnd), \
[ff_pw_9]"m"(ff_pw_9), [ff_pw_128]"m"(ff_pw_128) \ [ff_pw_9]"f"(ff_pw_9.f), [ff_pw_128]"f"(ff_pw_128.f) \
: "$8", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", "$f14", \ : "$8", "$f0", "$f2", "$f4", "$f6", "$f8", "$f14", \
"$f16", "memory" \ "$f16", "memory" \
); \ ); \
} }
@ -1774,7 +1771,6 @@ static void OPNAME ## vc1_shift2_mmi(uint8_t *dst, const uint8_t *src, \
"pxor $f0, $f0, $f0 \n\t" \ "pxor $f0, $f0, $f0 \n\t" \
"li $10, 0x08 \n\t" \ "li $10, 0x08 \n\t" \
LOAD_ROUNDER_MMI("%[rnd]") \ LOAD_ROUNDER_MMI("%[rnd]") \
"ldc1 $f12, %[ff_pw_9] \n\t" \
"1: \n\t" \ "1: \n\t" \
MMI_ULWC1($f6, %[src], 0x00) \ MMI_ULWC1($f6, %[src], 0x00) \
MMI_ULWC1($f8, %[src], 0x04) \ MMI_ULWC1($f8, %[src], 0x04) \
@ -1791,8 +1787,8 @@ static void OPNAME ## vc1_shift2_mmi(uint8_t *dst, const uint8_t *src, \
PTR_ADDU "$9, %[src], %[offset_x2n] \n\t" \ PTR_ADDU "$9, %[src], %[offset_x2n] \n\t" \
MMI_ULWC1($f2, $9, 0x00) \ MMI_ULWC1($f2, $9, 0x00) \
MMI_ULWC1($f4, $9, 0x04) \ MMI_ULWC1($f4, $9, 0x04) \
"pmullh $f6, $f6, $f12 \n\t" /* 0,9,9,0*/ \ "pmullh $f6, $f6, %[ff_pw_9] \n\t" /* 0,9,9,0*/ \
"pmullh $f8, $f8, $f12 \n\t" /* 0,9,9,0*/ \ "pmullh $f8, $f8, %[ff_pw_9] \n\t" /* 0,9,9,0*/ \
"punpcklbh $f2, $f2, $f0 \n\t" \ "punpcklbh $f2, $f2, $f0 \n\t" \
"punpcklbh $f4, $f4, $f0 \n\t" \ "punpcklbh $f4, $f4, $f0 \n\t" \
"psubh $f6, $f6, $f2 \n\t" /*-1,9,9,0*/ \ "psubh $f6, $f6, $f2 \n\t" /*-1,9,9,0*/ \
@ -1819,9 +1815,9 @@ static void OPNAME ## vc1_shift2_mmi(uint8_t *dst, const uint8_t *src, \
: [offset]"r"(offset), [offset_x2n]"r"(-2*offset), \ : [offset]"r"(offset), [offset_x2n]"r"(-2*offset), \
[stride]"r"(stride), [rnd]"m"(rnd), \ [stride]"r"(stride), [rnd]"m"(rnd), \
[stride1]"r"(stride-offset), \ [stride1]"r"(stride-offset), \
[ff_pw_9]"m"(ff_pw_9) \ [ff_pw_9]"f"(ff_pw_9.f) \
: "$8", "$9", "$10", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", \ : "$8", "$9", "$10", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", \
"$f12", "$f14", "$f16", "memory" \ "$f14", "$f16", "memory" \
); \ ); \
} }
@ -1852,8 +1848,8 @@ VC1_SHIFT2(OP_AVG, avg_)
LOAD($f8, $9, M*4) \ LOAD($f8, $9, M*4) \
UNPACK("$f6") \ UNPACK("$f6") \
UNPACK("$f8") \ UNPACK("$f8") \
"pmullh $f6, $f6, $f12 \n\t" /* *18 */ \ "pmullh $f6, $f6, %[ff_pw_18] \n\t" /* *18 */ \
"pmullh $f8, $f8, $f12 \n\t" /* *18 */ \ "pmullh $f8, $f8, %[ff_pw_18] \n\t" /* *18 */ \
"psubh $f6, $f6, $f2 \n\t" /* *18, -3 */ \ "psubh $f6, $f6, $f2 \n\t" /* *18, -3 */ \
"psubh $f8, $f8, $f4 \n\t" /* *18, -3 */ \ "psubh $f8, $f8, $f4 \n\t" /* *18, -3 */ \
PTR_ADDU "$9, %[src], "#A4" \n\t" \ PTR_ADDU "$9, %[src], "#A4" \n\t" \
@ -1872,8 +1868,8 @@ VC1_SHIFT2(OP_AVG, avg_)
LOAD($f4, $9, M*4) \ LOAD($f4, $9, M*4) \
UNPACK("$f2") \ UNPACK("$f2") \
UNPACK("$f4") \ UNPACK("$f4") \
"pmullh $f2, $f2, $f10 \n\t" /* *53 */ \ "pmullh $f2, $f2, %[ff_pw_53] \n\t" /* *53 */ \
"pmullh $f4, $f4, $f10 \n\t" /* *53 */ \ "pmullh $f4, $f4, %[ff_pw_53] \n\t" /* *53 */ \
"paddh $f6, $f6, $f2 \n\t" /* 4,53,18,-3 */ \ "paddh $f6, $f6, $f2 \n\t" /* 4,53,18,-3 */ \
"paddh $f8, $f8, $f4 \n\t" /* 4,53,18,-3 */ "paddh $f8, $f8, $f4 \n\t" /* 4,53,18,-3 */
@ -1892,16 +1888,16 @@ vc1_put_ver_16b_ ## NAME ## _mmi(int16_t *dst, const uint8_t *src, \
int rnd, int64_t shift) \ int rnd, int64_t shift) \
{ \ { \
int h = 8; \ int h = 8; \
union mmi_intfloat64 shift_u; \
DECLARE_VAR_LOW32; \ DECLARE_VAR_LOW32; \
DECLARE_VAR_ADDRT; \ DECLARE_VAR_ADDRT; \
shift_u.i = shift; \
\ \
src -= src_stride; \ src -= src_stride; \
\ \
__asm__ volatile( \ __asm__ volatile( \
"pxor $f0, $f0, $f0 \n\t" \ "pxor $f0, $f0, $f0 \n\t" \
LOAD_ROUNDER_MMI("%[rnd]") \ LOAD_ROUNDER_MMI("%[rnd]") \
"ldc1 $f10, %[ff_pw_53] \n\t" \
"ldc1 $f12, %[ff_pw_18] \n\t" \
".p2align 3 \n\t" \ ".p2align 3 \n\t" \
"1: \n\t" \ "1: \n\t" \
MSPEL_FILTER13_CORE(DO_UNPACK, MMI_ULWC1, 1, A1, A2, A3, A4) \ MSPEL_FILTER13_CORE(DO_UNPACK, MMI_ULWC1, 1, A1, A2, A3, A4) \
@ -1917,12 +1913,12 @@ vc1_put_ver_16b_ ## NAME ## _mmi(int16_t *dst, const uint8_t *src, \
PTR_ADDU "$9, %[src], "#A2" \n\t" \ PTR_ADDU "$9, %[src], "#A2" \n\t" \
MMI_ULWC1($f6, $9, 0x08) \ MMI_ULWC1($f6, $9, 0x08) \
DO_UNPACK("$f6") \ DO_UNPACK("$f6") \
"pmullh $f6, $f6, $f12 \n\t" /* *18 */ \ "pmullh $f6, $f6, %[ff_pw_18] \n\t" /* *18 */ \
"psubh $f6, $f6, $f2 \n\t" /* *18,-3 */ \ "psubh $f6, $f6, $f2 \n\t" /* *18,-3 */ \
PTR_ADDU "$9, %[src], "#A3" \n\t" \ PTR_ADDU "$9, %[src], "#A3" \n\t" \
MMI_ULWC1($f2, $9, 0x08) \ MMI_ULWC1($f2, $9, 0x08) \
DO_UNPACK("$f2") \ DO_UNPACK("$f2") \
"pmullh $f2, $f2, $f10 \n\t" /* *53 */ \ "pmullh $f2, $f2, %[ff_pw_53] \n\t" /* *53 */ \
"paddh $f6, $f6, $f2 \n\t" /* *53,18,-3 */ \ "paddh $f6, $f6, $f2 \n\t" /* *53,18,-3 */ \
PTR_ADDU "$9, %[src], "#A4" \n\t" \ PTR_ADDU "$9, %[src], "#A4" \n\t" \
MMI_ULWC1($f2, $9, 0x08) \ MMI_ULWC1($f2, $9, 0x08) \
@ -1945,10 +1941,10 @@ vc1_put_ver_16b_ ## NAME ## _mmi(int16_t *dst, const uint8_t *src, \
[src]"+r"(src), [dst]"+r"(dst) \ [src]"+r"(src), [dst]"+r"(dst) \
: [stride_x1]"r"(src_stride), [stride_x2]"r"(2*src_stride), \ : [stride_x1]"r"(src_stride), [stride_x2]"r"(2*src_stride), \
[stride_x3]"r"(3*src_stride), \ [stride_x3]"r"(3*src_stride), \
[rnd]"m"(rnd), [shift]"f"(shift), \ [rnd]"m"(rnd), [shift]"f"(shift_u.f), \
[ff_pw_53]"m"(ff_pw_53), [ff_pw_18]"m"(ff_pw_18), \ [ff_pw_53]"f"(ff_pw_53.f), [ff_pw_18]"f"(ff_pw_18.f), \
[ff_pw_3]"f"(ff_pw_3) \ [ff_pw_3]"f"(ff_pw_3.f) \
: "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", \ : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", \
"$f14", "$f16", "memory" \ "$f14", "$f16", "memory" \
); \ ); \
} }
@ -1975,8 +1971,6 @@ OPNAME ## vc1_hor_16b_ ## NAME ## _mmi(uint8_t *dst, mips_reg stride, \
__asm__ volatile( \ __asm__ volatile( \
"pxor $f0, $f0, $f0 \n\t" \ "pxor $f0, $f0, $f0 \n\t" \
LOAD_ROUNDER_MMI("%[rnd]") \ LOAD_ROUNDER_MMI("%[rnd]") \
"ldc1 $f10, %[ff_pw_53] \n\t" \
"ldc1 $f12, %[ff_pw_18] \n\t" \
".p2align 3 \n\t" \ ".p2align 3 \n\t" \
"1: \n\t" \ "1: \n\t" \
MSPEL_FILTER13_CORE(DONT_UNPACK, MMI_ULDC1, 2, A1, A2, A3, A4) \ MSPEL_FILTER13_CORE(DONT_UNPACK, MMI_ULDC1, 2, A1, A2, A3, A4) \
@ -1995,9 +1989,9 @@ OPNAME ## vc1_hor_16b_ ## NAME ## _mmi(uint8_t *dst, mips_reg stride, \
[h]"+r"(h), \ [h]"+r"(h), \
[src]"+r"(src), [dst]"+r"(dst) \ [src]"+r"(src), [dst]"+r"(dst) \
: [stride]"r"(stride), [rnd]"m"(rnd), \ : [stride]"r"(stride), [rnd]"m"(rnd), \
[ff_pw_53]"m"(ff_pw_53), [ff_pw_18]"m"(ff_pw_18), \ [ff_pw_53]"f"(ff_pw_53.f), [ff_pw_18]"f"(ff_pw_18.f), \
[ff_pw_3]"f"(ff_pw_3), [ff_pw_128]"f"(ff_pw_128) \ [ff_pw_3]"f"(ff_pw_3.f), [ff_pw_128]"f"(ff_pw_128.f) \
: "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", \ : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", \
"$f14", "$f16", "memory" \ "$f14", "$f16", "memory" \
); \ ); \
} }
@ -2025,8 +2019,6 @@ OPNAME ## vc1_## NAME ## _mmi(uint8_t *dst, const uint8_t *src, \
__asm__ volatile ( \ __asm__ volatile ( \
"pxor $f0, $f0, $f0 \n\t" \ "pxor $f0, $f0, $f0 \n\t" \
LOAD_ROUNDER_MMI("%[rnd]") \ LOAD_ROUNDER_MMI("%[rnd]") \
"ldc1 $f10, %[ff_pw_53] \n\t" \
"ldc1 $f12, %[ff_pw_18] \n\t" \
".p2align 3 \n\t" \ ".p2align 3 \n\t" \
"1: \n\t" \ "1: \n\t" \
MSPEL_FILTER13_CORE(DO_UNPACK, MMI_ULWC1, 1, A1, A2, A3, A4) \ MSPEL_FILTER13_CORE(DO_UNPACK, MMI_ULWC1, 1, A1, A2, A3, A4) \
@ -2044,9 +2036,9 @@ OPNAME ## vc1_## NAME ## _mmi(uint8_t *dst, const uint8_t *src, \
: [offset_x1]"r"(offset), [offset_x2]"r"(2*offset), \ : [offset_x1]"r"(offset), [offset_x2]"r"(2*offset), \
[offset_x3]"r"(3*offset), [stride]"r"(stride), \ [offset_x3]"r"(3*offset), [stride]"r"(stride), \
[rnd]"m"(rnd), \ [rnd]"m"(rnd), \
[ff_pw_53]"m"(ff_pw_53), [ff_pw_18]"m"(ff_pw_18), \ [ff_pw_53]"f"(ff_pw_53.f), [ff_pw_18]"f"(ff_pw_18.f), \
[ff_pw_3]"f"(ff_pw_3) \ [ff_pw_3]"f"(ff_pw_3.f) \
: "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", \ : "$8", "$9", "$f0", "$f2", "$f4", "$f6", "$f8", \
"$f14", "$f16", "memory" \ "$f14", "$f16", "memory" \
); \ ); \
} }
@ -2246,14 +2238,15 @@ void ff_put_no_rnd_vc1_chroma_mc8_mmi(uint8_t *dst /* align 8 */,
uint8_t *src /* align 1 */, uint8_t *src /* align 1 */,
ptrdiff_t stride, int h, int x, int y) ptrdiff_t stride, int h, int x, int y)
{ {
const int A = (8 - x) * (8 - y); union mmi_intfloat64 A, B, C, D;
const int B = (x) * (8 - y);
const int C = (8 - x) * (y);
const int D = (x) * (y);
double ftmp[10]; double ftmp[10];
uint32_t tmp[1]; uint32_t tmp[1];
DECLARE_VAR_ALL64; DECLARE_VAR_ALL64;
DECLARE_VAR_ADDRT; DECLARE_VAR_ADDRT;
A.i = (8 - x) * (8 - y);
B.i = (x) * (8 - y);
C.i = (8 - x) * (y);
D.i = (x) * (y);
av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0); av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
@ -2290,9 +2283,9 @@ void ff_put_no_rnd_vc1_chroma_mc8_mmi(uint8_t *dst /* align 8 */,
[src]"+&r"(src), [dst]"+&r"(dst), [src]"+&r"(src), [dst]"+&r"(dst),
[h]"+&r"(h) [h]"+&r"(h)
: [stride]"r"((mips_reg)stride), : [stride]"r"((mips_reg)stride),
[A]"f"(A), [B]"f"(B), [A]"f"(A.f), [B]"f"(B.f),
[C]"f"(C), [D]"f"(D), [C]"f"(C.f), [D]"f"(D.f),
[ff_pw_28]"f"(ff_pw_28) [ff_pw_28]"f"(ff_pw_28.f)
: "memory" : "memory"
); );
} }
@ -2301,14 +2294,15 @@ void ff_put_no_rnd_vc1_chroma_mc4_mmi(uint8_t *dst /* align 8 */,
uint8_t *src /* align 1 */, uint8_t *src /* align 1 */,
ptrdiff_t stride, int h, int x, int y) ptrdiff_t stride, int h, int x, int y)
{ {
const int A = (8 - x) * (8 - y); union mmi_intfloat64 A, B, C, D;
const int B = (x) * (8 - y);
const int C = (8 - x) * (y);
const int D = (x) * (y);
double ftmp[6]; double ftmp[6];
uint32_t tmp[1]; uint32_t tmp[1];
DECLARE_VAR_LOW32; DECLARE_VAR_LOW32;
DECLARE_VAR_ADDRT; DECLARE_VAR_ADDRT;
A.i = (8 - x) * (8 - y);
B.i = (x) * (8 - y);
C.i = (8 - x) * (y);
D.i = (x) * (y);
av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0); av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
@ -2343,9 +2337,9 @@ void ff_put_no_rnd_vc1_chroma_mc4_mmi(uint8_t *dst /* align 8 */,
[src]"+&r"(src), [dst]"+&r"(dst), [src]"+&r"(src), [dst]"+&r"(dst),
[h]"+&r"(h) [h]"+&r"(h)
: [stride]"r"((mips_reg)stride), : [stride]"r"((mips_reg)stride),
[A]"f"(A), [B]"f"(B), [A]"f"(A.f), [B]"f"(B.f),
[C]"f"(C), [D]"f"(D), [C]"f"(C.f), [D]"f"(D.f),
[ff_pw_28]"f"(ff_pw_28) [ff_pw_28]"f"(ff_pw_28.f)
: "memory" : "memory"
); );
} }
@ -2354,14 +2348,15 @@ void ff_avg_no_rnd_vc1_chroma_mc8_mmi(uint8_t *dst /* align 8 */,
uint8_t *src /* align 1 */, uint8_t *src /* align 1 */,
ptrdiff_t stride, int h, int x, int y) ptrdiff_t stride, int h, int x, int y)
{ {
const int A = (8 - x) * (8 - y); union mmi_intfloat64 A, B, C, D;
const int B = (x) * (8 - y);
const int C = (8 - x) * (y);
const int D = (x) * (y);
double ftmp[10]; double ftmp[10];
uint32_t tmp[1]; uint32_t tmp[1];
DECLARE_VAR_ALL64; DECLARE_VAR_ALL64;
DECLARE_VAR_ADDRT; DECLARE_VAR_ADDRT;
A.i = (8 - x) * (8 - y);
B.i = (x) * (8 - y);
C.i = (8 - x) * (y);
D.i = (x) * (y);
av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0); av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
@ -2401,9 +2396,9 @@ void ff_avg_no_rnd_vc1_chroma_mc8_mmi(uint8_t *dst /* align 8 */,
[src]"+&r"(src), [dst]"+&r"(dst), [src]"+&r"(src), [dst]"+&r"(dst),
[h]"+&r"(h) [h]"+&r"(h)
: [stride]"r"((mips_reg)stride), : [stride]"r"((mips_reg)stride),
[A]"f"(A), [B]"f"(B), [A]"f"(A.f), [B]"f"(B.f),
[C]"f"(C), [D]"f"(D), [C]"f"(C.f), [D]"f"(D.f),
[ff_pw_28]"f"(ff_pw_28) [ff_pw_28]"f"(ff_pw_28.f)
: "memory" : "memory"
); );
} }
@ -2412,14 +2407,15 @@ void ff_avg_no_rnd_vc1_chroma_mc4_mmi(uint8_t *dst /* align 8 */,
uint8_t *src /* align 1 */, uint8_t *src /* align 1 */,
ptrdiff_t stride, int h, int x, int y) ptrdiff_t stride, int h, int x, int y)
{ {
const int A = (8 - x) * (8 - y); union mmi_intfloat64 A, B, C, D;
const int B = ( x) * (8 - y);
const int C = (8 - x) * ( y);
const int D = ( x) * ( y);
double ftmp[6]; double ftmp[6];
uint32_t tmp[1]; uint32_t tmp[1];
DECLARE_VAR_LOW32; DECLARE_VAR_LOW32;
DECLARE_VAR_ADDRT; DECLARE_VAR_ADDRT;
A.i = (8 - x) * (8 - y);
B.i = (x) * (8 - y);
C.i = (8 - x) * (y);
D.i = (x) * (y);
av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0); av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
@ -2457,9 +2453,9 @@ void ff_avg_no_rnd_vc1_chroma_mc4_mmi(uint8_t *dst /* align 8 */,
[src]"+&r"(src), [dst]"+&r"(dst), [src]"+&r"(src), [dst]"+&r"(dst),
[h]"+&r"(h) [h]"+&r"(h)
: [stride]"r"((mips_reg)stride), : [stride]"r"((mips_reg)stride),
[A]"f"(A), [B]"f"(B), [A]"f"(A.f), [B]"f"(B.f),
[C]"f"(C), [D]"f"(D), [C]"f"(C.f), [D]"f"(D.f),
[ff_pw_28]"f"(ff_pw_28) [ff_pw_28]"f"(ff_pw_28.f)
: "memory" : "memory"
); );
} }

View File

@ -1128,12 +1128,14 @@ void ff_vp8_luma_dc_wht_dc_mmi(int16_t block[4][4][16], int16_t dc[16])
void ff_vp8_idct_add_mmi(uint8_t *dst, int16_t block[16], ptrdiff_t stride) void ff_vp8_idct_add_mmi(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
{ {
#if 1 #if 1
DECLARE_ALIGNED(8, const uint64_t, ff_ph_4e7b) = {0x4e7b4e7b4e7b4e7bULL};
DECLARE_ALIGNED(8, const uint64_t, ff_ph_22a3) = {0x22a322a322a322a3ULL};
double ftmp[12]; double ftmp[12];
uint32_t tmp[1]; uint32_t tmp[1];
union av_intfloat64 ff_ph_4e7b_u;
union av_intfloat64 ff_ph_22a3_u;
DECLARE_VAR_LOW32; DECLARE_VAR_LOW32;
DECLARE_VAR_ALL64; DECLARE_VAR_ALL64;
ff_ph_4e7b_u.i = 0x4e7b4e7b4e7b4e7bULL;
ff_ph_22a3_u.i = 0x22a322a322a322a3ULL;
__asm__ volatile ( __asm__ volatile (
"pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
@ -1253,8 +1255,8 @@ void ff_vp8_idct_add_mmi(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
[tmp0]"=&r"(tmp[0]) [tmp0]"=&r"(tmp[0])
: [dst0]"r"(dst), [dst1]"r"(dst+stride), : [dst0]"r"(dst), [dst1]"r"(dst+stride),
[dst2]"r"(dst+2*stride), [dst3]"r"(dst+3*stride), [dst2]"r"(dst+2*stride), [dst3]"r"(dst+3*stride),
[block]"r"(block), [ff_pw_4]"f"(ff_pw_4), [block]"r"(block), [ff_pw_4]"f"(ff_pw_4.f),
[ff_ph_4e7b]"f"(ff_ph_4e7b), [ff_ph_22a3]"f"(ff_ph_22a3) [ff_ph_4e7b]"f"(ff_ph_4e7b_u.f), [ff_ph_22a3]"f"(ff_ph_22a3_u.f)
: "memory" : "memory"
); );
#else #else
@ -1595,8 +1597,16 @@ void ff_put_vp8_epel16_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
const uint64_t *filter = fourtap_subpel_filters[mx - 1]; const uint64_t *filter = fourtap_subpel_filters[mx - 1];
double ftmp[9]; double ftmp[9];
uint32_t tmp[1]; uint32_t tmp[1];
union av_intfloat64 filter1;
union av_intfloat64 filter2;
union av_intfloat64 filter3;
union av_intfloat64 filter4;
mips_reg src1, dst1; mips_reg src1, dst1;
DECLARE_VAR_ALL64; DECLARE_VAR_ALL64;
filter1.i = filter[1];
filter2.i = filter[2];
filter3.i = filter[3];
filter4.i = filter[4];
/* /*
dst[0] = cm[(filter[2] * src[0] - filter[1] * src[-1] + filter[3] * src[1] - filter[4] * src[2] + 64) >> 7]; dst[0] = cm[(filter[2] * src[0] - filter[1] * src[-1] + filter[3] * src[1] - filter[4] * src[2] + 64) >> 7];
@ -1644,11 +1654,11 @@ void ff_put_vp8_epel16_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
[dst1]"=&r"(dst1), [src1]"=&r"(src1), [dst1]"=&r"(dst1), [src1]"=&r"(src1),
[h]"+&r"(h), [h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src) [dst]"+&r"(dst), [src]"+&r"(src)
: [ff_pw_64]"f"(ff_pw_64), : [ff_pw_64]"f"(ff_pw_64.f),
[srcstride]"r"((mips_reg)srcstride), [srcstride]"r"((mips_reg)srcstride),
[dststride]"r"((mips_reg)dststride), [dststride]"r"((mips_reg)dststride),
[filter1]"f"(filter[1]), [filter2]"f"(filter[2]), [filter1]"f"(filter1.f), [filter2]"f"(filter2.f),
[filter3]"f"(filter[3]), [filter4]"f"(filter[4]) [filter3]"f"(filter3.f), [filter4]"f"(filter4.f)
: "memory" : "memory"
); );
#else #else
@ -1672,7 +1682,16 @@ void ff_put_vp8_epel8_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
const uint64_t *filter = fourtap_subpel_filters[mx - 1]; const uint64_t *filter = fourtap_subpel_filters[mx - 1];
double ftmp[9]; double ftmp[9];
uint32_t tmp[1]; uint32_t tmp[1];
union av_intfloat64 filter1;
union av_intfloat64 filter2;
union av_intfloat64 filter3;
union av_intfloat64 filter4;
DECLARE_VAR_ALL64; DECLARE_VAR_ALL64;
filter1.i = filter[1];
filter2.i = filter[2];
filter3.i = filter[3];
filter4.i = filter[4];
/* /*
dst[0] = cm[(filter[2] * src[0] - filter[1] * src[-1] + filter[3] * src[1] - filter[4] * src[2] + 64) >> 7]; dst[0] = cm[(filter[2] * src[0] - filter[1] * src[-1] + filter[3] * src[1] - filter[4] * src[2] + 64) >> 7];
@ -1705,11 +1724,11 @@ void ff_put_vp8_epel8_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
RESTRICT_ASM_ALL64 RESTRICT_ASM_ALL64
[h]"+&r"(h), [h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src) [dst]"+&r"(dst), [src]"+&r"(src)
: [ff_pw_64]"f"(ff_pw_64), : [ff_pw_64]"f"(ff_pw_64.f),
[srcstride]"r"((mips_reg)srcstride), [srcstride]"r"((mips_reg)srcstride),
[dststride]"r"((mips_reg)dststride), [dststride]"r"((mips_reg)dststride),
[filter1]"f"(filter[1]), [filter2]"f"(filter[2]), [filter1]"f"(filter1.f), [filter2]"f"(filter2.f),
[filter3]"f"(filter[3]), [filter4]"f"(filter[4]) [filter3]"f"(filter3.f), [filter4]"f"(filter4.f)
: "memory" : "memory"
); );
#else #else
@ -1733,7 +1752,15 @@ void ff_put_vp8_epel4_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
const uint64_t *filter = fourtap_subpel_filters[mx - 1]; const uint64_t *filter = fourtap_subpel_filters[mx - 1];
double ftmp[6]; double ftmp[6];
uint32_t tmp[1]; uint32_t tmp[1];
union av_intfloat64 filter1;
union av_intfloat64 filter2;
union av_intfloat64 filter3;
union av_intfloat64 filter4;
DECLARE_VAR_LOW32; DECLARE_VAR_LOW32;
filter1.i = filter[1];
filter2.i = filter[2];
filter3.i = filter[3];
filter4.i = filter[4];
/* /*
dst[0] = cm[(filter[2] * src[0] - filter[1] * src[-1] + filter[3] * src[1] - filter[4] * src[2] + 64) >> 7]; dst[0] = cm[(filter[2] * src[0] - filter[1] * src[-1] + filter[3] * src[1] - filter[4] * src[2] + 64) >> 7];
@ -1760,11 +1787,11 @@ void ff_put_vp8_epel4_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
RESTRICT_ASM_LOW32 RESTRICT_ASM_LOW32
[h]"+&r"(h), [h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src) [dst]"+&r"(dst), [src]"+&r"(src)
: [ff_pw_64]"f"(ff_pw_64), : [ff_pw_64]"f"(ff_pw_64.f),
[srcstride]"r"((mips_reg)srcstride), [srcstride]"r"((mips_reg)srcstride),
[dststride]"r"((mips_reg)dststride), [dststride]"r"((mips_reg)dststride),
[filter1]"f"(filter[1]), [filter2]"f"(filter[2]), [filter1]"f"(filter1.f), [filter2]"f"(filter2.f),
[filter3]"f"(filter[3]), [filter4]"f"(filter[4]) [filter3]"f"(filter3.f), [filter4]"f"(filter4.f)
: "memory" : "memory"
); );
#else #else
@ -1789,7 +1816,19 @@ void ff_put_vp8_epel16_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
double ftmp[9]; double ftmp[9];
uint32_t tmp[1]; uint32_t tmp[1];
mips_reg src1, dst1; mips_reg src1, dst1;
union av_intfloat64 filter0;
union av_intfloat64 filter1;
union av_intfloat64 filter2;
union av_intfloat64 filter3;
union av_intfloat64 filter4;
union av_intfloat64 filter5;
DECLARE_VAR_ALL64; DECLARE_VAR_ALL64;
filter0.i = filter[0];
filter1.i = filter[1];
filter2.i = filter[2];
filter3.i = filter[3];
filter4.i = filter[4];
filter5.i = filter[5];
/* /*
dst[ 0] = cm[(filter[2]*src[ 0] - filter[1]*src[-1] + filter[0]*src[-2] + filter[3]*src[ 1] - filter[4]*src[ 2] + filter[5]*src[ 3] + 64) >> 7]; dst[ 0] = cm[(filter[2]*src[ 0] - filter[1]*src[-1] + filter[0]*src[-2] + filter[3]*src[ 1] - filter[4]*src[ 2] + filter[5]*src[ 3] + 64) >> 7];
@ -1837,12 +1876,12 @@ void ff_put_vp8_epel16_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
[dst1]"=&r"(dst1), [src1]"=&r"(src1), [dst1]"=&r"(dst1), [src1]"=&r"(src1),
[h]"+&r"(h), [h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src) [dst]"+&r"(dst), [src]"+&r"(src)
: [ff_pw_64]"f"(ff_pw_64), : [ff_pw_64]"f"(ff_pw_64.f),
[srcstride]"r"((mips_reg)srcstride), [srcstride]"r"((mips_reg)srcstride),
[dststride]"r"((mips_reg)dststride), [dststride]"r"((mips_reg)dststride),
[filter0]"f"(filter[0]), [filter1]"f"(filter[1]), [filter0]"f"(filter0.f), [filter1]"f"(filter1.f),
[filter2]"f"(filter[2]), [filter3]"f"(filter[3]), [filter2]"f"(filter2.f), [filter3]"f"(filter3.f),
[filter4]"f"(filter[4]), [filter5]"f"(filter[5]) [filter4]"f"(filter4.f), [filter5]"f"(filter5.f)
: "memory" : "memory"
); );
#else #else
@ -1866,7 +1905,19 @@ void ff_put_vp8_epel8_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
const uint64_t *filter = fourtap_subpel_filters[mx - 1]; const uint64_t *filter = fourtap_subpel_filters[mx - 1];
double ftmp[9]; double ftmp[9];
uint32_t tmp[1]; uint32_t tmp[1];
union av_intfloat64 filter0;
union av_intfloat64 filter1;
union av_intfloat64 filter2;
union av_intfloat64 filter3;
union av_intfloat64 filter4;
union av_intfloat64 filter5;
DECLARE_VAR_ALL64; DECLARE_VAR_ALL64;
filter0.i = filter[0];
filter1.i = filter[1];
filter2.i = filter[2];
filter3.i = filter[3];
filter4.i = filter[4];
filter5.i = filter[5];
/* /*
dst[0] = cm[(filter[2]*src[0] - filter[1]*src[-1] + filter[0]*src[-2] + filter[3]*src[1] - filter[4]*src[2] + filter[5]*src[ 3] + 64) >> 7]; dst[0] = cm[(filter[2]*src[0] - filter[1]*src[-1] + filter[0]*src[-2] + filter[3]*src[1] - filter[4]*src[2] + filter[5]*src[ 3] + 64) >> 7];
@ -1899,12 +1950,12 @@ void ff_put_vp8_epel8_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
RESTRICT_ASM_ALL64 RESTRICT_ASM_ALL64
[h]"+&r"(h), [h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src) [dst]"+&r"(dst), [src]"+&r"(src)
: [ff_pw_64]"f"(ff_pw_64), : [ff_pw_64]"f"(ff_pw_64.f),
[srcstride]"r"((mips_reg)srcstride), [srcstride]"r"((mips_reg)srcstride),
[dststride]"r"((mips_reg)dststride), [dststride]"r"((mips_reg)dststride),
[filter0]"f"(filter[0]), [filter1]"f"(filter[1]), [filter0]"f"(filter0.f), [filter1]"f"(filter1.f),
[filter2]"f"(filter[2]), [filter3]"f"(filter[3]), [filter2]"f"(filter2.f), [filter3]"f"(filter3.f),
[filter4]"f"(filter[4]), [filter5]"f"(filter[5]) [filter4]"f"(filter4.f), [filter5]"f"(filter5.f)
: "memory" : "memory"
); );
#else #else
@ -1928,7 +1979,19 @@ void ff_put_vp8_epel4_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
const uint64_t *filter = fourtap_subpel_filters[mx - 1]; const uint64_t *filter = fourtap_subpel_filters[mx - 1];
double ftmp[6]; double ftmp[6];
uint32_t tmp[1]; uint32_t tmp[1];
union av_intfloat64 filter0;
union av_intfloat64 filter1;
union av_intfloat64 filter2;
union av_intfloat64 filter3;
union av_intfloat64 filter4;
union av_intfloat64 filter5;
DECLARE_VAR_LOW32; DECLARE_VAR_LOW32;
filter0.i = filter[0];
filter1.i = filter[1];
filter2.i = filter[2];
filter3.i = filter[3];
filter4.i = filter[4];
filter5.i = filter[5];
/* /*
dst[0] = cm[(filter[2]*src[0] - filter[1]*src[-1] + filter[0]*src[-2] + filter[3]*src[1] - filter[4]*src[2] + filter[5]*src[ 3] + 64) >> 7]; dst[0] = cm[(filter[2]*src[0] - filter[1]*src[-1] + filter[0]*src[-2] + filter[3]*src[1] - filter[4]*src[2] + filter[5]*src[ 3] + 64) >> 7];
@ -1955,12 +2018,12 @@ void ff_put_vp8_epel4_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
RESTRICT_ASM_LOW32 RESTRICT_ASM_LOW32
[h]"+&r"(h), [h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src) [dst]"+&r"(dst), [src]"+&r"(src)
: [ff_pw_64]"f"(ff_pw_64), : [ff_pw_64]"f"(ff_pw_64.f),
[srcstride]"r"((mips_reg)srcstride), [srcstride]"r"((mips_reg)srcstride),
[dststride]"r"((mips_reg)dststride), [dststride]"r"((mips_reg)dststride),
[filter0]"f"(filter[0]), [filter1]"f"(filter[1]), [filter0]"f"(filter0.f), [filter1]"f"(filter1.f),
[filter2]"f"(filter[2]), [filter3]"f"(filter[3]), [filter2]"f"(filter2.f), [filter3]"f"(filter3.f),
[filter4]"f"(filter[4]), [filter5]"f"(filter[5]) [filter4]"f"(filter4.f), [filter5]"f"(filter5.f)
: "memory" : "memory"
); );
#else #else
@ -1985,7 +2048,15 @@ void ff_put_vp8_epel16_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
double ftmp[9]; double ftmp[9];
uint32_t tmp[1]; uint32_t tmp[1];
mips_reg src0, src1, dst0; mips_reg src0, src1, dst0;
union av_intfloat64 filter1;
union av_intfloat64 filter2;
union av_intfloat64 filter3;
union av_intfloat64 filter4;
DECLARE_VAR_ALL64; DECLARE_VAR_ALL64;
filter1.i = filter[1];
filter2.i = filter[2];
filter3.i = filter[3];
filter4.i = filter[4];
/* /*
dst[0] = cm[(filter[2] * src[0] - filter[1] * src[ -srcstride] + filter[3] * src[ srcstride] - filter[4] * src[ 2*srcstride] + 64) >> 7]; dst[0] = cm[(filter[2] * src[0] - filter[1] * src[ -srcstride] + filter[3] * src[ srcstride] - filter[4] * src[ 2*srcstride] + 64) >> 7];
@ -2034,11 +2105,11 @@ void ff_put_vp8_epel16_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
[src1]"=&r"(src1), [src1]"=&r"(src1),
[h]"+&r"(h), [h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src) [dst]"+&r"(dst), [src]"+&r"(src)
: [ff_pw_64]"f"(ff_pw_64), : [ff_pw_64]"f"(ff_pw_64.f),
[srcstride]"r"((mips_reg)srcstride), [srcstride]"r"((mips_reg)srcstride),
[dststride]"r"((mips_reg)dststride), [dststride]"r"((mips_reg)dststride),
[filter1]"f"(filter[1]), [filter2]"f"(filter[2]), [filter1]"f"(filter1.f), [filter2]"f"(filter2.f),
[filter3]"f"(filter[3]), [filter4]"f"(filter[4]) [filter3]"f"(filter3.f), [filter4]"f"(filter4.f)
: "memory" : "memory"
); );
#else #else
@ -2063,7 +2134,15 @@ void ff_put_vp8_epel8_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
double ftmp[9]; double ftmp[9];
uint32_t tmp[1]; uint32_t tmp[1];
mips_reg src1; mips_reg src1;
union av_intfloat64 filter1;
union av_intfloat64 filter2;
union av_intfloat64 filter3;
union av_intfloat64 filter4;
DECLARE_VAR_ALL64; DECLARE_VAR_ALL64;
filter1.i = filter[1];
filter2.i = filter[2];
filter3.i = filter[3];
filter4.i = filter[4];
/* /*
dst[0] = cm[(filter[2] * src[0] - filter[1] * src[ -srcstride] + filter[3] * src[ srcstride] - filter[4] * src[ 2*srcstride] + 64) >> 7]; dst[0] = cm[(filter[2] * src[0] - filter[1] * src[ -srcstride] + filter[3] * src[ srcstride] - filter[4] * src[ 2*srcstride] + 64) >> 7];
@ -2097,11 +2176,11 @@ void ff_put_vp8_epel8_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
[src1]"=&r"(src1), [src1]"=&r"(src1),
[h]"+&r"(h), [h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src) [dst]"+&r"(dst), [src]"+&r"(src)
: [ff_pw_64]"f"(ff_pw_64), : [ff_pw_64]"f"(ff_pw_64.f),
[srcstride]"r"((mips_reg)srcstride), [srcstride]"r"((mips_reg)srcstride),
[dststride]"r"((mips_reg)dststride), [dststride]"r"((mips_reg)dststride),
[filter1]"f"(filter[1]), [filter2]"f"(filter[2]), [filter1]"f"(filter1.f), [filter2]"f"(filter2.f),
[filter3]"f"(filter[3]), [filter4]"f"(filter[4]) [filter3]"f"(filter3.f), [filter4]"f"(filter4.f)
: "memory" : "memory"
); );
#else #else
@ -2126,7 +2205,15 @@ void ff_put_vp8_epel4_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
double ftmp[6]; double ftmp[6];
uint32_t tmp[1]; uint32_t tmp[1];
mips_reg src1; mips_reg src1;
union av_intfloat64 filter1;
union av_intfloat64 filter2;
union av_intfloat64 filter3;
union av_intfloat64 filter4;
DECLARE_VAR_LOW32; DECLARE_VAR_LOW32;
filter1.i = filter[1];
filter2.i = filter[2];
filter3.i = filter[3];
filter4.i = filter[4];
/* /*
dst[0] = cm[(filter[2] * src[0] - filter[1] * src[ -srcstride] + filter[3] * src[ srcstride] - filter[4] * src[ 2*srcstride] + 64) >> 7]; dst[0] = cm[(filter[2] * src[0] - filter[1] * src[ -srcstride] + filter[3] * src[ srcstride] - filter[4] * src[ 2*srcstride] + 64) >> 7];
@ -2154,11 +2241,11 @@ void ff_put_vp8_epel4_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
[src1]"=&r"(src1), [src1]"=&r"(src1),
[h]"+&r"(h), [h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src) [dst]"+&r"(dst), [src]"+&r"(src)
: [ff_pw_64]"f"(ff_pw_64), : [ff_pw_64]"f"(ff_pw_64.f),
[srcstride]"r"((mips_reg)srcstride), [srcstride]"r"((mips_reg)srcstride),
[dststride]"r"((mips_reg)dststride), [dststride]"r"((mips_reg)dststride),
[filter1]"f"(filter[1]), [filter2]"f"(filter[2]), [filter1]"f"(filter1.f), [filter2]"f"(filter2.f),
[filter3]"f"(filter[3]), [filter4]"f"(filter[4]) [filter3]"f"(filter3.f), [filter4]"f"(filter4.f)
: "memory" : "memory"
); );
#else #else
@ -2183,7 +2270,19 @@ void ff_put_vp8_epel16_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
double ftmp[9]; double ftmp[9];
uint32_t tmp[1]; uint32_t tmp[1];
mips_reg src0, src1, dst0; mips_reg src0, src1, dst0;
union av_intfloat64 filter0;
union av_intfloat64 filter1;
union av_intfloat64 filter2;
union av_intfloat64 filter3;
union av_intfloat64 filter4;
union av_intfloat64 filter5;
DECLARE_VAR_ALL64; DECLARE_VAR_ALL64;
filter0.i = filter[0];
filter1.i = filter[1];
filter2.i = filter[2];
filter3.i = filter[3];
filter4.i = filter[4];
filter5.i = filter[5];
/* /*
dst[0] = cm[(filter[2]*src[0] - filter[1]*src[0-srcstride] + filter[0]*src[0-2*srcstride] + filter[3]*src[0+srcstride] - filter[4]*src[0+2*srcstride] + filter[5]*src[0+3*srcstride] + 64) >> 7]; dst[0] = cm[(filter[2]*src[0] - filter[1]*src[0-srcstride] + filter[0]*src[0-2*srcstride] + filter[3]*src[0+srcstride] - filter[4]*src[0+2*srcstride] + filter[5]*src[0+3*srcstride] + 64) >> 7];
@ -2232,12 +2331,12 @@ void ff_put_vp8_epel16_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
[src1]"=&r"(src1), [src1]"=&r"(src1),
[h]"+&r"(h), [h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src) [dst]"+&r"(dst), [src]"+&r"(src)
: [ff_pw_64]"f"(ff_pw_64), : [ff_pw_64]"f"(ff_pw_64.f),
[srcstride]"r"((mips_reg)srcstride), [srcstride]"r"((mips_reg)srcstride),
[dststride]"r"((mips_reg)dststride), [dststride]"r"((mips_reg)dststride),
[filter0]"f"(filter[0]), [filter1]"f"(filter[1]), [filter0]"f"(filter0.f), [filter1]"f"(filter1.f),
[filter2]"f"(filter[2]), [filter3]"f"(filter[3]), [filter2]"f"(filter2.f), [filter3]"f"(filter3.f),
[filter4]"f"(filter[4]), [filter5]"f"(filter[5]) [filter4]"f"(filter4.f), [filter5]"f"(filter5.f)
: "memory" : "memory"
); );
#else #else
@ -2262,7 +2361,19 @@ void ff_put_vp8_epel8_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
double ftmp[9]; double ftmp[9];
uint32_t tmp[1]; uint32_t tmp[1];
mips_reg src1; mips_reg src1;
union av_intfloat64 filter0;
union av_intfloat64 filter1;
union av_intfloat64 filter2;
union av_intfloat64 filter3;
union av_intfloat64 filter4;
union av_intfloat64 filter5;
DECLARE_VAR_ALL64; DECLARE_VAR_ALL64;
filter0.i = filter[0];
filter1.i = filter[1];
filter2.i = filter[2];
filter3.i = filter[3];
filter4.i = filter[4];
filter5.i = filter[5];
/* /*
dst[0] = cm[(filter[2]*src[0] - filter[1]*src[0-srcstride] + filter[0]*src[0-2*srcstride] + filter[3]*src[0+srcstride] - filter[4]*src[0+2*srcstride] + filter[5]*src[0+3*srcstride] + 64) >> 7]; dst[0] = cm[(filter[2]*src[0] - filter[1]*src[0-srcstride] + filter[0]*src[0-2*srcstride] + filter[3]*src[0+srcstride] - filter[4]*src[0+2*srcstride] + filter[5]*src[0+3*srcstride] + 64) >> 7];
@ -2296,12 +2407,12 @@ void ff_put_vp8_epel8_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
[src1]"=&r"(src1), [src1]"=&r"(src1),
[h]"+&r"(h), [h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src) [dst]"+&r"(dst), [src]"+&r"(src)
: [ff_pw_64]"f"(ff_pw_64), : [ff_pw_64]"f"(ff_pw_64.f),
[srcstride]"r"((mips_reg)srcstride), [srcstride]"r"((mips_reg)srcstride),
[dststride]"r"((mips_reg)dststride), [dststride]"r"((mips_reg)dststride),
[filter0]"f"(filter[0]), [filter1]"f"(filter[1]), [filter0]"f"(filter0.f), [filter1]"f"(filter1.f),
[filter2]"f"(filter[2]), [filter3]"f"(filter[3]), [filter2]"f"(filter2.f), [filter3]"f"(filter3.f),
[filter4]"f"(filter[4]), [filter5]"f"(filter[5]) [filter4]"f"(filter4.f), [filter5]"f"(filter5.f)
: "memory" : "memory"
); );
#else #else
@ -2326,7 +2437,19 @@ void ff_put_vp8_epel4_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
double ftmp[6]; double ftmp[6];
uint32_t tmp[1]; uint32_t tmp[1];
mips_reg src1; mips_reg src1;
union av_intfloat64 filter0;
union av_intfloat64 filter1;
union av_intfloat64 filter2;
union av_intfloat64 filter3;
union av_intfloat64 filter4;
union av_intfloat64 filter5;
DECLARE_VAR_LOW32; DECLARE_VAR_LOW32;
filter0.i = filter[0];
filter1.i = filter[1];
filter2.i = filter[2];
filter3.i = filter[3];
filter4.i = filter[4];
filter5.i = filter[5];
/* /*
dst[0] = cm[(filter[2]*src[0] - filter[1]*src[0-srcstride] + filter[0]*src[0-2*srcstride] + filter[3]*src[0+srcstride] - filter[4]*src[0+2*srcstride] + filter[5]*src[0+3*srcstride] + 64) >> 7]; dst[0] = cm[(filter[2]*src[0] - filter[1]*src[0-srcstride] + filter[0]*src[0-2*srcstride] + filter[3]*src[0+srcstride] - filter[4]*src[0+2*srcstride] + filter[5]*src[0+3*srcstride] + 64) >> 7];
@ -2354,12 +2477,12 @@ void ff_put_vp8_epel4_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
[src1]"=&r"(src1), [src1]"=&r"(src1),
[h]"+&r"(h), [h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src) [dst]"+&r"(dst), [src]"+&r"(src)
: [ff_pw_64]"f"(ff_pw_64), : [ff_pw_64]"f"(ff_pw_64.f),
[srcstride]"r"((mips_reg)srcstride), [srcstride]"r"((mips_reg)srcstride),
[dststride]"r"((mips_reg)dststride), [dststride]"r"((mips_reg)dststride),
[filter0]"f"(filter[0]), [filter1]"f"(filter[1]), [filter0]"f"(filter0.f), [filter1]"f"(filter1.f),
[filter2]"f"(filter[2]), [filter3]"f"(filter[3]), [filter2]"f"(filter2.f), [filter3]"f"(filter3.f),
[filter4]"f"(filter[4]), [filter5]"f"(filter[5]) [filter4]"f"(filter4.f), [filter5]"f"(filter5.f)
: "memory" : "memory"
); );
#else #else
@ -2847,11 +2970,13 @@ void ff_put_vp8_bilinear16_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
ptrdiff_t sstride, int h, int mx, int my) ptrdiff_t sstride, int h, int mx, int my)
{ {
#if 1 #if 1
int a = 8 - mx, b = mx; union mmi_intfloat64 a, b;
double ftmp[7]; double ftmp[7];
uint32_t tmp[1]; uint32_t tmp[1];
mips_reg dst0, src0; mips_reg dst0, src0;
DECLARE_VAR_ALL64; DECLARE_VAR_ALL64;
a.i = 8 - mx;
b.i = mx;
/* /*
dst[0] = (a * src[0] + b * src[1] + 4) >> 3; dst[0] = (a * src[0] + b * src[1] + 4) >> 3;
@ -2900,10 +3025,10 @@ void ff_put_vp8_bilinear16_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
[dst0]"=&r"(dst0), [src0]"=&r"(src0), [dst0]"=&r"(dst0), [src0]"=&r"(src0),
[h]"+&r"(h), [h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src), [dst]"+&r"(dst), [src]"+&r"(src),
[a]"+&f"(a), [b]"+&f"(b) [a]"+&f"(a.f), [b]"+&f"(b.f)
: [sstride]"r"((mips_reg)sstride), : [sstride]"r"((mips_reg)sstride),
[dstride]"r"((mips_reg)dstride), [dstride]"r"((mips_reg)dstride),
[ff_pw_4]"f"(ff_pw_4) [ff_pw_4]"f"(ff_pw_4.f)
: "memory" : "memory"
); );
#else #else
@ -2923,11 +3048,13 @@ void ff_put_vp8_bilinear16_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
ptrdiff_t sstride, int h, int mx, int my) ptrdiff_t sstride, int h, int mx, int my)
{ {
#if 1 #if 1
int c = 8 - my, d = my; union mmi_intfloat64 c, d;
double ftmp[7]; double ftmp[7];
uint32_t tmp[1]; uint32_t tmp[1];
mips_reg src0, src1, dst0; mips_reg src0, src1, dst0;
DECLARE_VAR_ALL64; DECLARE_VAR_ALL64;
c.i = 8 - my;
d.i = my;
/* /*
dst[0] = (c * src[0] + d * src[ sstride] + 4) >> 3; dst[0] = (c * src[0] + d * src[ sstride] + 4) >> 3;
@ -2968,10 +3095,10 @@ void ff_put_vp8_bilinear16_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
[src1]"=&r"(src1), [src1]"=&r"(src1),
[h]"+&r"(h), [h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src), [dst]"+&r"(dst), [src]"+&r"(src),
[c]"+&f"(c), [d]"+&f"(d) [c]"+&f"(c.f), [d]"+&f"(d.f)
: [sstride]"r"((mips_reg)sstride), : [sstride]"r"((mips_reg)sstride),
[dstride]"r"((mips_reg)dstride), [dstride]"r"((mips_reg)dstride),
[ff_pw_4]"f"(ff_pw_4) [ff_pw_4]"f"(ff_pw_4.f)
: "memory" : "memory"
); );
#else #else
@ -3025,10 +3152,12 @@ void ff_put_vp8_bilinear8_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
ptrdiff_t sstride, int h, int mx, int my) ptrdiff_t sstride, int h, int mx, int my)
{ {
#if 1 #if 1
int a = 8 - mx, b = mx; union mmi_intfloat64 a, b;
double ftmp[7]; double ftmp[7];
uint32_t tmp[1]; uint32_t tmp[1];
DECLARE_VAR_ALL64; DECLARE_VAR_ALL64;
a.i = 8 - mx;
b.i = mx;
/* /*
dst[0] = (a * src[0] + b * src[1] + 4) >> 3; dst[0] = (a * src[0] + b * src[1] + 4) >> 3;
@ -3062,10 +3191,10 @@ void ff_put_vp8_bilinear8_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
RESTRICT_ASM_ALL64 RESTRICT_ASM_ALL64
[h]"+&r"(h), [h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src), [dst]"+&r"(dst), [src]"+&r"(src),
[a]"+&f"(a), [b]"+&f"(b) [a]"+&f"(a.f), [b]"+&f"(b.f)
: [sstride]"r"((mips_reg)sstride), : [sstride]"r"((mips_reg)sstride),
[dstride]"r"((mips_reg)dstride), [dstride]"r"((mips_reg)dstride),
[ff_pw_4]"f"(ff_pw_4) [ff_pw_4]"f"(ff_pw_4.f)
: "memory" : "memory"
); );
#else #else
@ -3085,11 +3214,13 @@ void ff_put_vp8_bilinear8_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
ptrdiff_t sstride, int h, int mx, int my) ptrdiff_t sstride, int h, int mx, int my)
{ {
#if 1 #if 1
int c = 8 - my, d = my; union mmi_intfloat64 c, d;
double ftmp[7]; double ftmp[7];
uint32_t tmp[1]; uint32_t tmp[1];
mips_reg src1; mips_reg src1;
DECLARE_VAR_ALL64; DECLARE_VAR_ALL64;
c.i = 8 - my;
d.i = my;
/* /*
dst[0] = (c * src[0] + d * src[ sstride] + 4) >> 3; dst[0] = (c * src[0] + d * src[ sstride] + 4) >> 3;
@ -3124,10 +3255,10 @@ void ff_put_vp8_bilinear8_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
[src1]"=&r"(src1), [src1]"=&r"(src1),
[h]"+&r"(h), [h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src), [dst]"+&r"(dst), [src]"+&r"(src),
[c]"+&f"(c), [d]"+&f"(d) [c]"+&f"(c.f), [d]"+&f"(d.f)
: [sstride]"r"((mips_reg)sstride), : [sstride]"r"((mips_reg)sstride),
[dstride]"r"((mips_reg)dstride), [dstride]"r"((mips_reg)dstride),
[ff_pw_4]"f"(ff_pw_4) [ff_pw_4]"f"(ff_pw_4.f)
: "memory" : "memory"
); );
#else #else
@ -3181,11 +3312,13 @@ void ff_put_vp8_bilinear4_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
ptrdiff_t sstride, int h, int mx, int my) ptrdiff_t sstride, int h, int mx, int my)
{ {
#if 1 #if 1
int a = 8 - mx, b = mx; union mmi_intfloat64 a, b;
double ftmp[5]; double ftmp[5];
uint32_t tmp[1]; uint32_t tmp[1];
DECLARE_VAR_LOW32; DECLARE_VAR_LOW32;
DECLARE_VAR_ALL64; DECLARE_VAR_ALL64;
a.i = 8 - mx;
b.i = mx;
/* /*
dst[0] = (a * src[0] + b * src[1] + 4) >> 3; dst[0] = (a * src[0] + b * src[1] + 4) >> 3;
@ -3215,10 +3348,10 @@ void ff_put_vp8_bilinear4_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
RESTRICT_ASM_ALL64 RESTRICT_ASM_ALL64
[h]"+&r"(h), [h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src), [dst]"+&r"(dst), [src]"+&r"(src),
[a]"+&f"(a), [b]"+&f"(b) [a]"+&f"(a.f), [b]"+&f"(b.f)
: [sstride]"r"((mips_reg)sstride), : [sstride]"r"((mips_reg)sstride),
[dstride]"r"((mips_reg)dstride), [dstride]"r"((mips_reg)dstride),
[ff_pw_4]"f"(ff_pw_4) [ff_pw_4]"f"(ff_pw_4.f)
: "memory" : "memory"
); );
#else #else
@ -3238,12 +3371,14 @@ void ff_put_vp8_bilinear4_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
ptrdiff_t sstride, int h, int mx, int my) ptrdiff_t sstride, int h, int mx, int my)
{ {
#if 1 #if 1
int c = 8 - my, d = my; union mmi_intfloat64 c, d;
double ftmp[7]; double ftmp[7];
uint32_t tmp[1]; uint32_t tmp[1];
mips_reg src1; mips_reg src1;
DECLARE_VAR_LOW32; DECLARE_VAR_LOW32;
DECLARE_VAR_ALL64; DECLARE_VAR_ALL64;
c.i = 8 - my;
d.i = my;
/* /*
dst[0] = (c * src[0] + d * src[ sstride] + 4) >> 3; dst[0] = (c * src[0] + d * src[ sstride] + 4) >> 3;
@ -3274,10 +3409,10 @@ void ff_put_vp8_bilinear4_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
[src1]"=&r"(src1), [src1]"=&r"(src1),
[h]"+&r"(h), [h]"+&r"(h),
[dst]"+&r"(dst), [src]"+&r"(src), [dst]"+&r"(dst), [src]"+&r"(src),
[c]"+&f"(c), [d]"+&f"(d) [c]"+&f"(c.f), [d]"+&f"(d.f)
: [sstride]"r"((mips_reg)sstride), : [sstride]"r"((mips_reg)sstride),
[dstride]"r"((mips_reg)dstride), [dstride]"r"((mips_reg)dstride),
[ff_pw_4]"f"(ff_pw_4) [ff_pw_4]"f"(ff_pw_4.f)
: "memory" : "memory"
); );
#else #else

View File

@ -27,6 +27,8 @@
#ifndef AVUTIL_MIPS_ASMDEFS_H #ifndef AVUTIL_MIPS_ASMDEFS_H
#define AVUTIL_MIPS_ASMDEFS_H #define AVUTIL_MIPS_ASMDEFS_H
#include <stdint.h>
#if defined(_ABI64) && _MIPS_SIM == _ABI64 #if defined(_ABI64) && _MIPS_SIM == _ABI64
# define mips_reg int64_t # define mips_reg int64_t
# define PTRSIZE " 8 " # define PTRSIZE " 8 "
@ -97,4 +99,10 @@ __asm__(".macro parse_r var r\n\t"
".endif\n\t" ".endif\n\t"
".endm"); ".endm");
/*
 * Overlay of a 64-bit integer and a double, used to hand integer values to
 * MMI inline asm through floating-point register operands.
 *
 * Callers fill in the integer member (e.g. "a.i = 8 - mx;") and then pass
 * the double member to an asm operand that uses the "f" constraint
 * (e.g. [a]"+&f"(a.f)). This was introduced because clang reports build
 * errors when an integer-typed variable is bound to an "f" operand.
 */
union mmi_intfloat64 {
int64_t i; /* integer view: the member callers assign to */
double f; /* floating-point view: the member passed to "f"-constrained asm operands */
};
#endif /* AVCODEC_MIPS_ASMDEFS_H */ #endif /* AVCODEC_MIPS_ASMDEFS_H */