You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-11-23 21:54:53 +02:00
avcodec/x86/qpel: Add specializations for put_l2 functions
These functions are currently always called with height equal to
either the block size or the block size + 1, and height is
a compile-time constant at every call site. This makes it possible
to split each of these functions into two, avoiding the check inside
the function for whether height is odd or even.
The corresponding avg functions are only used with height == block size,
so they do not have a height parameter at all. Removing the
parameter from the put_l2 functions as well therefore simplifies
the C code.
The new functions increase the size of .text of qpel{dsp}.o
by 32B here, yet they save 464B of C code.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
@@ -32,20 +32,18 @@
|
||||
|
||||
#if HAVE_X86ASM
|
||||
void ff_avg_pixels4_mmxext(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
void ff_put_pixels4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
|
||||
void ff_put_pixels4x4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
|
||||
ptrdiff_t stride);
|
||||
void ff_avg_pixels4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
|
||||
void ff_avg_pixels4x4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
|
||||
ptrdiff_t stride);
|
||||
#define ff_put_pixels4_l2_mmxext(dst, src1, src2, dststride, src1stride, h) \
|
||||
ff_put_pixels4_l2_mmxext((dst), (src1), (src2), (dststride))
|
||||
#define ff_avg_pixels4_l2_mmxext(dst, src1, src2, dststride, src1stride, h) \
|
||||
ff_avg_pixels4_l2_mmxext((dst), (src1), (src2), (dststride))
|
||||
#define ff_put_pixels8_l2_sse2 ff_put_pixels8_l2_mmxext
|
||||
#define ff_avg_pixels8_l2_sse2(dst, src1, src2, dststride, src1stride, h) \
|
||||
ff_avg_pixels8_l2_mmxext((dst), (src1), (src2), (dststride), (src1stride))
|
||||
#define ff_put_pixels16_l2_sse2 ff_put_pixels16_l2_mmxext
|
||||
#define ff_avg_pixels16_l2_sse2(dst, src1, src2, dststride, src1stride, h) \
|
||||
ff_avg_pixels16_l2_mmxext((dst), (src1), (src2), (dststride), (src1stride))
|
||||
#define ff_put_pixels4x4_l2_mmxext(dst, src1, src2, dststride, src1stride) \
|
||||
ff_put_pixels4x4_l2_mmxext((dst), (src1), (src2), (dststride))
|
||||
#define ff_avg_pixels4x4_l2_mmxext(dst, src1, src2, dststride, src1stride) \
|
||||
ff_avg_pixels4x4_l2_mmxext((dst), (src1), (src2), (dststride))
|
||||
#define ff_put_pixels8x8_l2_sse2 ff_put_pixels8x8_l2_mmxext
|
||||
#define ff_avg_pixels8x8_l2_sse2 ff_avg_pixels8x8_l2_mmxext
|
||||
#define ff_put_pixels16x16_l2_sse2 ff_put_pixels16x16_l2_mmxext
|
||||
#define ff_avg_pixels16x16_l2_sse2 ff_avg_pixels16x16_l2_mmxext
|
||||
|
||||
#define DEF_QPEL(OPNAME)\
|
||||
void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride);\
|
||||
@@ -177,7 +175,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, const uin
|
||||
{\
|
||||
LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
|
||||
ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
|
||||
ff_ ## OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src, temp, stride, stride, SIZE);\
|
||||
ff_ ## OPNAME ## pixels ## SIZE ## x ## SIZE ## _l2_ ## MMX(dst, src, temp, stride, stride);\
|
||||
}\
|
||||
\
|
||||
static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
|
||||
@@ -189,7 +187,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, const uin
|
||||
{\
|
||||
LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
|
||||
ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
|
||||
ff_ ## OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src+stride, temp, stride, stride, SIZE);\
|
||||
ff_ ## OPNAME ## pixels ## SIZE ## x ## SIZE ## _l2_ ## MMX(dst, src+stride, temp, stride, stride);\
|
||||
}\
|
||||
|
||||
#define H264_MC_HV(OPNAME, SIZE, MMX, ALIGN, SHIFT5_EXT) \
|
||||
|
||||
@@ -69,11 +69,11 @@ cglobal avg_pixels4, 3,4
|
||||
mova %2, %1
|
||||
%endmacro
|
||||
|
||||
; void ff_put/avg_pixels4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
|
||||
; void ff_put/avg_pixels4x4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
|
||||
; ptrdiff_t stride)
|
||||
%macro PIXELS4_L2 1
|
||||
%define OP op_%1h
|
||||
cglobal %1_pixels4_l2, 4,4
|
||||
cglobal %1_pixels4x4_l2, 4,4
|
||||
mova m0, [r1]
|
||||
mova m1, [r1+r3]
|
||||
lea r1, [r1+2*r3]
|
||||
|
||||
@@ -37,11 +37,9 @@ SECTION .text
|
||||
%macro PIXELS8_L2 1
|
||||
%define OP op_%1
|
||||
%ifidn %1, put
|
||||
; void ff_put_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
|
||||
; ptrdiff_t dstStride, ptrdiff_t src1Stride, int h)
|
||||
cglobal put_pixels8_l2, 6,6
|
||||
test r5d, 1
|
||||
je .loop
|
||||
; void ff_put_pixels8x9_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
|
||||
; ptrdiff_t dstStride, ptrdiff_t src1Stride)
|
||||
cglobal put_pixels8x9_l2, 5,6
|
||||
mova m0, [r1]
|
||||
mova m1, [r2]
|
||||
add r1, r4
|
||||
@@ -49,13 +47,14 @@ cglobal put_pixels8_l2, 6,6
|
||||
pavgb m0, m1
|
||||
OP m0, [r0]
|
||||
add r0, r3
|
||||
dec r5d
|
||||
%else
|
||||
; void ff_avg_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
|
||||
; ptrdiff_t dstStride, ptrdiff_t src1Stride)
|
||||
cglobal avg_pixels8_l2, 5,6
|
||||
mov r5d, 8
|
||||
; FIXME: avoid jump if prologue is empty
|
||||
jmp %1_pixels8x8_after_prologue_ %+ cpuname
|
||||
%endif
|
||||
; void ff_avg/put_pixels8x8_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
|
||||
; ptrdiff_t dstStride, ptrdiff_t src1Stride)
|
||||
cglobal %1_pixels8x8_l2, 5,6
|
||||
%1_pixels8x8_after_prologue_ %+ cpuname:
|
||||
mov r5d, 8
|
||||
.loop:
|
||||
mova m0, [r1]
|
||||
mova m1, [r1+r4]
|
||||
@@ -86,11 +85,9 @@ PIXELS8_L2 avg
|
||||
%macro PIXELS16_L2 1
|
||||
%define OP op_%1
|
||||
%ifidn %1, put
|
||||
; void ff_put_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
|
||||
; ptrdiff_t dstStride, ptrdiff_t src1Stride, int h)
|
||||
cglobal put_pixels16_l2, 6,6
|
||||
test r5d, 1
|
||||
je .loop
|
||||
; void ff_put_pixels16x17_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
|
||||
; ptrdiff_t dstStride, ptrdiff_t src1Stride)
|
||||
cglobal put_pixels16x17_l2, 5,6
|
||||
mova m0, [r1]
|
||||
mova m1, [r1+8]
|
||||
pavgb m0, [r2]
|
||||
@@ -100,13 +97,14 @@ cglobal put_pixels16_l2, 6,6
|
||||
OP m0, [r0]
|
||||
OP m1, [r0+8]
|
||||
add r0, r3
|
||||
dec r5d
|
||||
%else
|
||||
; void ff_avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
|
||||
; ptrdiff_t dstStride, ptrdiff_t src1Stride)
|
||||
cglobal avg_pixels16_l2, 5,6
|
||||
mov r5d, 16
|
||||
; FIXME: avoid jump if prologue is empty
|
||||
jmp %1_pixels16x16_after_prologue_ %+ cpuname
|
||||
%endif
|
||||
; void ff_avg/put_pixels16x16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
|
||||
; ptrdiff_t dstStride, ptrdiff_t src1Stride)
|
||||
cglobal %1_pixels16x16_l2, 5,6
|
||||
%1_pixels16x16_after_prologue_ %+ cpuname:
|
||||
mov r5d, 16
|
||||
.loop:
|
||||
mova m0, [r1]
|
||||
mova m1, [r1+8]
|
||||
|
||||
@@ -22,16 +22,16 @@
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
void ff_put_pixels8_l2_mmxext(uint8_t *dst,
|
||||
const uint8_t *src1, const uint8_t *src2,
|
||||
ptrdiff_t dstStride, ptrdiff_t src1Stride, int h);
|
||||
void ff_avg_pixels8_l2_mmxext(uint8_t *dst,
|
||||
void ff_put_pixels8x8_l2_mmxext(uint8_t *dst,
|
||||
const uint8_t *src1, const uint8_t *src2,
|
||||
ptrdiff_t dstStride, ptrdiff_t src1Stride);
|
||||
void ff_put_pixels16_l2_mmxext(uint8_t *dst,
|
||||
void ff_avg_pixels8x8_l2_mmxext(uint8_t *dst,
|
||||
const uint8_t *src1, const uint8_t *src2,
|
||||
ptrdiff_t dstStride, ptrdiff_t src1Stride, int h);
|
||||
void ff_avg_pixels16_l2_mmxext(uint8_t *dst,
|
||||
ptrdiff_t dstStride, ptrdiff_t src1Stride);
|
||||
void ff_put_pixels16x16_l2_mmxext(uint8_t *dst,
|
||||
const uint8_t *src1, const uint8_t *src2,
|
||||
ptrdiff_t dstStride, ptrdiff_t src1Stride);
|
||||
void ff_avg_pixels16x16_l2_mmxext(uint8_t *dst,
|
||||
const uint8_t *src1, const uint8_t *src2,
|
||||
ptrdiff_t dstStride, ptrdiff_t src1Stride);
|
||||
|
||||
|
||||
@@ -32,13 +32,11 @@ cextern pw_20
|
||||
|
||||
SECTION .text
|
||||
|
||||
; void ff_put_no_rnd_pixels8_l2(uint8_t *dst, uint8_t *src1, uint8_t *src2,
|
||||
; ptrdiff_t dstStride, ptrdiff_t src1Stride, int h)
|
||||
%macro PUT_NO_RND_PIXELS8_L2 0
|
||||
cglobal put_no_rnd_pixels8_l2, 6,6
|
||||
; void ff_put_no_rnd_pixels8x9_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
|
||||
; ptrdiff_t dstStride, ptrdiff_t src1Stride)
|
||||
cglobal put_no_rnd_pixels8x9_l2, 5,6
|
||||
pcmpeqb m6, m6
|
||||
test r5d, 1
|
||||
je .loop
|
||||
mova m0, [r1]
|
||||
mova m1, [r2]
|
||||
add r1, r4
|
||||
@@ -49,7 +47,14 @@ cglobal put_no_rnd_pixels8_l2, 6,6
|
||||
pxor m0, m6
|
||||
mova [r0], m0
|
||||
add r0, r3
|
||||
dec r5d
|
||||
jmp put_no_rnd_pixels8x8_after_prologue_ %+ cpuname
|
||||
|
||||
; void ff_put_no_rnd_pixels8x8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
|
||||
; ptrdiff_t dstStride, ptrdiff_t src1Stride)
|
||||
cglobal put_no_rnd_pixels8x8_l2, 5,6
|
||||
pcmpeqb m6, m6
|
||||
put_no_rnd_pixels8x8_after_prologue_ %+ cpuname:
|
||||
mov r5d, 8
|
||||
.loop:
|
||||
mova m0, [r1]
|
||||
add r1, r4
|
||||
@@ -97,13 +102,11 @@ INIT_MMX mmxext
|
||||
PUT_NO_RND_PIXELS8_L2
|
||||
|
||||
|
||||
; void ff_put_no_rnd_pixels16_l2(uint8_t *dst, uint8_t *src1, uint8_t *src2,
|
||||
; ptrdiff_t dstStride, ptrdiff_t src1Stride, int h)
|
||||
; void ff_put_no_rnd_pixels16x17_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
|
||||
; ptrdiff_t dstStride, ptrdiff_t src1Stride)
|
||||
%macro PUT_NO_RND_PIXELS16_l2 0
|
||||
cglobal put_no_rnd_pixels16_l2, 6,6
|
||||
cglobal put_no_rnd_pixels16x17_l2, 5,6
|
||||
pcmpeqb m6, m6
|
||||
test r5d, 1
|
||||
je .loop
|
||||
mova m0, [r1]
|
||||
mova m1, [r1+8]
|
||||
mova m2, [r2]
|
||||
@@ -121,7 +124,14 @@ cglobal put_no_rnd_pixels16_l2, 6,6
|
||||
mova [r0], m0
|
||||
mova [r0+8], m1
|
||||
add r0, r3
|
||||
dec r5d
|
||||
jmp put_no_rnd_pixels16x16_after_prologue_ %+ cpuname
|
||||
|
||||
; void ff_put_no_rnd_pixels16x16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
|
||||
; ptrdiff_t dstStride, ptrdiff_t src1Stride)
|
||||
cglobal put_no_rnd_pixels16x16_l2, 5,6
|
||||
pcmpeqb m6, m6
|
||||
put_no_rnd_pixels16x16_after_prologue_ %+ cpuname:
|
||||
mov r5d, 16
|
||||
.loop:
|
||||
mova m0, [r1]
|
||||
mova m1, [r1+8]
|
||||
|
||||
@@ -31,12 +31,24 @@
|
||||
#include "fpel.h"
|
||||
#include "qpel.h"
|
||||
|
||||
void ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst,
|
||||
void ff_put_pixels8x9_l2_mmxext(uint8_t *dst,
|
||||
const uint8_t *src1, const uint8_t *src2,
|
||||
ptrdiff_t dstStride, ptrdiff_t src1Stride, int h);
|
||||
void ff_put_no_rnd_pixels16_l2_mmxext(uint8_t *dst,
|
||||
ptrdiff_t dstStride, ptrdiff_t src1Stride);
|
||||
void ff_put_pixels16x17_l2_mmxext(uint8_t *dst,
|
||||
const uint8_t *src1, const uint8_t *src2,
|
||||
ptrdiff_t dstStride, ptrdiff_t src1Stride, int h);
|
||||
ptrdiff_t dstStride, ptrdiff_t src1Stride);
|
||||
void ff_put_no_rnd_pixels8x8_l2_mmxext(uint8_t *dst,
|
||||
const uint8_t *src1, const uint8_t *src2,
|
||||
ptrdiff_t dstStride, ptrdiff_t src1Stride);
|
||||
void ff_put_no_rnd_pixels8x9_l2_mmxext(uint8_t *dst,
|
||||
const uint8_t *src1, const uint8_t *src2,
|
||||
ptrdiff_t dstStride, ptrdiff_t src1Stride);
|
||||
void ff_put_no_rnd_pixels16x16_l2_mmxext(uint8_t *dst,
|
||||
const uint8_t *src1, const uint8_t *src2,
|
||||
ptrdiff_t dstStride, ptrdiff_t src1Stride);
|
||||
void ff_put_no_rnd_pixels16x17_l2_mmxext(uint8_t *dst,
|
||||
const uint8_t *src1, const uint8_t *src2,
|
||||
ptrdiff_t dstStride, ptrdiff_t src1Stride);
|
||||
void ff_put_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
|
||||
ptrdiff_t dstStride, ptrdiff_t srcStride, int h);
|
||||
void ff_avg_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
|
||||
@@ -70,7 +82,7 @@ void ff_put_no_rnd_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst,
|
||||
|
||||
#if HAVE_X86ASM
|
||||
|
||||
#define QPEL_OP(OPNAME, RND, MMX, ARG) \
|
||||
#define QPEL_OP(OPNAME, RND, MMX) \
|
||||
static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, \
|
||||
const uint8_t *src, \
|
||||
ptrdiff_t stride) \
|
||||
@@ -79,8 +91,8 @@ static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, \
|
||||
uint8_t *const half = (uint8_t *) temp; \
|
||||
ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \
|
||||
stride, 8); \
|
||||
ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, src, half, \
|
||||
stride, stride, 8)); \
|
||||
ff_ ## OPNAME ## pixels8x8_l2_ ## MMX(dst, src, half, \
|
||||
stride, stride); \
|
||||
} \
|
||||
\
|
||||
static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, \
|
||||
@@ -99,8 +111,8 @@ static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, \
|
||||
uint8_t *const half = (uint8_t *) temp; \
|
||||
ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \
|
||||
stride, 8); \
|
||||
ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, src + 1, half, \
|
||||
stride, stride, 8)); \
|
||||
ff_ ## OPNAME ## pixels8x8_l2_ ## MMX(dst, src + 1, half, \
|
||||
stride, stride); \
|
||||
} \
|
||||
\
|
||||
static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, \
|
||||
@@ -111,8 +123,8 @@ static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, \
|
||||
uint8_t *const half = (uint8_t *) temp; \
|
||||
ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \
|
||||
8, stride); \
|
||||
ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, src, half, \
|
||||
stride, stride, 8)); \
|
||||
ff_ ## OPNAME ## pixels8x8_l2_ ## MMX(dst, src, half, \
|
||||
stride, stride); \
|
||||
} \
|
||||
\
|
||||
static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, \
|
||||
@@ -131,8 +143,8 @@ static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, \
|
||||
uint8_t *const half = (uint8_t *) temp; \
|
||||
ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \
|
||||
8, stride); \
|
||||
ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, src + stride, half, \
|
||||
stride, stride, 8)); \
|
||||
ff_ ## OPNAME ## pixels8x8_l2_ ## MMX(dst, src + stride, half, \
|
||||
stride, stride); \
|
||||
} \
|
||||
\
|
||||
static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, \
|
||||
@@ -144,11 +156,11 @@ static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, \
|
||||
uint8_t *const halfHV = (uint8_t *) half; \
|
||||
ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
|
||||
stride, 9); \
|
||||
ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \
|
||||
stride, 9); \
|
||||
ff_put ## RND ## pixels8x9_l2_ ## MMX(halfH, src, halfH, 8, \
|
||||
stride); \
|
||||
ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
|
||||
ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, halfH, halfHV, \
|
||||
stride, 8, 8)); \
|
||||
ff_ ## OPNAME ## pixels8x8_l2_ ## MMX(dst, halfH, halfHV, \
|
||||
stride, 8); \
|
||||
} \
|
||||
\
|
||||
static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, \
|
||||
@@ -160,11 +172,11 @@ static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, \
|
||||
uint8_t *const halfHV = (uint8_t *) half; \
|
||||
ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
|
||||
stride, 9); \
|
||||
ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
|
||||
stride, 9); \
|
||||
ff_put ## RND ## pixels8x9_l2_ ## MMX(halfH, src + 1, halfH, 8, \
|
||||
stride); \
|
||||
ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
|
||||
ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, halfH, halfHV, \
|
||||
stride, 8, 8)); \
|
||||
ff_ ## OPNAME ## pixels8x8_l2_ ## MMX(dst, halfH, halfHV, \
|
||||
stride, 8); \
|
||||
} \
|
||||
\
|
||||
static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, \
|
||||
@@ -176,11 +188,11 @@ static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, \
|
||||
uint8_t *const halfHV = (uint8_t *) half; \
|
||||
ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
|
||||
stride, 9); \
|
||||
ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \
|
||||
stride, 9); \
|
||||
ff_put ## RND ## pixels8x9_l2_ ## MMX(halfH, src, halfH, 8, \
|
||||
stride); \
|
||||
ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
|
||||
ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, halfH + 8, halfHV, \
|
||||
stride, 8, 8)); \
|
||||
ff_ ## OPNAME ## pixels8x8_l2_ ## MMX(dst, halfH + 8, halfHV, \
|
||||
stride, 8); \
|
||||
} \
|
||||
\
|
||||
static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, \
|
||||
@@ -192,11 +204,11 @@ static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, \
|
||||
uint8_t *const halfHV = (uint8_t *) half; \
|
||||
ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
|
||||
stride, 9); \
|
||||
ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
|
||||
stride, 9); \
|
||||
ff_put ## RND ## pixels8x9_l2_ ## MMX(halfH, src + 1, halfH, 8, \
|
||||
stride); \
|
||||
ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
|
||||
ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, halfH + 8, halfHV, \
|
||||
stride, 8, 8)); \
|
||||
ff_ ## OPNAME ## pixels8x8_l2_ ## MMX(dst, halfH + 8, halfHV, \
|
||||
stride, 8); \
|
||||
} \
|
||||
\
|
||||
static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, \
|
||||
@@ -209,8 +221,8 @@ static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, \
|
||||
ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
|
||||
stride, 9); \
|
||||
ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
|
||||
ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, halfH, halfHV, \
|
||||
stride, 8, 8)); \
|
||||
ff_ ## OPNAME ## pixels8x8_l2_ ## MMX(dst, halfH, halfHV, \
|
||||
stride, 8); \
|
||||
} \
|
||||
\
|
||||
static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, \
|
||||
@@ -223,8 +235,8 @@ static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, \
|
||||
ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
|
||||
stride, 9); \
|
||||
ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
|
||||
ff_ ## OPNAME ## pixels8_l2_ ## MMX(ARG(dst, halfH + 8, halfHV, \
|
||||
stride, 8, 8)); \
|
||||
ff_ ## OPNAME ## pixels8x8_l2_ ## MMX(dst, halfH + 8, halfHV, \
|
||||
stride, 8); \
|
||||
} \
|
||||
\
|
||||
static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, \
|
||||
@@ -235,8 +247,8 @@ static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, \
|
||||
uint8_t *const halfH = (uint8_t *) half; \
|
||||
ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
|
||||
stride, 9); \
|
||||
ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, \
|
||||
8, stride, 9); \
|
||||
ff_put ## RND ## pixels8x9_l2_ ## MMX(halfH, src, halfH, \
|
||||
8, stride); \
|
||||
ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
|
||||
stride, 8); \
|
||||
} \
|
||||
@@ -249,8 +261,8 @@ static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, \
|
||||
uint8_t *const halfH = (uint8_t *) half; \
|
||||
ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
|
||||
stride, 9); \
|
||||
ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
|
||||
stride, 9); \
|
||||
ff_put ## RND ## pixels8x9_l2_ ## MMX(halfH, src + 1, halfH, 8, \
|
||||
stride); \
|
||||
ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
|
||||
stride, 8); \
|
||||
} \
|
||||
@@ -275,8 +287,8 @@ static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, \
|
||||
uint8_t *const half = (uint8_t *) temp; \
|
||||
ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \
|
||||
stride, 16); \
|
||||
ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, src, half, \
|
||||
stride, stride, 16)); \
|
||||
ff_ ## OPNAME ## pixels16x16_l2_ ## MMX(dst, src, half, \
|
||||
stride, stride); \
|
||||
} \
|
||||
\
|
||||
static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, \
|
||||
@@ -295,8 +307,8 @@ static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, \
|
||||
uint8_t *const half = (uint8_t*) temp; \
|
||||
ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \
|
||||
stride, 16); \
|
||||
ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, src + 1, half, \
|
||||
stride, stride, 16)); \
|
||||
ff_ ## OPNAME ## pixels16x16_l2_ ## MMX(dst, src + 1, half, \
|
||||
stride, stride); \
|
||||
} \
|
||||
\
|
||||
static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, \
|
||||
@@ -307,8 +319,8 @@ static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, \
|
||||
uint8_t *const half = (uint8_t *) temp; \
|
||||
ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \
|
||||
stride); \
|
||||
ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, src, half, \
|
||||
stride, stride, 16)); \
|
||||
ff_ ## OPNAME ## pixels16x16_l2_ ## MMX(dst, src, half, \
|
||||
stride, stride); \
|
||||
} \
|
||||
\
|
||||
static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, \
|
||||
@@ -327,8 +339,8 @@ static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, \
|
||||
uint8_t *const half = (uint8_t *) temp; \
|
||||
ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \
|
||||
stride); \
|
||||
ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, src+stride, half, \
|
||||
stride, stride, 16)); \
|
||||
ff_ ## OPNAME ## pixels16x16_l2_ ## MMX(dst, src+stride, half, \
|
||||
stride, stride); \
|
||||
} \
|
||||
\
|
||||
static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, \
|
||||
@@ -340,12 +352,12 @@ static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, \
|
||||
uint8_t *const halfHV = (uint8_t *) half; \
|
||||
ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
|
||||
stride, 17); \
|
||||
ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
|
||||
stride, 17); \
|
||||
ff_put ## RND ## pixels16x17_l2_ ## MMX(halfH, src, halfH, 16, \
|
||||
stride); \
|
||||
ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
|
||||
16, 16); \
|
||||
ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, halfH, halfHV, \
|
||||
stride, 16, 16)); \
|
||||
ff_ ## OPNAME ## pixels16x16_l2_ ## MMX(dst, halfH, halfHV, \
|
||||
stride, 16); \
|
||||
} \
|
||||
\
|
||||
static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, \
|
||||
@@ -357,12 +369,12 @@ static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, \
|
||||
uint8_t *const halfHV = (uint8_t *) half; \
|
||||
ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
|
||||
stride, 17); \
|
||||
ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
|
||||
stride, 17); \
|
||||
ff_put ## RND ## pixels16x17_l2_ ## MMX(halfH, src + 1, halfH, 16, \
|
||||
stride); \
|
||||
ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
|
||||
16, 16); \
|
||||
ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, halfH, halfHV, \
|
||||
stride, 16, 16)); \
|
||||
ff_ ## OPNAME ## pixels16x16_l2_ ## MMX(dst, halfH, halfHV, \
|
||||
stride, 16); \
|
||||
} \
|
||||
\
|
||||
static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, \
|
||||
@@ -374,12 +386,12 @@ static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, \
|
||||
uint8_t *const halfHV = (uint8_t *) half; \
|
||||
ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
|
||||
stride, 17); \
|
||||
ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
|
||||
stride, 17); \
|
||||
ff_put ## RND ## pixels16x17_l2_ ## MMX(halfH, src, halfH, 16, \
|
||||
stride); \
|
||||
ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
|
||||
16, 16); \
|
||||
ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, halfH + 16, halfHV, \
|
||||
stride, 16, 16)); \
|
||||
ff_ ## OPNAME ## pixels16x16_l2_ ## MMX(dst, halfH + 16, halfHV, \
|
||||
stride, 16); \
|
||||
} \
|
||||
\
|
||||
static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, \
|
||||
@@ -391,12 +403,12 @@ static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, \
|
||||
uint8_t *const halfHV = (uint8_t *) half; \
|
||||
ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
|
||||
stride, 17); \
|
||||
ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
|
||||
stride, 17); \
|
||||
ff_put ## RND ## pixels16x17_l2_ ## MMX(halfH, src + 1, halfH, 16, \
|
||||
stride); \
|
||||
ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
|
||||
16, 16); \
|
||||
ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, halfH + 16, halfHV, \
|
||||
stride, 16, 16)); \
|
||||
ff_ ## OPNAME ## pixels16x16_l2_ ## MMX(dst, halfH + 16, halfHV, \
|
||||
stride, 16); \
|
||||
} \
|
||||
\
|
||||
static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, \
|
||||
@@ -410,8 +422,8 @@ static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, \
|
||||
stride, 17); \
|
||||
ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
|
||||
16, 16); \
|
||||
ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, halfH, halfHV, \
|
||||
stride, 16, 16)); \
|
||||
ff_ ## OPNAME ## pixels16x16_l2_ ## MMX(dst, halfH, halfHV, \
|
||||
stride, 16); \
|
||||
} \
|
||||
\
|
||||
static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, \
|
||||
@@ -425,8 +437,8 @@ static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, \
|
||||
stride, 17); \
|
||||
ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
|
||||
16, 16); \
|
||||
ff_ ## OPNAME ## pixels16_l2_ ## MMX(ARG(dst, halfH + 16, halfHV, \
|
||||
stride, 16, 16)); \
|
||||
ff_ ## OPNAME ## pixels16x16_l2_ ## MMX(dst, halfH + 16, halfHV, \
|
||||
stride, 16); \
|
||||
} \
|
||||
\
|
||||
static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, \
|
||||
@@ -437,8 +449,8 @@ static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, \
|
||||
uint8_t *const halfH = (uint8_t *) half; \
|
||||
ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
|
||||
stride, 17); \
|
||||
ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
|
||||
stride, 17); \
|
||||
ff_put ## RND ## pixels16x17_l2_ ## MMX(halfH, src, halfH, 16, \
|
||||
stride); \
|
||||
ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
|
||||
stride, 16); \
|
||||
} \
|
||||
@@ -451,8 +463,8 @@ static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, \
|
||||
uint8_t *const halfH = (uint8_t *) half; \
|
||||
ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
|
||||
stride, 17); \
|
||||
ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
|
||||
stride, 17); \
|
||||
ff_put ## RND ## pixels16x17_l2_ ## MMX(halfH, src + 1, halfH, 16, \
|
||||
stride); \
|
||||
ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
|
||||
stride, 16); \
|
||||
} \
|
||||
@@ -469,13 +481,9 @@ static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, \
|
||||
stride, 16); \
|
||||
}
|
||||
|
||||
#define PASSTHROUGH(...) __VA_ARGS__
|
||||
#define STRIP_HEIGHT(dst, src1, src2, dststride, srcstride, height) \
|
||||
(dst), (src1), (src2), (dststride), (srcstride)
|
||||
|
||||
QPEL_OP(put_, _, mmxext, PASSTHROUGH)
|
||||
QPEL_OP(avg_, _, mmxext, STRIP_HEIGHT)
|
||||
QPEL_OP(put_no_rnd_, _no_rnd_, mmxext, PASSTHROUGH)
|
||||
QPEL_OP(put_, _, mmxext)
|
||||
QPEL_OP(avg_, _, mmxext)
|
||||
QPEL_OP(put_no_rnd_, _no_rnd_, mmxext)
|
||||
|
||||
#endif /* HAVE_X86ASM */
|
||||
|
||||
|
||||
Reference in New Issue
Block a user