You've already forked FFmpeg
Mirror of https://github.com/FFmpeg/FFmpeg.git (synced 2025-10-06 05:47:18 +02:00)
avcodec/x86/h264_qpel: Use ptrdiff_t for strides
Avoids having to sign-extend the strides in the assembly (it also is more correct given that the qpel_mc_func already uses ptrdiff_t).

Reviewed-by: James Almer <jamrial@gmail.com>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
@@ -19,14 +19,14 @@
|
|||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
#include "libavutil/attributes.h"
|
#include "libavutil/attributes.h"
|
||||||
#include "libavutil/avassert.h"
|
#include "libavutil/avassert.h"
|
||||||
#include "libavutil/cpu.h"
|
#include "libavutil/cpu.h"
|
||||||
#include "libavutil/mem_internal.h"
|
#include "libavutil/mem_internal.h"
|
||||||
#include "libavutil/x86/asm.h"
|
|
||||||
#include "libavutil/x86/cpu.h"
|
#include "libavutil/x86/cpu.h"
|
||||||
#include "libavcodec/h264qpel.h"
|
#include "libavcodec/h264qpel.h"
|
||||||
#include "libavcodec/pixels.h"
|
|
||||||
#include "fpel.h"
|
#include "fpel.h"
|
||||||
|
|
||||||
#if HAVE_X86ASM
|
#if HAVE_X86ASM
|
||||||
@@ -55,29 +55,30 @@ void ff_avg_pixels16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t
|
|||||||
ff_avg_pixels16_l2_mmxext((dst), (src1), (src2), (dststride), (src1stride))
|
ff_avg_pixels16_l2_mmxext((dst), (src1), (src2), (dststride), (src1stride))
|
||||||
|
|
||||||
#define DEF_QPEL(OPNAME)\
|
#define DEF_QPEL(OPNAME)\
|
||||||
void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\
|
void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride);\
|
||||||
void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_ssse3(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\
|
void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_ssse3(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride);\
|
||||||
void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_l2_mmxext(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);\
|
void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_l2_mmxext(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride);\
|
||||||
void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_sse2(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);\
|
void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_sse2(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride);\
|
||||||
void ff_ ## OPNAME ## _h264_qpel16_h_lowpass_l2_sse2(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);\
|
void ff_ ## OPNAME ## _h264_qpel16_h_lowpass_l2_sse2(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride);\
|
||||||
void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);\
|
void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride);\
|
||||||
void ff_ ## OPNAME ## _h264_qpel4_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\
|
void ff_ ## OPNAME ## _h264_qpel4_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride);\
|
||||||
void ff_ ## OPNAME ## _h264_qpel8or16_v_lowpass_sse2(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h);\
|
void ff_ ## OPNAME ## _h264_qpel8or16_v_lowpass_sse2(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h);\
|
||||||
void ff_ ## OPNAME ## _h264_qpel4_hv_lowpass_v_mmxext(const uint8_t *src, int16_t *tmp, int srcStride);\
|
void ff_ ## OPNAME ## _h264_qpel4_hv_lowpass_v_mmxext(const uint8_t *src, int16_t *tmp, ptrdiff_t srcStride);\
|
||||||
void ff_ ## OPNAME ## _h264_qpel4_hv_lowpass_h_mmxext(int16_t *tmp, uint8_t *dst, int dstStride);\
|
void ff_ ## OPNAME ## _h264_qpel4_hv_lowpass_h_mmxext(int16_t *tmp, uint8_t *dst, ptrdiff_t dstStride);\
|
||||||
void ff_ ## OPNAME ## _h264_qpel8or16_hv1_lowpass_op_sse2(const uint8_t *src, int16_t *tmp, int srcStride, int size);\
|
void ff_ ## OPNAME ## _h264_qpel8or16_hv1_lowpass_op_sse2(const uint8_t *src, int16_t *tmp, ptrdiff_t srcStride, int size);\
|
||||||
void ff_ ## OPNAME ## _h264_qpel8or16_hv2_lowpass_op_mmxext(uint8_t *dst, int16_t *tmp, int dstStride, int h);\
|
void ff_ ## OPNAME ## _h264_qpel8or16_hv2_lowpass_op_mmxext(uint8_t *dst, int16_t *tmp, ptrdiff_t dstStride, int h);\
|
||||||
void ff_ ## OPNAME ## _h264_qpel8_hv2_lowpass_ssse3(uint8_t *dst, int16_t *tmp, int dstStride);\
|
void ff_ ## OPNAME ## _h264_qpel8_hv2_lowpass_ssse3(uint8_t *dst, int16_t *tmp, ptrdiff_t dstStride);\
|
||||||
void ff_ ## OPNAME ## _h264_qpel16_hv2_lowpass_ssse3(uint8_t *dst, int16_t *tmp, int dstStride);\
|
void ff_ ## OPNAME ## _h264_qpel16_hv2_lowpass_ssse3(uint8_t *dst, int16_t *tmp, ptrdiff_t dstStride);\
|
||||||
void ff_ ## OPNAME ## _pixels4_l2_shift5_mmxext(uint8_t *dst, const int16_t *src16, const uint8_t *src8, int dstStride);\
|
void ff_ ## OPNAME ## _pixels4_l2_shift5_mmxext(uint8_t *dst, const int16_t *src16, const uint8_t *src8, ptrdiff_t dstStride);\
|
||||||
void ff_ ## OPNAME ## _pixels8_l2_shift5_sse2(uint8_t *dst, const int16_t *src16, const uint8_t *src8, int dstStride);\
|
void ff_ ## OPNAME ## _pixels8_l2_shift5_sse2(uint8_t *dst, const int16_t *src16, const uint8_t *src8, ptrdiff_t dstStride);\
|
||||||
void ff_ ## OPNAME ## _pixels16_l2_shift5_sse2(uint8_t *dst, const int16_t *src16, const uint8_t *src8, int dstStride);\
|
void ff_ ## OPNAME ## _pixels16_l2_shift5_sse2(uint8_t *dst, const int16_t *src16, const uint8_t *src8, ptrdiff_t dstStride);\
|
||||||
|
|
||||||
DEF_QPEL(avg)
|
DEF_QPEL(avg)
|
||||||
DEF_QPEL(put)
|
DEF_QPEL(put)
|
||||||
|
|
||||||
#define QPEL_H264(OPNAME, OP, MMX)\
|
#define QPEL_H264(OPNAME, OP, MMX)\
|
||||||
static av_always_inline void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int srcStride){\
|
static av_always_inline void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
|
||||||
|
{\
|
||||||
int w=3;\
|
int w=3;\
|
||||||
src -= 2*srcStride+2;\
|
src -= 2*srcStride+2;\
|
||||||
while(w--){\
|
while(w--){\
|
||||||
@@ -89,7 +90,8 @@ static av_always_inline void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *ds
|
|||||||
ff_ ## OPNAME ## h264_qpel4_hv_lowpass_h_mmxext(tmp, dst, dstStride);\
|
ff_ ## OPNAME ## h264_qpel4_hv_lowpass_h_mmxext(tmp, dst, dstStride);\
|
||||||
}\
|
}\
|
||||||
\
|
\
|
||||||
static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, int dstStride, int size){\
|
static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, ptrdiff_t dstStride, int size)\
|
||||||
|
{\
|
||||||
int w = size>>4;\
|
int w = size>>4;\
|
||||||
do{\
|
do{\
|
||||||
ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_op_mmxext(dst, tmp, dstStride, size);\
|
ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_op_mmxext(dst, tmp, dstStride, size);\
|
||||||
@@ -99,7 +101,8 @@ static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX
|
|||||||
}\
|
}\
|
||||||
|
|
||||||
#define QPEL_H264_H16(OPNAME, EXT) \
|
#define QPEL_H264_H16(OPNAME, EXT) \
|
||||||
static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_l2_ ## EXT(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride){\
|
static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_l2_ ## EXT(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)\
|
||||||
|
{\
|
||||||
ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## EXT(dst , src , src2 , dstStride, src2Stride);\
|
ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## EXT(dst , src , src2 , dstStride, src2Stride);\
|
||||||
ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## EXT(dst+8, src+8, src2+8, dstStride, src2Stride);\
|
ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## EXT(dst+8, src+8, src2+8, dstStride, src2Stride);\
|
||||||
src += 8*dstStride;\
|
src += 8*dstStride;\
|
||||||
@@ -113,8 +116,8 @@ static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_l2_ ## EXT(u
|
|||||||
#if ARCH_X86_64
|
#if ARCH_X86_64
|
||||||
#define QPEL_H264_H16_XMM(OPNAME, OP, MMX)\
|
#define QPEL_H264_H16_XMM(OPNAME, OP, MMX)\
|
||||||
|
|
||||||
void ff_avg_h264_qpel16_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);
|
void ff_avg_h264_qpel16_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride);
|
||||||
void ff_put_h264_qpel16_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);
|
void ff_put_h264_qpel16_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride);
|
||||||
|
|
||||||
#else // ARCH_X86_64
|
#else // ARCH_X86_64
|
||||||
#define QPEL_H264_H16_XMM(OPNAME, OP, EXT) QPEL_H264_H16(OPNAME, EXT)
|
#define QPEL_H264_H16_XMM(OPNAME, OP, EXT) QPEL_H264_H16(OPNAME, EXT)
|
||||||
@@ -122,7 +125,8 @@ void ff_put_h264_qpel16_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, con
|
|||||||
|
|
||||||
#define QPEL_H264_H_XMM(OPNAME, OP, MMX)\
|
#define QPEL_H264_H_XMM(OPNAME, OP, MMX)\
|
||||||
QPEL_H264_H16_XMM(OPNAME, OP, MMX)\
|
QPEL_H264_H16_XMM(OPNAME, OP, MMX)\
|
||||||
static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
|
static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
|
||||||
|
{\
|
||||||
ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\
|
ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\
|
||||||
ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
|
ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
|
||||||
src += 8*srcStride;\
|
src += 8*srcStride;\
|
||||||
@@ -132,17 +136,19 @@ static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint
|
|||||||
}\
|
}\
|
||||||
|
|
||||||
#define QPEL_H264_V_XMM(OPNAME, OP, MMX)\
|
#define QPEL_H264_V_XMM(OPNAME, OP, MMX)\
|
||||||
static av_always_inline void ff_ ## OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
|
static av_always_inline void ff_ ## OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
|
||||||
|
{\
|
||||||
ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 8);\
|
ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 8);\
|
||||||
}\
|
}\
|
||||||
static av_always_inline void ff_ ## OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
|
static av_always_inline void ff_ ## OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
|
||||||
|
{\
|
||||||
ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 16);\
|
ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 16);\
|
||||||
ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
|
ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
|
||||||
}
|
}
|
||||||
|
|
||||||
static av_always_inline void put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp,
|
static av_always_inline void put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp,
|
||||||
const uint8_t *src,
|
const uint8_t *src,
|
||||||
int srcStride,
|
ptrdiff_t srcStride,
|
||||||
int size)
|
int size)
|
||||||
{
|
{
|
||||||
int w = (size+8)>>3;
|
int w = (size+8)>>3;
|
||||||
@@ -155,20 +161,23 @@ static av_always_inline void put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp,
|
|||||||
}
|
}
|
||||||
|
|
||||||
#define QPEL_H264_HV_XMM(OPNAME, OP, MMX)\
|
#define QPEL_H264_HV_XMM(OPNAME, OP, MMX)\
|
||||||
static av_always_inline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int srcStride, int size){\
|
static av_always_inline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int size)\
|
||||||
|
{\
|
||||||
put_h264_qpel8or16_hv1_lowpass_sse2(tmp, src, srcStride, size);\
|
put_h264_qpel8or16_hv1_lowpass_sse2(tmp, src, srcStride, size);\
|
||||||
ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(dst, tmp, dstStride, size);\
|
ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(dst, tmp, dstStride, size);\
|
||||||
}\
|
}\
|
||||||
static av_always_inline void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int srcStride){\
|
static av_always_inline void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
|
||||||
|
{\
|
||||||
OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, srcStride, 8);\
|
OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, srcStride, 8);\
|
||||||
}\
|
}\
|
||||||
static av_always_inline void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int srcStride){\
|
static av_always_inline void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
|
||||||
|
{\
|
||||||
OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, srcStride, 16);\
|
OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, srcStride, 16);\
|
||||||
}\
|
}\
|
||||||
|
|
||||||
#define SSSE3_HV2_LOWPASS_WRAPPER(OPNAME) \
|
#define SSSE3_HV2_LOWPASS_WRAPPER(OPNAME) \
|
||||||
static av_always_inline void \
|
static av_always_inline void \
|
||||||
ff_ ## OPNAME ## _h264_qpel8or16_hv2_lowpass_ssse3(uint8_t *dst, int16_t *tmp, int dstStride, int size) \
|
ff_ ## OPNAME ## _h264_qpel8or16_hv2_lowpass_ssse3(uint8_t *dst, int16_t *tmp, ptrdiff_t dstStride, int size) \
|
||||||
{\
|
{\
|
||||||
if (size == 8)\
|
if (size == 8)\
|
||||||
ff_ ## OPNAME ## _h264_qpel8_hv2_lowpass_ssse3(dst, tmp, dstStride);\
|
ff_ ## OPNAME ## _h264_qpel8_hv2_lowpass_ssse3(dst, tmp, dstStride);\
|
||||||
|
@@ -97,8 +97,6 @@ PIXELS4_L2 avg
|
|||||||
|
|
||||||
%macro QPEL4_H_LOWPASS_OP 1
|
%macro QPEL4_H_LOWPASS_OP 1
|
||||||
cglobal %1_h264_qpel4_h_lowpass, 4,5 ; dst, src, dstStride, srcStride
|
cglobal %1_h264_qpel4_h_lowpass, 4,5 ; dst, src, dstStride, srcStride
|
||||||
movsxdifnidn r2, r2d
|
|
||||||
movsxdifnidn r3, r3d
|
|
||||||
pxor m7, m7
|
pxor m7, m7
|
||||||
mova m4, [pw_5]
|
mova m4, [pw_5]
|
||||||
mova m5, [pw_16]
|
mova m5, [pw_16]
|
||||||
@@ -140,8 +138,6 @@ QPEL4_H_LOWPASS_OP avg
|
|||||||
|
|
||||||
%macro QPEL8_H_LOWPASS_OP_XMM 1
|
%macro QPEL8_H_LOWPASS_OP_XMM 1
|
||||||
cglobal %1_h264_qpel8_h_lowpass, 4,5,8 ; dst, src, dstStride, srcStride
|
cglobal %1_h264_qpel8_h_lowpass, 4,5,8 ; dst, src, dstStride, srcStride
|
||||||
movsxdifnidn r2, r2d
|
|
||||||
movsxdifnidn r3, r3d
|
|
||||||
mov r4d, 8
|
mov r4d, 8
|
||||||
pxor m7, m7
|
pxor m7, m7
|
||||||
mova m6, [pw_5]
|
mova m6, [pw_5]
|
||||||
@@ -184,8 +180,6 @@ QPEL8_H_LOWPASS_OP_XMM avg
|
|||||||
|
|
||||||
%macro QPEL4_H_LOWPASS_L2_OP 1
|
%macro QPEL4_H_LOWPASS_L2_OP 1
|
||||||
cglobal %1_h264_qpel4_h_lowpass_l2, 5,6 ; dst, src, src2, dstStride, srcStride
|
cglobal %1_h264_qpel4_h_lowpass_l2, 5,6 ; dst, src, src2, dstStride, srcStride
|
||||||
movsxdifnidn r3, r3d
|
|
||||||
movsxdifnidn r4, r4d
|
|
||||||
pxor m7, m7
|
pxor m7, m7
|
||||||
mova m4, [pw_5]
|
mova m4, [pw_5]
|
||||||
mova m5, [pw_16]
|
mova m5, [pw_16]
|
||||||
@@ -231,8 +225,6 @@ QPEL4_H_LOWPASS_L2_OP avg
|
|||||||
|
|
||||||
%macro QPEL8_H_LOWPASS_L2_OP 1
|
%macro QPEL8_H_LOWPASS_L2_OP 1
|
||||||
cglobal %1_h264_qpel8_h_lowpass_l2, 5,6,6 ; dst, src, src2, dstStride, srcStride
|
cglobal %1_h264_qpel8_h_lowpass_l2, 5,6,6 ; dst, src, src2, dstStride, srcStride
|
||||||
movsxdifnidn r3, r3d
|
|
||||||
movsxdifnidn r4, r4d
|
|
||||||
mova m3, [pw_16]
|
mova m3, [pw_16]
|
||||||
mov r5d, 8
|
mov r5d, 8
|
||||||
pxor m5, m5
|
pxor m5, m5
|
||||||
@@ -285,8 +277,6 @@ cglobal %1_h264_qpel16_h_lowpass_l2, 5,6,9 ; dst, src, src2, dstStride, srcStrid
|
|||||||
cglobal %1_h264_qpel16_h_lowpass_l2, 5,6,8 ; dst, src, src2, dstStride, srcStride
|
cglobal %1_h264_qpel16_h_lowpass_l2, 5,6,8 ; dst, src, src2, dstStride, srcStride
|
||||||
%define PW_16 [pw_16]
|
%define PW_16 [pw_16]
|
||||||
%endif
|
%endif
|
||||||
movsxdifnidn r3, r3d
|
|
||||||
movsxdifnidn r4, r4d
|
|
||||||
mov r5d, 16
|
mov r5d, 16
|
||||||
pxor m7, m7
|
pxor m7, m7
|
||||||
mova m6, [pw_5]
|
mova m6, [pw_5]
|
||||||
@@ -352,8 +342,6 @@ QPEL16_H_LOWPASS_L2 avg
|
|||||||
|
|
||||||
%macro QPEL8_H_LOWPASS_L2_OP_XMM 1
|
%macro QPEL8_H_LOWPASS_L2_OP_XMM 1
|
||||||
cglobal %1_h264_qpel8_h_lowpass_l2, 5,6,8 ; dst, src, src2, dstStride, src2Stride
|
cglobal %1_h264_qpel8_h_lowpass_l2, 5,6,8 ; dst, src, src2, dstStride, src2Stride
|
||||||
movsxdifnidn r3, r3d
|
|
||||||
movsxdifnidn r4, r4d
|
|
||||||
mov r5d, 8
|
mov r5d, 8
|
||||||
pxor m7, m7
|
pxor m7, m7
|
||||||
mova m6, [pw_5]
|
mova m6, [pw_5]
|
||||||
@@ -421,8 +409,6 @@ QPEL8_H_LOWPASS_L2_OP_XMM avg
|
|||||||
|
|
||||||
%macro QPEL4_V_LOWPASS_OP 1
|
%macro QPEL4_V_LOWPASS_OP 1
|
||||||
cglobal %1_h264_qpel4_v_lowpass, 4,4 ; dst, src, dstStride, srcStride
|
cglobal %1_h264_qpel4_v_lowpass, 4,4 ; dst, src, dstStride, srcStride
|
||||||
movsxdifnidn r2, r2d
|
|
||||||
movsxdifnidn r3, r3d
|
|
||||||
sub r1, r3
|
sub r1, r3
|
||||||
sub r1, r3
|
sub r1, r3
|
||||||
pxor m7, m7
|
pxor m7, m7
|
||||||
@@ -454,8 +440,6 @@ QPEL4_V_LOWPASS_OP avg
|
|||||||
|
|
||||||
%macro QPEL8OR16_V_LOWPASS_OP 1
|
%macro QPEL8OR16_V_LOWPASS_OP 1
|
||||||
cglobal %1_h264_qpel8or16_v_lowpass, 5,5,8 ; dst, src, dstStride, srcStride, h
|
cglobal %1_h264_qpel8or16_v_lowpass, 5,5,8 ; dst, src, dstStride, srcStride, h
|
||||||
movsxdifnidn r2, r2d
|
|
||||||
movsxdifnidn r3, r3d
|
|
||||||
sub r1, r3
|
sub r1, r3
|
||||||
sub r1, r3
|
sub r1, r3
|
||||||
pxor m7, m7
|
pxor m7, m7
|
||||||
@@ -520,7 +504,6 @@ QPEL8OR16_V_LOWPASS_OP avg
|
|||||||
|
|
||||||
%macro QPEL4_HV1_LOWPASS_OP 1
|
%macro QPEL4_HV1_LOWPASS_OP 1
|
||||||
cglobal %1_h264_qpel4_hv_lowpass_v, 3,3 ; src, tmp, srcStride
|
cglobal %1_h264_qpel4_hv_lowpass_v, 3,3 ; src, tmp, srcStride
|
||||||
movsxdifnidn r2, r2d
|
|
||||||
pxor m7, m7
|
pxor m7, m7
|
||||||
movh m0, [r0]
|
movh m0, [r0]
|
||||||
movh m1, [r0+r2]
|
movh m1, [r0+r2]
|
||||||
@@ -542,7 +525,6 @@ cglobal %1_h264_qpel4_hv_lowpass_v, 3,3 ; src, tmp, srcStride
|
|||||||
RET
|
RET
|
||||||
|
|
||||||
cglobal %1_h264_qpel4_hv_lowpass_h, 3,4 ; tmp, dst, dstStride
|
cglobal %1_h264_qpel4_hv_lowpass_h, 3,4 ; tmp, dst, dstStride
|
||||||
movsxdifnidn r2, r2d
|
|
||||||
mov r3d, 4
|
mov r3d, 4
|
||||||
.loop:
|
.loop:
|
||||||
mova m0, [r0]
|
mova m0, [r0]
|
||||||
@@ -573,7 +555,6 @@ QPEL4_HV1_LOWPASS_OP avg
|
|||||||
|
|
||||||
INIT_XMM sse2
|
INIT_XMM sse2
|
||||||
cglobal put_h264_qpel8or16_hv1_lowpass_op, 4,4,8 ; src, tmp, srcStride, size
|
cglobal put_h264_qpel8or16_hv1_lowpass_op, 4,4,8 ; src, tmp, srcStride, size
|
||||||
movsxdifnidn r2, r2d
|
|
||||||
pxor m7, m7
|
pxor m7, m7
|
||||||
movh m0, [r0]
|
movh m0, [r0]
|
||||||
movh m1, [r0+r2]
|
movh m1, [r0+r2]
|
||||||
@@ -612,7 +593,6 @@ cglobal put_h264_qpel8or16_hv1_lowpass_op, 4,4,8 ; src, tmp, srcStride, size
|
|||||||
|
|
||||||
%macro QPEL8OR16_HV2_LOWPASS_OP 1
|
%macro QPEL8OR16_HV2_LOWPASS_OP 1
|
||||||
cglobal %1_h264_qpel8or16_hv2_lowpass_op, 4,4 ; dst, tmp, dstStride, h
|
cglobal %1_h264_qpel8or16_hv2_lowpass_op, 4,4 ; dst, tmp, dstStride, h
|
||||||
movsxdifnidn r2, r2d
|
|
||||||
.loop:
|
.loop:
|
||||||
mova m0, [r1]
|
mova m0, [r1]
|
||||||
mova m3, [r1+8]
|
mova m3, [r1+8]
|
||||||
@@ -659,7 +639,6 @@ cglobal %1_h264_qpel8_hv2_lowpass, 3,4,7 ; dst, tmp, dstStride
|
|||||||
%else
|
%else
|
||||||
cglobal %1_h264_qpel8_hv2_lowpass, 3,4,6 ; dst, tmp, dstStride
|
cglobal %1_h264_qpel8_hv2_lowpass, 3,4,6 ; dst, tmp, dstStride
|
||||||
%endif
|
%endif
|
||||||
movsxdifnidn r2, r2d
|
|
||||||
mov r3d, 8
|
mov r3d, 8
|
||||||
.loop:
|
.loop:
|
||||||
mova m1, [r1+16]
|
mova m1, [r1+16]
|
||||||
@@ -692,7 +671,6 @@ cglobal %1_h264_qpel8_hv2_lowpass, 3,4,6 ; dst, tmp, dstStride
|
|||||||
RET
|
RET
|
||||||
|
|
||||||
cglobal %1_h264_qpel16_hv2_lowpass, 3,4,8 ; dst, tmp, dstStride
|
cglobal %1_h264_qpel16_hv2_lowpass, 3,4,8 ; dst, tmp, dstStride
|
||||||
movsxdifnidn r2, r2d
|
|
||||||
mov r3d, 16
|
mov r3d, 16
|
||||||
.loop:
|
.loop:
|
||||||
mova m4, [r1+32]
|
mova m4, [r1+32]
|
||||||
@@ -752,7 +730,6 @@ QPEL8OR16_HV2_LOWPASS_OP_XMM avg
|
|||||||
|
|
||||||
%macro PIXELS4_L2_SHIFT5 1
|
%macro PIXELS4_L2_SHIFT5 1
|
||||||
cglobal %1_pixels4_l2_shift5,4,4 ; dst, src16, src8, dstStride
|
cglobal %1_pixels4_l2_shift5,4,4 ; dst, src16, src8, dstStride
|
||||||
movsxdifnidn r3, r3d
|
|
||||||
mova m0, [r1]
|
mova m0, [r1]
|
||||||
mova m1, [r1+24]
|
mova m1, [r1+24]
|
||||||
psraw m0, 5
|
psraw m0, 5
|
||||||
@@ -783,7 +760,6 @@ PIXELS4_L2_SHIFT5 avg
|
|||||||
|
|
||||||
%macro PIXELS8_L2_SHIFT5 1
|
%macro PIXELS8_L2_SHIFT5 1
|
||||||
cglobal %1_pixels8_l2_shift5, 5, 5, 3 ; dst, src16, src8, dstStride
|
cglobal %1_pixels8_l2_shift5, 5, 5, 3 ; dst, src16, src8, dstStride
|
||||||
movsxdifnidn r3, r3d
|
|
||||||
mov r4d, 8
|
mov r4d, 8
|
||||||
.loop:
|
.loop:
|
||||||
movu m0, [r1]
|
movu m0, [r1]
|
||||||
@@ -805,7 +781,6 @@ cglobal %1_pixels8_l2_shift5, 5, 5, 3 ; dst, src16, src8, dstStride
|
|||||||
|
|
||||||
%macro PIXELS16_L2_SHIFT5 2
|
%macro PIXELS16_L2_SHIFT5 2
|
||||||
cglobal %1_pixels%2_l2_shift5, 5, 5, 4 ; dst, src16, src8, dstStride
|
cglobal %1_pixels%2_l2_shift5, 5, 5, 4 ; dst, src16, src8, dstStride
|
||||||
movsxdifnidn r3, r3d
|
|
||||||
mov r4d, %2
|
mov r4d, %2
|
||||||
.loop:
|
.loop:
|
||||||
movu m0, [r1]
|
movu m0, [r1]
|
||||||
@@ -840,8 +815,6 @@ PIXELS16_L2_SHIFT5 avg, 16
|
|||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
%macro QPEL16_H_LOWPASS_L2_OP 1
|
%macro QPEL16_H_LOWPASS_L2_OP 1
|
||||||
cglobal %1_h264_qpel16_h_lowpass_l2, 5, 6, 16 ; dst, src, src2, dstStride, src2Stride
|
cglobal %1_h264_qpel16_h_lowpass_l2, 5, 6, 16 ; dst, src, src2, dstStride, src2Stride
|
||||||
movsxdifnidn r3, r3d
|
|
||||||
movsxdifnidn r4, r4d
|
|
||||||
mov r5d, 16
|
mov r5d, 16
|
||||||
pxor m15, m15
|
pxor m15, m15
|
||||||
mova m14, [pw_5]
|
mova m14, [pw_5]
|
||||||
|
Reference in New Issue
Block a user