1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-08-15 14:13:16 +02:00

h264/x86: sign extend int stride in deblock functions

Fixes checkasm errors after adding the h264 deblock tests.
This commit is contained in:
Janne Grunau
2019-01-27 11:06:34 +01:00
parent eec93e5709
commit 156ea66c91
2 changed files with 17 additions and 0 deletions

View File

@@ -288,6 +288,7 @@ cextern pb_3
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
%macro DEBLOCK_LUMA 0 %macro DEBLOCK_LUMA 0
cglobal deblock_v_luma_8, 5,5,10 cglobal deblock_v_luma_8, 5,5,10
movsxdifnidn r1, r1d
movd m8, [r4] ; tc0 movd m8, [r4] ; tc0
lea r4, [r1*3] lea r4, [r1*3]
dec r2d ; alpha-1 dec r2d ; alpha-1
@@ -335,6 +336,7 @@ cglobal deblock_v_luma_8, 5,5,10
INIT_MMX cpuname INIT_MMX cpuname
cglobal deblock_h_luma_8, 5,9,0,0x60+16*WIN64 cglobal deblock_h_luma_8, 5,9,0,0x60+16*WIN64
movsxd r7, r1d movsxd r7, r1d
movsxdifnidn r1, r1d
lea r8, [r7+r7*2] lea r8, [r7+r7*2]
lea r6, [r0-4] lea r6, [r0-4]
lea r5, [r0-4+r8] lea r5, [r0-4+r8]
@@ -395,6 +397,7 @@ DEBLOCK_LUMA
; int8_t *tc0) ; int8_t *tc0)
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
cglobal deblock_%1_luma_8, 5,5,8,2*%2 cglobal deblock_%1_luma_8, 5,5,8,2*%2
movsxdifnidn r1, r1d
lea r4, [r1*3] lea r4, [r1*3]
dec r2 ; alpha-1 dec r2 ; alpha-1
neg r4 neg r4
@@ -445,6 +448,7 @@ cglobal deblock_%1_luma_8, 5,5,8,2*%2
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
INIT_MMX cpuname INIT_MMX cpuname
cglobal deblock_h_luma_8, 0,5,8,0x60+12 cglobal deblock_h_luma_8, 0,5,8,0x60+12
movsxdifnidn r1, r1d
mov r0, r0mp mov r0, r0mp
mov r3, r1m mov r3, r1m
lea r4, [r3*3] lea r4, [r3*3]
@@ -646,6 +650,7 @@ cglobal deblock_%1_luma_intra_8, 4,6,16,0x10
%else %else
cglobal deblock_%1_luma_intra_8, 4,6,16,ARCH_X86_64*0x50-0x50 cglobal deblock_%1_luma_intra_8, 4,6,16,ARCH_X86_64*0x50-0x50
%endif %endif
movsxdifnidn r1, r1d
lea r4, [r1*4] lea r4, [r1*4]
lea r5, [r1*3] ; 3*stride lea r5, [r1*3] ; 3*stride
dec r2d ; alpha-1 dec r2d ; alpha-1
@@ -703,6 +708,7 @@ INIT_MMX cpuname
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
cglobal deblock_h_luma_intra_8, 4,9,0,0x80 cglobal deblock_h_luma_intra_8, 4,9,0,0x80
movsxd r7, r1d movsxd r7, r1d
movsxdifnidn r1, r1d
lea r8, [r7*3] lea r8, [r7*3]
lea r6, [r0-4] lea r6, [r0-4]
lea r5, [r0-4+r8] lea r5, [r0-4+r8]
@@ -782,6 +788,7 @@ DEBLOCK_LUMA_INTRA v8
INIT_MMX mmxext INIT_MMX mmxext
%macro CHROMA_V_START 0 %macro CHROMA_V_START 0
movsxdifnidn r1, r1d
dec r2d ; alpha-1 dec r2d ; alpha-1
dec r3d ; beta-1 dec r3d ; beta-1
mov t5, r0 mov t5, r0
@@ -790,6 +797,7 @@ INIT_MMX mmxext
%endmacro %endmacro
%macro CHROMA_H_START 0 %macro CHROMA_H_START 0
movsxdifnidn r1, r1d
dec r2d dec r2d
dec r3d dec r3d
sub r0, 2 sub r0, 2

View File

@@ -162,6 +162,7 @@ cglobal deblock_v_luma_10, 5,5,8*(mmsize/16)
%define ms2 [rsp+mmsize*2] %define ms2 [rsp+mmsize*2]
%define am [rsp+mmsize*3] %define am [rsp+mmsize*3]
%define bm [rsp+mmsize*4] %define bm [rsp+mmsize*4]
movsxdifnidn r1, r1d
SUB rsp, pad SUB rsp, pad
shl r2d, 2 shl r2d, 2
shl r3d, 2 shl r3d, 2
@@ -219,6 +220,7 @@ cglobal deblock_h_luma_10, 5,6,8*(mmsize/16)
%define p2m [rsp+mmsize*4] %define p2m [rsp+mmsize*4]
%define am [rsp+mmsize*5] %define am [rsp+mmsize*5]
%define bm [rsp+mmsize*6] %define bm [rsp+mmsize*6]
movsxdifnidn r1, r1d
SUB rsp, pad SUB rsp, pad
shl r2d, 2 shl r2d, 2
shl r3d, 2 shl r3d, 2
@@ -349,6 +351,7 @@ cglobal deblock_v_luma_10, 5,5,15
%define mask0 m7 %define mask0 m7
%define mask1 m10 %define mask1 m10
%define mask2 m11 %define mask2 m11
movsxdifnidn r1, r1d
shl r2d, 2 shl r2d, 2
shl r3d, 2 shl r3d, 2
LOAD_AB m12, m13, r2d, r3d LOAD_AB m12, m13, r2d, r3d
@@ -377,6 +380,7 @@ cglobal deblock_v_luma_10, 5,5,15
REP_RET REP_RET
cglobal deblock_h_luma_10, 5,7,15 cglobal deblock_h_luma_10, 5,7,15
movsxdifnidn r1, r1d
shl r2d, 2 shl r2d, 2
shl r3d, 2 shl r3d, 2
LOAD_AB m12, m13, r2d, r3d LOAD_AB m12, m13, r2d, r3d
@@ -492,6 +496,7 @@ DEBLOCK_LUMA_64
CAT_XDEFINE t, i, [rsp+mmsize*(i-4)] CAT_XDEFINE t, i, [rsp+mmsize*(i-4)]
%assign i i+1 %assign i i+1
%endrep %endrep
movsxdifnidn r1, r1d
SUB rsp, pad SUB rsp, pad
%endmacro %endmacro
@@ -615,6 +620,7 @@ cglobal deblock_v_luma_intra_10, 4,7,16
%define q2 m13 %define q2 m13
%define aa m5 %define aa m5
%define bb m14 %define bb m14
movsxdifnidn r1, r1d
lea r4, [r1*4] lea r4, [r1*4]
lea r5, [r1*3] ; 3*stride lea r5, [r1*3] ; 3*stride
neg r4 neg r4
@@ -668,6 +674,7 @@ cglobal deblock_h_luma_intra_10, 4,7,16
%define p3 m4 %define p3 m4
%define spill [rsp] %define spill [rsp]
%assign pad 24-(stack_offset&15) %assign pad 24-(stack_offset&15)
movsxdifnidn r1, r1d
SUB rsp, pad SUB rsp, pad
lea r4, [r1*4] lea r4, [r1*4]
lea r5, [r1*3] ; 3*stride lea r5, [r1*3] ; 3*stride
@@ -852,6 +859,7 @@ DEBLOCK_LUMA_INTRA
; int8_t *tc0) ; int8_t *tc0)
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
cglobal deblock_v_chroma_10, 5,7-(mmsize/16),8*(mmsize/16) cglobal deblock_v_chroma_10, 5,7-(mmsize/16),8*(mmsize/16)
movsxdifnidn r1, r1d
mov r5, r0 mov r5, r0
sub r0, r1 sub r0, r1
sub r0, r1 sub r0, r1
@@ -887,6 +895,7 @@ cglobal deblock_v_chroma_10, 5,7-(mmsize/16),8*(mmsize/16)
; int beta) ; int beta)
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
cglobal deblock_v_chroma_intra_10, 4,6-(mmsize/16),8*(mmsize/16) cglobal deblock_v_chroma_intra_10, 4,6-(mmsize/16),8*(mmsize/16)
movsxdifnidn r1, r1d
mov r4, r0 mov r4, r0
sub r0, r1 sub r0, r1
sub r0, r1 sub r0, r1