You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-08-15 14:13:16 +02:00
h264/x86: sign extend int stride in deblock functions
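Fixes checkasm errors after adding the h264 deblock tests: the stride is passed as a 32-bit int, so on x86-64 the upper half of its register is not guaranteed to be sign-extended. Each deblock function now sign-extends it (movsxdifnidn r1, r1d) before using it in 64-bit address arithmetic.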
This commit is contained in:
@@ -288,6 +288,7 @@ cextern pb_3
|
|||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
%macro DEBLOCK_LUMA 0
|
%macro DEBLOCK_LUMA 0
|
||||||
cglobal deblock_v_luma_8, 5,5,10
|
cglobal deblock_v_luma_8, 5,5,10
|
||||||
|
movsxdifnidn r1, r1d
|
||||||
movd m8, [r4] ; tc0
|
movd m8, [r4] ; tc0
|
||||||
lea r4, [r1*3]
|
lea r4, [r1*3]
|
||||||
dec r2d ; alpha-1
|
dec r2d ; alpha-1
|
||||||
@@ -335,6 +336,7 @@ cglobal deblock_v_luma_8, 5,5,10
|
|||||||
INIT_MMX cpuname
|
INIT_MMX cpuname
|
||||||
cglobal deblock_h_luma_8, 5,9,0,0x60+16*WIN64
|
cglobal deblock_h_luma_8, 5,9,0,0x60+16*WIN64
|
||||||
movsxd r7, r1d
|
movsxd r7, r1d
|
||||||
|
movsxdifnidn r1, r1d
|
||||||
lea r8, [r7+r7*2]
|
lea r8, [r7+r7*2]
|
||||||
lea r6, [r0-4]
|
lea r6, [r0-4]
|
||||||
lea r5, [r0-4+r8]
|
lea r5, [r0-4+r8]
|
||||||
@@ -395,6 +397,7 @@ DEBLOCK_LUMA
|
|||||||
; int8_t *tc0)
|
; int8_t *tc0)
|
||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
cglobal deblock_%1_luma_8, 5,5,8,2*%2
|
cglobal deblock_%1_luma_8, 5,5,8,2*%2
|
||||||
|
movsxdifnidn r1, r1d
|
||||||
lea r4, [r1*3]
|
lea r4, [r1*3]
|
||||||
dec r2 ; alpha-1
|
dec r2 ; alpha-1
|
||||||
neg r4
|
neg r4
|
||||||
@@ -445,6 +448,7 @@ cglobal deblock_%1_luma_8, 5,5,8,2*%2
|
|||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
INIT_MMX cpuname
|
INIT_MMX cpuname
|
||||||
cglobal deblock_h_luma_8, 0,5,8,0x60+12
|
cglobal deblock_h_luma_8, 0,5,8,0x60+12
|
||||||
|
movsxdifnidn r1, r1d
|
||||||
mov r0, r0mp
|
mov r0, r0mp
|
||||||
mov r3, r1m
|
mov r3, r1m
|
||||||
lea r4, [r3*3]
|
lea r4, [r3*3]
|
||||||
@@ -646,6 +650,7 @@ cglobal deblock_%1_luma_intra_8, 4,6,16,0x10
|
|||||||
%else
|
%else
|
||||||
cglobal deblock_%1_luma_intra_8, 4,6,16,ARCH_X86_64*0x50-0x50
|
cglobal deblock_%1_luma_intra_8, 4,6,16,ARCH_X86_64*0x50-0x50
|
||||||
%endif
|
%endif
|
||||||
|
movsxdifnidn r1, r1d
|
||||||
lea r4, [r1*4]
|
lea r4, [r1*4]
|
||||||
lea r5, [r1*3] ; 3*stride
|
lea r5, [r1*3] ; 3*stride
|
||||||
dec r2d ; alpha-1
|
dec r2d ; alpha-1
|
||||||
@@ -703,6 +708,7 @@ INIT_MMX cpuname
|
|||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
cglobal deblock_h_luma_intra_8, 4,9,0,0x80
|
cglobal deblock_h_luma_intra_8, 4,9,0,0x80
|
||||||
movsxd r7, r1d
|
movsxd r7, r1d
|
||||||
|
movsxdifnidn r1, r1d
|
||||||
lea r8, [r7*3]
|
lea r8, [r7*3]
|
||||||
lea r6, [r0-4]
|
lea r6, [r0-4]
|
||||||
lea r5, [r0-4+r8]
|
lea r5, [r0-4+r8]
|
||||||
@@ -782,6 +788,7 @@ DEBLOCK_LUMA_INTRA v8
|
|||||||
INIT_MMX mmxext
|
INIT_MMX mmxext
|
||||||
|
|
||||||
%macro CHROMA_V_START 0
|
%macro CHROMA_V_START 0
|
||||||
|
movsxdifnidn r1, r1d
|
||||||
dec r2d ; alpha-1
|
dec r2d ; alpha-1
|
||||||
dec r3d ; beta-1
|
dec r3d ; beta-1
|
||||||
mov t5, r0
|
mov t5, r0
|
||||||
@@ -790,6 +797,7 @@ INIT_MMX mmxext
|
|||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
%macro CHROMA_H_START 0
|
%macro CHROMA_H_START 0
|
||||||
|
movsxdifnidn r1, r1d
|
||||||
dec r2d
|
dec r2d
|
||||||
dec r3d
|
dec r3d
|
||||||
sub r0, 2
|
sub r0, 2
|
||||||
|
@@ -162,6 +162,7 @@ cglobal deblock_v_luma_10, 5,5,8*(mmsize/16)
|
|||||||
%define ms2 [rsp+mmsize*2]
|
%define ms2 [rsp+mmsize*2]
|
||||||
%define am [rsp+mmsize*3]
|
%define am [rsp+mmsize*3]
|
||||||
%define bm [rsp+mmsize*4]
|
%define bm [rsp+mmsize*4]
|
||||||
|
movsxdifnidn r1, r1d
|
||||||
SUB rsp, pad
|
SUB rsp, pad
|
||||||
shl r2d, 2
|
shl r2d, 2
|
||||||
shl r3d, 2
|
shl r3d, 2
|
||||||
@@ -219,6 +220,7 @@ cglobal deblock_h_luma_10, 5,6,8*(mmsize/16)
|
|||||||
%define p2m [rsp+mmsize*4]
|
%define p2m [rsp+mmsize*4]
|
||||||
%define am [rsp+mmsize*5]
|
%define am [rsp+mmsize*5]
|
||||||
%define bm [rsp+mmsize*6]
|
%define bm [rsp+mmsize*6]
|
||||||
|
movsxdifnidn r1, r1d
|
||||||
SUB rsp, pad
|
SUB rsp, pad
|
||||||
shl r2d, 2
|
shl r2d, 2
|
||||||
shl r3d, 2
|
shl r3d, 2
|
||||||
@@ -349,6 +351,7 @@ cglobal deblock_v_luma_10, 5,5,15
|
|||||||
%define mask0 m7
|
%define mask0 m7
|
||||||
%define mask1 m10
|
%define mask1 m10
|
||||||
%define mask2 m11
|
%define mask2 m11
|
||||||
|
movsxdifnidn r1, r1d
|
||||||
shl r2d, 2
|
shl r2d, 2
|
||||||
shl r3d, 2
|
shl r3d, 2
|
||||||
LOAD_AB m12, m13, r2d, r3d
|
LOAD_AB m12, m13, r2d, r3d
|
||||||
@@ -377,6 +380,7 @@ cglobal deblock_v_luma_10, 5,5,15
|
|||||||
REP_RET
|
REP_RET
|
||||||
|
|
||||||
cglobal deblock_h_luma_10, 5,7,15
|
cglobal deblock_h_luma_10, 5,7,15
|
||||||
|
movsxdifnidn r1, r1d
|
||||||
shl r2d, 2
|
shl r2d, 2
|
||||||
shl r3d, 2
|
shl r3d, 2
|
||||||
LOAD_AB m12, m13, r2d, r3d
|
LOAD_AB m12, m13, r2d, r3d
|
||||||
@@ -492,6 +496,7 @@ DEBLOCK_LUMA_64
|
|||||||
CAT_XDEFINE t, i, [rsp+mmsize*(i-4)]
|
CAT_XDEFINE t, i, [rsp+mmsize*(i-4)]
|
||||||
%assign i i+1
|
%assign i i+1
|
||||||
%endrep
|
%endrep
|
||||||
|
movsxdifnidn r1, r1d
|
||||||
SUB rsp, pad
|
SUB rsp, pad
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
@@ -615,6 +620,7 @@ cglobal deblock_v_luma_intra_10, 4,7,16
|
|||||||
%define q2 m13
|
%define q2 m13
|
||||||
%define aa m5
|
%define aa m5
|
||||||
%define bb m14
|
%define bb m14
|
||||||
|
movsxdifnidn r1, r1d
|
||||||
lea r4, [r1*4]
|
lea r4, [r1*4]
|
||||||
lea r5, [r1*3] ; 3*stride
|
lea r5, [r1*3] ; 3*stride
|
||||||
neg r4
|
neg r4
|
||||||
@@ -668,6 +674,7 @@ cglobal deblock_h_luma_intra_10, 4,7,16
|
|||||||
%define p3 m4
|
%define p3 m4
|
||||||
%define spill [rsp]
|
%define spill [rsp]
|
||||||
%assign pad 24-(stack_offset&15)
|
%assign pad 24-(stack_offset&15)
|
||||||
|
movsxdifnidn r1, r1d
|
||||||
SUB rsp, pad
|
SUB rsp, pad
|
||||||
lea r4, [r1*4]
|
lea r4, [r1*4]
|
||||||
lea r5, [r1*3] ; 3*stride
|
lea r5, [r1*3] ; 3*stride
|
||||||
@@ -852,6 +859,7 @@ DEBLOCK_LUMA_INTRA
|
|||||||
; int8_t *tc0)
|
; int8_t *tc0)
|
||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
cglobal deblock_v_chroma_10, 5,7-(mmsize/16),8*(mmsize/16)
|
cglobal deblock_v_chroma_10, 5,7-(mmsize/16),8*(mmsize/16)
|
||||||
|
movsxdifnidn r1, r1d
|
||||||
mov r5, r0
|
mov r5, r0
|
||||||
sub r0, r1
|
sub r0, r1
|
||||||
sub r0, r1
|
sub r0, r1
|
||||||
@@ -887,6 +895,7 @@ cglobal deblock_v_chroma_10, 5,7-(mmsize/16),8*(mmsize/16)
|
|||||||
; int beta)
|
; int beta)
|
||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
cglobal deblock_v_chroma_intra_10, 4,6-(mmsize/16),8*(mmsize/16)
|
cglobal deblock_v_chroma_intra_10, 4,6-(mmsize/16),8*(mmsize/16)
|
||||||
|
movsxdifnidn r1, r1d
|
||||||
mov r4, r0
|
mov r4, r0
|
||||||
sub r0, r1
|
sub r0, r1
|
||||||
sub r0, r1
|
sub r0, r1
|
||||||
|
Reference in New Issue
Block a user