1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-01-08 13:22:53 +02:00

lavc/aarch64/simple_idct: fix iOS build without gas-preprocessor

Separates macro arguments with commas and passes .4H/.8H as macro
arguments instead of 4H/8H (the later form being interpreted as an
hexadecimal value).

Fixes ticket #6324.

Suggested-by: Martin Storsjö <martin@martin.st>
This commit is contained in:
Matthieu Bouron 2017-04-28 21:58:55 +02:00
parent 36cf422521
commit 5d0b8b1ae3

View File

@ -61,37 +61,37 @@ endconst
br x10
.endm
.macro smull1 a b c
.macro smull1 a, b, c
smull \a, \b, \c
.endm
.macro smlal1 a b c
.macro smlal1 a, b, c
smlal \a, \b, \c
.endm
.macro smlsl1 a b c
.macro smlsl1 a, b, c
smlsl \a, \b, \c
.endm
.macro idct_col4_top y1 y2 y3 y4 i l
smull\i v7.4S, \y3\().\l, z2
smull\i v16.4S, \y3\().\l, z6
smull\i v17.4S, \y2\().\l, z1
.macro idct_col4_top y1, y2, y3, y4, i, l
smull\i v7.4S, \y3\l, z1
smull\i v16.4S, \y3\l, z6
smull\i v17.4S, \y2\l, z1
add v19.4S, v23.4S, v7.4S
smull\i v18.4S, \y2\().\l, z3
smull\i v18.4S, \y2\l, z3
add v20.4S, v23.4S, v16.4S
smull\i v5.4S, \y2\().\l, z5
smull\i v5.4S, \y2\l, z5
sub v21.4S, v23.4S, v16.4S
smull\i v6.4S, \y2\().\l, z7
smull\i v6.4S, \y2\l, z7
sub v22.4S, v23.4S, v7.4S
smlal\i v17.4S, \y4\().\l, z3
smlsl\i v18.4S, \y4\().\l, z7
smlsl\i v5.4S, \y4\().\l, z1
smlsl\i v6.4S, \y4\().\l, z5
smlal\i v17.4S, \y4\l, z3
smlsl\i v18.4S, \y4\l, z7
smlsl\i v5.4S, \y4\l, z1
smlsl\i v6.4S, \y4\l, z5
.endm
.macro idct_row4_neon y1 y2 y3 y4 pass
.macro idct_row4_neon y1, y2, y3, y4, pass
ld1 {\y1\().2D-\y2\().2D}, [x2], #32
movi v23.4S, #1<<2, lsl #8
orr v5.16B, \y1\().16B, \y2\().16B
@ -101,7 +101,7 @@ endconst
mov x3, v5.D[1]
smlal v23.4S, \y1\().4H, z4
idct_col4_top \y1 \y2 \y3 \y4 1 4H
idct_col4_top \y1, \y2, \y3, \y4, 1, .4H
cmp x3, #0
beq \pass\()f
@ -153,7 +153,7 @@ endconst
trn2 \y4\().4S, v17.4S, v19.4S
.endm
.macro declare_idct_col4_neon i l
.macro declare_idct_col4_neon i, l
function idct_col4_neon\i
dup v23.4H, z4c
.if \i == 1
@ -164,14 +164,14 @@ function idct_col4_neon\i
.endif
smull v23.4S, v23.4H, z4
idct_col4_top v24 v25 v26 v27 \i \l
idct_col4_top v24, v25, v26, v27, \i, \l
mov x4, v28.D[\i - 1]
mov x5, v29.D[\i - 1]
cmp x4, #0
beq 1f
smull\i v7.4S, v28.\l, z4
smull\i v7.4S, v28\l, z4
add v19.4S, v19.4S, v7.4S
sub v20.4S, v20.4S, v7.4S
sub v21.4S, v21.4S, v7.4S
@ -181,17 +181,17 @@ function idct_col4_neon\i
cmp x5, #0
beq 2f
smlal\i v17.4S, v29.\l, z5
smlsl\i v18.4S, v29.\l, z1
smlal\i v5.4S, v29.\l, z7
smlal\i v6.4S, v29.\l, z3
smlal\i v17.4S, v29\l, z5
smlsl\i v18.4S, v29\l, z1
smlal\i v5.4S, v29\l, z7
smlal\i v6.4S, v29\l, z3
2: mov x5, v31.D[\i - 1]
cmp x4, #0
beq 3f
smull\i v7.4S, v30.\l, z6
smull\i v16.4S, v30.\l, z2
smull\i v7.4S, v30\l, z6
smull\i v16.4S, v30\l, z2
add v19.4S, v19.4S, v7.4S
sub v22.4S, v22.4S, v7.4S
sub v20.4S, v20.4S, v16.4S
@ -200,10 +200,10 @@ function idct_col4_neon\i
3: cmp x5, #0
beq 4f
smlal\i v17.4S, v31.\l, z7
smlsl\i v18.4S, v31.\l, z5
smlal\i v5.4S, v31.\l, z3
smlsl\i v6.4S, v31.\l, z1
smlal\i v17.4S, v31\l, z7
smlsl\i v18.4S, v31\l, z5
smlal\i v5.4S, v31\l, z3
smlsl\i v6.4S, v31\l, z1
4: addhn v7.4H, v19.4S, v17.4S
addhn2 v7.8H, v20.4S, v18.4S
@ -219,14 +219,14 @@ function idct_col4_neon\i
endfunc
.endm
declare_idct_col4_neon 1 4H
declare_idct_col4_neon 2 8H
declare_idct_col4_neon 1, .4H
declare_idct_col4_neon 2, .8H
function ff_simple_idct_put_neon, export=1
idct_start x2
idct_row4_neon v24 v25 v26 v27 1
idct_row4_neon v28 v29 v30 v31 2
idct_row4_neon v24, v25, v26, v27, 1
idct_row4_neon v28, v29, v30, v31, 2
bl idct_col4_neon1
sqshrun v1.8B, v7.8H, #COL_SHIFT-16
@ -263,8 +263,8 @@ endfunc
function ff_simple_idct_add_neon, export=1
idct_start x2
idct_row4_neon v24 v25 v26 v27 1
idct_row4_neon v28 v29 v30 v31 2
idct_row4_neon v24, v25, v26, v27, 1
idct_row4_neon v28, v29, v30, v31, 2
bl idct_col4_neon1
sshr v1.8H, V7.8H, #COL_SHIFT-16
@ -328,8 +328,8 @@ function ff_simple_idct_neon, export=1
idct_start x0
mov x2, x0
idct_row4_neon v24 v25 v26 v27 1
idct_row4_neon v28 v29 v30 v31 2
idct_row4_neon v24, v25, v26, v27, 1
idct_row4_neon v28, v29, v30, v31, 2
add x2, x2, #-128
bl idct_col4_neon1