mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
swscale: aarch64: Avoid using the x18 register
x18 is a reserved platform register on Darwin and Windows. x8/w8 appear to be unused in this function (as do x10 and x14), so there is no reason to use x18 here; simply change the uses of x18/w18 to x8/w8 without any further rewrites. Signed-off-by: Martin Storsjö <martin@martin.st>
This commit is contained in:
parent
16650beedf
commit
872790b1f9
@ -22,7 +22,7 @@
|
|||||||
|
|
||||||
function ff_hscale_8_to_15_neon, export=1
|
function ff_hscale_8_to_15_neon, export=1
|
||||||
sbfiz x7, x6, #1, #32 // filterSize*2 (*2 because int16)
|
sbfiz x7, x6, #1, #32 // filterSize*2 (*2 because int16)
|
||||||
1: ldr w18, [x5], #4 // filterPos[idx]
|
1: ldr w8, [x5], #4 // filterPos[idx]
|
||||||
ldr w0, [x5], #4 // filterPos[idx + 1]
|
ldr w0, [x5], #4 // filterPos[idx + 1]
|
||||||
ldr w11, [x5], #4 // filterPos[idx + 2]
|
ldr w11, [x5], #4 // filterPos[idx + 2]
|
||||||
ldr w9, [x5], #4 // filterPos[idx + 3]
|
ldr w9, [x5], #4 // filterPos[idx + 3]
|
||||||
@ -34,14 +34,14 @@ function ff_hscale_8_to_15_neon, export=1
|
|||||||
movi v1.2D, #0 // val sum part 2 (for dst[1])
|
movi v1.2D, #0 // val sum part 2 (for dst[1])
|
||||||
movi v2.2D, #0 // val sum part 3 (for dst[2])
|
movi v2.2D, #0 // val sum part 3 (for dst[2])
|
||||||
movi v3.2D, #0 // val sum part 4 (for dst[3])
|
movi v3.2D, #0 // val sum part 4 (for dst[3])
|
||||||
add x17, x3, w18, UXTW // srcp + filterPos[0]
|
add x17, x3, w8, UXTW // srcp + filterPos[0]
|
||||||
add x18, x3, w0, UXTW // srcp + filterPos[1]
|
add x8, x3, w0, UXTW // srcp + filterPos[1]
|
||||||
add x0, x3, w11, UXTW // srcp + filterPos[2]
|
add x0, x3, w11, UXTW // srcp + filterPos[2]
|
||||||
add x11, x3, w9, UXTW // srcp + filterPos[3]
|
add x11, x3, w9, UXTW // srcp + filterPos[3]
|
||||||
mov w15, w6 // filterSize counter
|
mov w15, w6 // filterSize counter
|
||||||
2: ld1 {v4.8B}, [x17], #8 // srcp[filterPos[0] + {0..7}]
|
2: ld1 {v4.8B}, [x17], #8 // srcp[filterPos[0] + {0..7}]
|
||||||
ld1 {v5.8H}, [x16], #16 // load 8x16-bit filter values, part 1
|
ld1 {v5.8H}, [x16], #16 // load 8x16-bit filter values, part 1
|
||||||
ld1 {v6.8B}, [x18], #8 // srcp[filterPos[1] + {0..7}]
|
ld1 {v6.8B}, [x8], #8 // srcp[filterPos[1] + {0..7}]
|
||||||
ld1 {v7.8H}, [x12], #16 // load 8x16-bit at filter+filterSize
|
ld1 {v7.8H}, [x12], #16 // load 8x16-bit at filter+filterSize
|
||||||
uxtl v4.8H, v4.8B // unpack part 1 to 16-bit
|
uxtl v4.8H, v4.8B // unpack part 1 to 16-bit
|
||||||
smlal v0.4S, v4.4H, v5.4H // v0 accumulates srcp[filterPos[0] + {0..3}] * filter[{0..3}]
|
smlal v0.4S, v4.4H, v5.4H // v0 accumulates srcp[filterPos[0] + {0..3}] * filter[{0..3}]
|
||||||
|
Loading…
Reference in New Issue
Block a user