1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-01-13 21:28:01 +02:00

Merge commit 'f23d26a6864128001b03876b0b92fffe131f2060'

* commit 'f23d26a6864128001b03876b0b92fffe131f2060':
  h264: avoid using uninitialized memory in NEON chroma mc

Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
Michael Niedermayer 2014-06-23 20:34:25 +02:00
commit 32cf26cc6a
2 changed files with 111 additions and 8 deletions

View File

@@ -95,9 +95,10 @@ function ff_\type\()_\codec\()_chroma_mc8_neon, export=1
b.gt 1b b.gt 1b
ret ret
2: tst w6, w6 2: adds w12, w12, w6
add w12, w12, w6
dup v0.8B, w4 dup v0.8B, w4
b.eq 5f
tst w6, w6
dup v1.8B, w12 dup v1.8B, w12
b.eq 4f b.eq 4f
@@ -161,6 +162,33 @@ function ff_\type\()_\codec\()_chroma_mc8_neon, export=1
st1 {v17.8B}, [x0], x2 st1 {v17.8B}, [x0], x2
b.gt 4b b.gt 4b
ret ret
// Loop 5 (8-pixel-wide variant): reached from label 2 via `b.eq 5f`, i.e. when
// `adds w12, w12, w6` produced zero. Every source byte is scaled by the single
// coefficient broadcast into v0 (dup from w4 at label 2); no neighbouring pixel
// and no additional source row is read. Two rows are produced per iteration.
// NOTE(review): presumably this is the "no fractional offset" case and exists so
// that zero-weighted taps no longer load memory outside the block (see the
// commit title) - confirm against the part of the function not shown here.
//
// Load two 8-byte source rows; x1 advances by the stride x2 after each load.
5: ld1 {v4.8B}, [x1], x2
ld1 {v5.8B}, [x1], x2
// Prefetch hint for the row x1 now points at (the next row to be loaded).
prfm pldl1strm, [x1]
// Two rows consumed. The flags set here are the ones tested by `b.gt 5b` at the
// bottom of the loop: nothing in between writes NZCV.
subs w3, w3, #2
// Widening multiply: 8 x u8 by the coefficient in v0 -> 8 x u16.
umull v16.8H, v4.8B, v0.8B
umull v17.8H, v5.8B, v0.8B
// Prefetch hint for the row after that one.
prfm pldl1strm, [x1, x2]
.ifc \codec,h264
// h264: rounding shift right by 6, narrowing back to bytes.
rshrn v16.8B, v16.8H, #6
rshrn v17.8B, v17.8H, #6
.else
// Other codecs: add v22 first, then a truncating narrowing shift by 6.
// NOTE(review): v22 is not initialised in this excerpt; presumably it holds the
// codec-specific rounding bias set up earlier in the function - confirm.
add v16.8H, v16.8H, v22.8H
add v17.8H, v17.8H, v22.8H
shrn v16.8B, v16.8H, #6
shrn v17.8B, v17.8H, #6
.endif
.ifc \type,avg
// avg variant: unsigned rounding halving add, (a + b + 1) >> 1, of the new
// result with two rows read through x8.
// NOTE(review): x8 is not set in this excerpt; presumably a read cursor over the
// destination buffer - confirm.
ld1 {v20.8B}, [x8], x2
ld1 {v21.8B}, [x8], x2
urhadd v16.8B, v16.8B, v20.8B
urhadd v17.8B, v17.8B, v21.8B
.endif
// Store both result rows; x0 advances by the stride x2 after each store.
st1 {v16.8B}, [x0], x2
st1 {v17.8B}, [x0], x2
// Continue while the remaining row count (w3, signed) is still above zero.
b.gt 5b
ret
endfunc endfunc
.endm .endm
@@ -238,9 +266,10 @@ function ff_\type\()_\codec\()_chroma_mc4_neon, export=1
b.gt 1b b.gt 1b
ret ret
2: tst w6, w6 2: adds w12, w12, w6
add w12, w12, w6
dup v30.8B, w4 dup v30.8B, w4
b.eq 5f
tst w6, w6
dup v31.8B, w12 dup v31.8B, w12
trn1 v0.2S, v30.2S, v31.2S trn1 v0.2S, v30.2S, v31.2S
trn2 v1.2S, v30.2S, v31.2S trn2 v1.2S, v30.2S, v31.2S
@@ -303,6 +332,28 @@ function ff_\type\()_\codec\()_chroma_mc4_neon, export=1
st1 {v16.S}[1], [x0], x2 st1 {v16.S}[1], [x0], x2
b.gt 4b b.gt 4b
ret ret
// Loop 5 (4-pixel-wide variant): reached from label 2 via `b.eq 5f`, i.e. when
// `adds w12, w12, w6` produced zero. Every source byte is scaled by the single
// coefficient broadcast into v30 (dup from w4 at label 2); no neighbouring pixel
// and no additional source row is read. Two 4-byte rows are packed into one
// 64-bit vector and processed together.
// NOTE(review): presumably this is the "no fractional offset" case and exists so
// that zero-weighted taps no longer load memory outside the block (see the
// commit title) - confirm against the part of the function not shown here.
//
// Row n goes to 32-bit lane 0, row n+1 to lane 1; x1 advances by the stride x2
// after each load.
5: ld1 {v4.S}[0], [x1], x2
ld1 {v4.S}[1], [x1], x2
// Widening multiply: 8 x u8 (both rows) by the coefficient in v30 -> 8 x u16.
umull v18.8H, v4.8B, v30.8B
// Two rows consumed. The flags set here are the ones tested by `b.gt 5b` at the
// bottom of the loop: nothing in between writes NZCV.
subs w3, w3, #2
// Prefetch hint for the next source row.
prfm pldl1strm, [x1]
.ifc \codec,h264
// h264: rounding shift right by 6, narrowing back to bytes.
rshrn v16.8B, v18.8H, #6
.else
// Other codecs: add v22 first, then a truncating narrowing shift by 6.
// NOTE(review): v22 is not initialised in this excerpt; presumably it holds the
// codec-specific rounding bias set up earlier in the function - confirm.
add v18.8H, v18.8H, v22.8H
shrn v16.8B, v18.8H, #6
.endif
.ifc \type,avg
// avg variant: unsigned rounding halving add, (a + b + 1) >> 1, of the new
// result with two 4-byte rows read through x8.
// NOTE(review): x8 is not set in this excerpt; presumably a read cursor over the
// destination buffer - confirm.
ld1 {v20.S}[0], [x8], x2
ld1 {v20.S}[1], [x8], x2
urhadd v16.8B, v16.8B, v20.8B
.endif
// Same address as the prefetch above: x1 has not been modified in between.
prfm pldl1strm, [x1]
// Store lane 0 and lane 1 as two consecutive rows; x0 advances by x2 each time.
st1 {v16.S}[0], [x0], x2
st1 {v16.S}[1], [x0], x2
// Continue while the remaining row count (w3, signed) is still above zero.
b.gt 5b
ret
endfunc endfunc
.endm .endm

View File

@@ -96,9 +96,10 @@ T cmp r7, #0
pop {r4-r7, pc} pop {r4-r7, pc}
2: tst r6, r6 2: adds r12, r12, r6
add r12, r12, r6
vdup.8 d0, r4 vdup.8 d0, r4
beq 5f
tst r6, r6
vdup.8 d1, r12 vdup.8 d1, r12
beq 4f beq 4f
@@ -163,6 +164,33 @@ T cmp r7, #0
vst1.8 {d17}, [r0,:64], r2 vst1.8 {d17}, [r0,:64], r2
bgt 4b bgt 4b
pop {r4-r7, pc}
/* Loop 5 (8-pixel-wide variant): reached from label 2 via `beq 5f`, i.e. when
 * `adds r12, r12, r6` produced zero. Every source byte is scaled by the single
 * coefficient broadcast into d0 (vdup.8 from r4 at label 2); no neighbouring
 * pixel and no additional source row is read. Two rows are produced per
 * iteration.
 * NOTE(review): presumably this is the "no fractional offset" case and exists so
 * that zero-weighted taps no longer load memory outside the block (see the
 * commit title) - confirm against the part of the function not shown here. */
/* Load two 8-byte source rows; r1 advances by the stride r2 after each load. */
5: vld1.8 {d4}, [r1], r2
vld1.8 {d5}, [r1], r2
/* Preload hint for the row r1 now points at (the next row to be loaded). */
pld [r1]
/* Two rows consumed. The flags set here are the ones tested by `bgt 5b` at the
 * bottom of the loop: none of the instructions in between update the APSR
 * condition flags. */
subs r3, r3, #2
/* Widening multiply: 8 x u8 by the coefficient in d0 -> 8 x u16. */
vmull.u8 q8, d4, d0
vmull.u8 q9, d5, d0
/* Preload hint for the row after that one. */
pld [r1, r2]
.ifc \codec,h264
/* h264: rounding shift right by 6, narrowing back to bytes. The results land in
 * d16/d17, which together form q8. */
vrshrn.u16 d16, q8, #6
vrshrn.u16 d17, q9, #6
.else
/* Other codecs: add q11 first, then a truncating narrowing shift by 6.
 * NOTE(review): q11 is not initialised in this excerpt; presumably it holds the
 * codec-specific rounding bias set up earlier in the function - confirm. */
vadd.u16 q8, q8, q11
vadd.u16 q9, q9, q11
vshrn.u16 d16, q8, #6
vshrn.u16 d17, q9, #6
.endif
.ifc \type,avg
/* avg variant: load two rows through lr (declared 64-bit aligned) into d20/d21
 * (= q10) and apply an unsigned rounding halving add, (a + b + 1) >> 1, to both
 * result rows at once.
 * NOTE(review): lr is not set in this excerpt; presumably a read cursor over
 * the destination buffer - confirm. */
vld1.8 {d20}, [lr,:64], r2
vld1.8 {d21}, [lr,:64], r2
vrhadd.u8 q8, q8, q10
.endif
/* Store both result rows (declared 64-bit aligned); r0 advances by r2 each time. */
vst1.8 {d16}, [r0,:64], r2
vst1.8 {d17}, [r0,:64], r2
/* Continue while the remaining row count (r3, signed) is still above zero. */
bgt 5b
pop {r4-r7, pc} pop {r4-r7, pc}
endfunc endfunc
.endm .endm
@@ -245,9 +273,10 @@ T cmp r7, #0
pop {r4-r7, pc} pop {r4-r7, pc}
2: tst r6, r6 2: adds r12, r12, r6
add r12, r12, r6
vdup.8 d0, r4 vdup.8 d0, r4
beq 5f
tst r6, r6
vdup.8 d1, r12 vdup.8 d1, r12
vtrn.32 d0, d1 vtrn.32 d0, d1
@@ -310,6 +339,29 @@ T cmp r7, #0
vst1.32 {d16[1]}, [r0,:32], r2 vst1.32 {d16[1]}, [r0,:32], r2
bgt 4b bgt 4b
pop {r4-r7, pc}
/* Loop 5 (4-pixel-wide variant): reached from label 2 via `beq 5f`, i.e. when
 * `adds r12, r12, r6` produced zero. The branch is taken before the
 * `vtrn.32 d0, d1` that follows at label 2, so d0 still holds the single
 * coefficient broadcast from r4 in every byte. No neighbouring pixel and no
 * additional source row is read. Two 4-byte rows are packed into one D register
 * and processed together.
 * NOTE(review): presumably this is the "no fractional offset" case and exists so
 * that zero-weighted taps no longer load memory outside the block (see the
 * commit title) - confirm against the part of the function not shown here. */
/* Row n goes to 32-bit lane 0, row n+1 to lane 1; r1 advances by the stride r2
 * after each load. */
5: vld1.32 {d4[0]}, [r1], r2
vld1.32 {d4[1]}, [r1], r2
/* Widening multiply: 8 x u8 (both rows) by the coefficient in d0 -> 8 x u16. */
vmull.u8 q8, d4, d0
/* Two rows consumed. The flags set here are the ones tested by `bgt 5b` at the
 * bottom of the loop: none of the instructions in between update the APSR
 * condition flags. */
subs r3, r3, #2
/* Preload hint for the next source row. */
pld [r1]
.ifc \codec,h264
/* h264: rounding shift right by 6, narrowing back to bytes. */
vrshrn.u16 d16, q8, #6
.else
/* Other codecs: add q11 first, then a truncating narrowing shift by 6.
 * NOTE(review): q11 is not initialised in this excerpt; presumably it holds the
 * codec-specific rounding bias set up earlier in the function - confirm. */
vadd.u16 q8, q8, q11
vshrn.u16 d16, q8, #6
.endif
.ifc \type,avg
/* avg variant: load two 4-byte rows through lr (declared 32-bit aligned) and
 * apply an unsigned rounding halving add, (a + b + 1) >> 1.
 * NOTE(review): lr is not set in this excerpt; presumably a read cursor over
 * the destination buffer - confirm. */
vld1.32 {d20[0]}, [lr,:32], r2
vld1.32 {d20[1]}, [lr,:32], r2
vrhadd.u8 d16, d16, d20
.endif
/* Same address as the preload above: r1 has not been modified in between. */
pld [r1]
/* Store lane 0 and lane 1 as two consecutive rows (declared 32-bit aligned);
 * r0 advances by r2 each time. */
vst1.32 {d16[0]}, [r0,:32], r2
vst1.32 {d16[1]}, [r0,:32], r2
/* Continue while the remaining row count (r3, signed) is still above zero. */
bgt 5b
pop {r4-r7, pc} pop {r4-r7, pc}
endfunc endfunc
.endm .endm