mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-01-13 21:28:01 +02:00
58bcdeb742
aarch64 A55: chrRangeFromJpeg8_1920_c: 28836.2 (1.00x) chrRangeFromJpeg8_1920_neon: 5312.6 (5.43x) 5313.9 (5.43x) chrRangeToJpeg8_1920_c: 44196.2 (1.00x) chrRangeToJpeg8_1920_neon: 6034.6 (7.32x) 5551.3 (7.96x) lumRangeFromJpeg8_1920_c: 15388.5 (1.00x) lumRangeFromJpeg8_1920_neon: 3150.7 (4.88x) 3152.3 (4.88x) lumRangeToJpeg8_1920_c: 23069.7 (1.00x) lumRangeToJpeg8_1920_neon: 3873.2 (5.96x) 3628.7 (6.36x) aarch64 A76: chrRangeFromJpeg8_1920_c: 6334.7 (1.00x) chrRangeFromJpeg8_1920_neon: 2264.5 (2.80x) 2344.5 (2.70x) chrRangeToJpeg8_1920_c: 11474.5 (1.00x) chrRangeToJpeg8_1920_neon: 2646.5 (4.34x) 2824.2 (4.06x) lumRangeFromJpeg8_1920_c: 4453.2 (1.00x) lumRangeFromJpeg8_1920_neon: 1104.8 (4.03x) 1104.5 (4.03x) lumRangeToJpeg8_1920_c: 6645.0 (1.00x) lumRangeToJpeg8_1920_neon: 1310.5 (5.07x) 1329.8 (5.00x)
97 lines
3.4 KiB
ArmAsm
97 lines
3.4 KiB
ArmAsm
/*
|
|
* Copyright (c) 2024 Ramiro Polla
|
|
*
|
|
* This file is part of FFmpeg.
|
|
*
|
|
* FFmpeg is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
*
|
|
* FFmpeg is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with FFmpeg; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
|
|
#include "libavutil/aarch64/asm.S"
|
|
|
|
/*
 * lumConvertRange name, fromto, mult, offset, shift
 *
 * Emits the exported function ff_<name>, which rewrites an array of signed
 * 16-bit values in place:
 *
 *     v[i] = (v[i] * mult + offset) >> shift
 *
 * The multiply-add is carried out on 32-bit lanes. When fromto is the literal
 * "To", the result is narrowed back to 16 bits with signed saturation
 * (sqshrn); for any other value it is narrowed without saturation (shrn).
 *
 * Register usage of the generated function:
 *   x0   in/out  pointer to the values; advanced by 16 bytes per iteration
 *   w1   in      number of values remaining; decremented by 8 per iteration
 *   w4   scratch used to build the constants
 *   v0, v16, v18, v20, v22, v30, v31 are overwritten
 *
 * The loop body executes before the count is tested and always loads and
 * stores a whole vector of eight values, so a count that is not a positive
 * multiple of 8 still touches full 16-byte groups.
 * NOTE(review): presumably callers guarantee a positive count and a buffer
 * padded to a multiple of eight values - confirm against the call sites.
 */
.macro lumConvertRange name, fromto, mult, offset, shift
function ff_\name, export=1
        // Broadcast the multiplier to four 32-bit lanes.
        mov             w4, #\mult
        dup             v30.4s, w4
        // Build the 32-bit offset from its low and high halfwords, then broadcast it.
        movz            w4, #(\offset & 0xffff)
        movk            w4, #((\offset >> 16) & 0xffff), lsl #16
        dup             v31.4s, w4
1:
        ld1             {v0.8h}, [x0]                   // eight values; the pointer moves at the store
        mov             v16.16b, v31.16b                // accumulators start out holding the offset
        mov             v18.16b, v31.16b
        sxtl            v20.4s, v0.4h                   // sign-extend values 0-3 to 32 bits
        sxtl2           v22.4s, v0.8h                   // sign-extend values 4-7 to 32 bits
        mla             v16.4s, v20.4s, v30.4s          // offset + value * mult
        mla             v18.4s, v22.4s, v30.4s
.ifc \fromto, To
        sqshrn          v0.4h, v16.4s, #\shift          // shift and saturate to signed 16 bits
        sqshrn2         v0.8h, v18.4s, #\shift
.else
        shrn            v0.4h, v16.4s, #\shift          // shift and narrow, no saturation
        shrn2           v0.8h, v18.4s, #\shift
.endif
        subs            w1, w1, #8                      // sets the flags consumed by b.gt below
        st1             {v0.8h}, [x0], #16              // write back in place and advance
        b.gt            1b
        ret
endfunc
.endm
|
|
|
|
/*
 * chrConvertRange name, fromto, mult, offset, shift
 *
 * Emits the exported function ff_<name>, which rewrites two arrays of signed
 * 16-bit values in place, applying the same formula to both:
 *
 *     v[i] = (v[i] * mult + offset) >> shift
 *
 * The multiply-add is carried out on 32-bit lanes. When fromto is the literal
 * "To", the result is narrowed back to 16 bits with signed saturation
 * (sqshrn); for any other value it is narrowed without saturation (shrn).
 *
 * Register usage of the generated function:
 *   x0   in/out  pointer to the first array; advanced by 16 bytes per iteration
 *   x1   in/out  pointer to the second array; advanced by 16 bytes per iteration
 *   w2   in      number of values remaining per array; decremented by 8 per iteration
 *   w4   scratch used to build the constants
 *   v0, v1, v16-v23, v30, v31 are overwritten
 *
 * NOTE(review): going by the macro name, the two arrays are presumably the
 * two chroma planes - confirm against the call sites.
 *
 * The loop body executes before the count is tested and always loads and
 * stores a whole vector of eight values per array, so a count that is not a
 * positive multiple of 8 still touches full 16-byte groups.
 * NOTE(review): presumably callers guarantee a positive count and buffers
 * padded to a multiple of eight values - confirm against the call sites.
 */
.macro chrConvertRange name, fromto, mult, offset, shift
function ff_\name, export=1
        // Broadcast the multiplier to four 32-bit lanes.
        mov             w4, #\mult
        dup             v30.4s, w4
        // Build the 32-bit offset from its low and high halfwords, then broadcast it.
        movz            w4, #(\offset & 0xffff)
        movk            w4, #((\offset >> 16) & 0xffff), lsl #16
        dup             v31.4s, w4
1:
        ld1             {v0.8h}, [x0]                   // eight values from the first array
        ld1             {v1.8h}, [x1]                   // eight values from the second array
        mov             v16.16b, v31.16b                // accumulators start out holding the offset
        mov             v17.16b, v31.16b
        mov             v18.16b, v31.16b
        mov             v19.16b, v31.16b
        sxtl            v20.4s, v0.4h                   // first array, values 0-3, widened to 32 bits
        sxtl            v21.4s, v1.4h                   // second array, values 0-3
        sxtl2           v22.4s, v0.8h                   // first array, values 4-7
        sxtl2           v23.4s, v1.8h                   // second array, values 4-7
        mla             v16.4s, v20.4s, v30.4s          // offset + value * mult
        mla             v17.4s, v21.4s, v30.4s
        mla             v18.4s, v22.4s, v30.4s
        mla             v19.4s, v23.4s, v30.4s
.ifc \fromto, To
        sqshrn          v0.4h, v16.4s, #\shift          // shift and saturate to signed 16 bits
        sqshrn          v1.4h, v17.4s, #\shift
        sqshrn2         v0.8h, v18.4s, #\shift
        sqshrn2         v1.8h, v19.4s, #\shift
.else
        shrn            v0.4h, v16.4s, #\shift          // shift and narrow, no saturation
        shrn            v1.4h, v17.4s, #\shift
        shrn2           v0.8h, v18.4s, #\shift
        shrn2           v1.8h, v19.4s, #\shift
.endif
        subs            w2, w2, #8                      // sets the flags consumed by b.gt below
        st1             {v0.8h}, [x0], #16              // write back in place and advance
        st1             {v1.8h}, [x1], #16
        b.gt            1b
        ret
endfunc
.endm
|
|
|
|
/*
 * Instantiate the four exported functions. Each one evaluates
 * (value * mult + offset) >> shift per 16-bit element; the "To" variants
 * narrow with signed saturation, the "From" variants narrow without it.
 * NOTE(review): the constants are presumably fixed-point coefficients for
 * converting between limited and full (JPEG) range - verify them against the
 * C reference implementation before changing any of them.
 */
//              name                    fromto  mult   offset     shift
lumConvertRange lumRangeToJpeg_neon,    To,     19077, -39057361, 14
chrConvertRange chrRangeToJpeg_neon,    To,     4663,  -9289992,  12
lumConvertRange lumRangeFromJpeg_neon,  From,   14071, 33561947,  14
chrConvertRange chrRangeFromJpeg_neon,  From,   1799,  4081085,   11
|