1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-08-15 14:13:16 +02:00

lavc/audiodsp: rework RISC-V V scalar product

Take the vector reduction out of the loop, and unroll the loop body by using larger vector register groups (e16/m4 instead of e16/m1).

Before:
audiodsp.scalarproduct_int16_c: 12321.0
audiodsp.scalarproduct_int16_rvv_i32: 4175.7

After:
audiodsp.scalarproduct_int16_c: 12320.5
audiodsp.scalarproduct_int16_rvv_i32: 1230.2
This commit is contained in:
Rémi Denis-Courmont
2023-07-17 20:46:06 +03:00
parent 29b9d616c2
commit 44cac1def0

View File

@@ -21,21 +21,22 @@
#include "libavutil/riscv/asm.S" #include "libavutil/riscv/asm.S"
/*
 * Reviewer annotations (not part of either side of the change).
 *
 * This listing appears to be a side-by-side diff: most lines carry the old
 * instruction followed by the new one, and a line holding a single
 * instruction exists on one side only.
 *
 * Register roles, as the code uses them: a0 and a1 are the addresses of the
 * two 16-bit input arrays, a2 is the number of elements still to process,
 * t0 receives the element count handled per iteration, and the 32-bit sum
 * of products is returned in a0.
 */
func ff_scalarproduct_int16_rvv, zve32x func ff_scalarproduct_int16_rvv, zve32x
/* old: vl = 1 at e32/m1, enough to write one scalar element.
 * new: AVL register is zero with a non-zero destination, which requests
 *      vl = VLMAX at e32/m8 so the whole accumulator group can be cleared. */
vsetivli zero, 1, e32, m1, ta, ma vsetvli t0, zero, e32, m8, ta, ma
/* old: v8[0] = 0, the running scalar sum.
 * new: every 32-bit lane of the v8 group (v8-v15) = 0; one partial sum
 *      per lane. */
vmv.s.x v8, zero vmv.v.x v8, zero
/* new only: v0[0] = 0, the scalar seed for the single reduction after the
 * loop. */
vmv.s.x v0, zero
1: 1:
/* t0 = number of 16-bit elements processed this iteration (at most a2).
 * old: one register per operand (m1), tail agnostic.
 * new: four registers per operand (m4), tail undisturbed: lanes at index
 *      >= vl keep their value, so a final iteration with a smaller vl does
 *      not destroy partial sums accumulated in the upper lanes. */
vsetvli t0, a2, e16, m1, ta, ma vsetvli t0, a2, e16, m4, tu, ma
/* Load t0 elements from the first array. */
vle16.v v16, (a0) vle16.v v16, (a0)
/* a2 -= t0: elements remaining after this iteration. */
sub a2, a2, t0 sub a2, a2, t0
/* Load t0 elements from the second array. */
vle16.v v24, (a1) vle16.v v24, (a1)
/* a0 += 2 * t0: advance the first pointer by t0 16-bit elements. */
sh1add a0, t0, a0 sh1add a0, t0, a0
/* old: v0 group = widened 32-bit signed products v16 * v24.
 * new: v8 group += widened 32-bit signed products v16 * v24, lane by lane;
 *      no cross-lane reduction inside the loop. */
vwmul.vv v0, v16, v24 vwmacc.vv v8, v16, v24
/* a1 += 2 * t0: advance the second pointer by t0 16-bit elements. */
sh1add a1, t0, a1 sh1add a1, t0, a1
/* old only: switch to 32-bit elements (m2 covers the widened products)... */
vsetvli zero, t0, e32, m2, ta, ma
/* old only: ...and fold this iteration's products into v8[0]. This
 * per-iteration reduction is what the commit message says was taken out of
 * the loop. */
vredsum.vs v8, v0, v8
/* Loop until no elements remain. */
bnez a2, 1b bnez a2, 1b
/* old: return the running sum v8[0] in a0.
 * new: vl = VLMAX at e32/m8 so the reduction below covers every
 *      accumulator lane, including lanes untouched by a short last pass. */
vmv.x.s a0, v8 vsetvli t0, zero, e32, m8, ta, ma
/* new only: v0[0] = v0[0] + sum of all lanes of the v8 group. */
vredsum.vs v0, v8, v0
/* new only: return the total v0[0] in a0. */
vmv.x.s a0, v0
ret ret
endfunc endfunc