1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-01-24 13:56:33 +02:00

lavc/llauddsp: R-V V scalarproduct_and_madd_int32

scalarproduct_and_madd_int32_c:      10899.7
scalarproduct_and_madd_int32_rvv_i32: 1749.0
This commit is contained in:
Rémi Denis-Courmont 2023-11-12 22:12:53 +02:00
parent 45d0eb3f70
commit d076517056
2 changed files with 30 additions and 0 deletions

View File

@ -27,6 +27,9 @@
/* RISC-V Vector assembly routine; installed below as
 * c->scalarproduct_and_madd_int16 when the required CPU flags are set. */
int32_t ff_scalarproduct_and_madd_int16_rvv(int16_t *v1, const int16_t *v2,
const int16_t *v3, int len,
int mul);
/* RISC-V Vector assembly routine; installed below as
 * c->scalarproduct_and_madd_int32 when the required CPU flags are set.
 * Unlike the int16 variant, v2 points to 32-bit elements while v1 and v3
 * remain 16-bit. */
int32_t ff_scalarproduct_and_madd_int32_rvv(int16_t *v1, const int32_t *v2,
const int16_t *v3, int len,
int mul);
av_cold void ff_llauddsp_init_riscv(LLAudDSPContext *c)
{
@ -35,6 +38,7 @@ av_cold void ff_llauddsp_init_riscv(LLAudDSPContext *c)
if ((flags & AV_CPU_FLAG_RVV_I32) && (flags & AV_CPU_FLAG_RVB_ADDR)) {
c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_rvv;
c->scalarproduct_and_madd_int32 = ff_scalarproduct_and_madd_int32_rvv;
}
#endif
}

View File

@ -43,3 +43,29 @@ func ff_scalarproduct_and_madd_int16_rvv, zve32x
vmv.x.s a0, v0
ret
endfunc
// int32_t ff_scalarproduct_and_madd_int32_rvv(int16_t *v1, const int32_t *v2,
//                                             const int16_t *v3, int len,
//                                             int mul);
//
// Registers on entry (assuming the standard RISC-V integer argument order):
//   a0 = v1 (16-bit elements, read and written back)
//   a1 = v2 (32-bit elements, read only)
//   a2 = v3 (16-bit elements, read only)
//   a3 = len (element count)
//   a4 = mul (scalar multiplier)
// Result in a0: sum over i of v2[i] * sign_extend(v1[i]), accumulated in
// 32-bit lanes using the values of v1[] loaded before the update.
// Side effect: v1[i] += mul * v3[i], computed in 16-bit lanes.
//
// Vector register usage (LMUL=8 groups at e32, LMUL=4 groups at e16):
//   v0  = 32-bit partial sums
//   v8  = v1[] as loaded (16-bit), later the updated v1[] to store
//   v16 = v2[]
//   v24 = v1[] widened to 32 bits, later reused for v3[] (16-bit)
// sh1add/sh2add come from the Zba extension; the C init code only installs
// this function when AV_CPU_FLAG_RVB_ADDR is set alongside AV_CPU_FLAG_RVV_I32.
func ff_scalarproduct_and_madd_int32_rvv, zve32x
// Zero every lane of the accumulator group (VL = VLMAX).
vsetvli t0, zero, e32, m8, ta, ma
vmv.v.x v0, zero
1:
// t0 = number of elements handled this pass. Tail-undisturbed so that
// accumulator lanes at or beyond VL keep their partial sums on a short pass.
vsetvli t0, a3, e32, m8, tu, ma
// Load v1[] as 16-bit elements (EEW=16 under SEW=32, so half the LMUL).
vle16.v v8, (a0)
// a3 = elements still to process after this pass.
sub a3, a3, t0
// Sign-extend v1[] to 32 bits for the dot product.
vsext.vf2 v24, v8
vle32.v v16, (a1)
// Advance v2 by t0 elements of 4 bytes.
sh2add a1, t0, a1
// acc[] += v2[] * v1[]
vmacc.vv v0, v16, v24
// Switch to 16-bit elements; the SEW/LMUL ratio is unchanged so VL is kept.
vsetvli zero, zero, e16, m4, ta, ma
vle16.v v24, (a2)
// Advance v3 by t0 elements of 2 bytes.
sh1add a2, t0, a2
// v1[] += mul * v3[]  (v8 still holds the 16-bit v1[] values loaded above)
vmacc.vx v8, a4, v24
vse16.v v8, (a0)
// Advance v1 by t0 elements of 2 bytes.
sh1add a0, t0, a0
bnez a3, 1b
// Horizontal sum of all accumulator lanes, seeded with a scalar zero in v8[0].
vsetvli t0, zero, e32, m8, ta, ma
vmv.s.x v8, zero
vredsum.vs v0, v0, v8
// Return element 0 of the reduction result.
vmv.x.s a0, v0
ret
endfunc