1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-01-24 13:56:33 +02:00

lavu/floatdsp: RISC-V V scalarproduct_float

This commit is contained in:
Rémi Denis-Courmont 2022-09-26 17:52:38 +03:00 committed by Lynne
parent b493370662
commit cd77662953
2 changed files with 22 additions and 0 deletions

View File

@ -38,6 +38,7 @@ void ff_vector_fmul_add_rvv(float *dst, const float *src0, const float *src1,
void ff_vector_fmul_reverse_rvv(float *dst, const float *src0, void ff_vector_fmul_reverse_rvv(float *dst, const float *src0,
const float *src1, int len); const float *src1, int len);
void ff_butterflies_float_rvv(float *v1, float *v2, int len); void ff_butterflies_float_rvv(float *v1, float *v2, int len);
/* Dot product of v1 and v2 over len elements (RISC-V V assembly routine). */
float ff_scalarproduct_float_rvv(const float *v1, const float *v2, int len);
void ff_vector_dmul_rvv(double *dst, const double *src0, const double *src1, void ff_vector_dmul_rvv(double *dst, const double *src0, const double *src1,
int len); int len);
@ -59,6 +60,7 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
fdsp->vector_fmul_add = ff_vector_fmul_add_rvv; fdsp->vector_fmul_add = ff_vector_fmul_add_rvv;
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_rvv; fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_rvv;
fdsp->butterflies_float = ff_butterflies_float_rvv; fdsp->butterflies_float = ff_butterflies_float_rvv;
fdsp->scalarproduct_float = ff_scalarproduct_float_rvv;
} }
if (flags & AV_CPU_FLAG_RVV_F64) { if (flags & AV_CPU_FLAG_RVV_F64) {

View File

@ -165,6 +165,26 @@ func ff_butterflies_float_rvv, zve32f
ret ret
endfunc endfunc
// a0 = (a0).(a1) [0..a2-1]
// Dot product of two float arrays: a0 and a1 point to the inputs, a2 is the
// element count. The sum is produced in fa0; the NOHWF-prefixed instruction
// additionally copies it to a0 (presumably for ABIs without hardware float
// argument registers - confirm against the NOHWF macro definition).
func ff_scalarproduct_float_rvv, zve32f
        // Force vl = 1 with an immediate AVL. A "vsetvli zero, zero, ..."
        // here would keep the vl inherited from the caller: with vl == 0 the
        // vmv.s.x below performs no write and the accumulator would start
        // uninitialised, and the encoding is reserved if the inherited vtype
        // has a different SEW/LMUL ratio.
        vsetivli     zero, 1, e32, m1, ta, ma
        vmv.s.x      v8, zero                 // v8[0] = 0 (all-zero bits = +0.0f)
1:
        vsetvli      t0, a2, e32, m1, ta, ma  // t0 = elements handled this pass
        vle32.v      v16, (a0)
        sub          a2, a2, t0               // elements still to process
        vle32.v      v24, (a1)
        sh2add       a0, t0, a0               // a0 += 4 * t0
        vfmul.vv     v16, v16, v24            // element-wise products
        sh2add       a1, t0, a1               // a1 += 4 * t0
        vfredusum.vs v8, v16, v8              // v8[0] += sum of the products
        bnez         a2, 1b

        vfmv.f.s     fa0, v8
NOHWF   fmv.x.w      a0, fa0
        ret
endfunc
// (a0) = (a1) * (a2) [0..a3-1] // (a0) = (a1) * (a2) [0..a3-1]
func ff_vector_dmul_rvv, zve64d func ff_vector_dmul_rvv, zve64d
1: 1: