1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-01-24 13:56:33 +02:00

lavu/floatdsp: RISC-V V vector_fmul_window

This commit is contained in:
Rémi Denis-Courmont 2022-09-26 17:52:37 +03:00 committed by Lynne
parent 9aeb6aca3a
commit b493370662
2 changed files with 36 additions and 0 deletions

View File

@ -31,6 +31,8 @@ void ff_vector_fmac_scalar_rvv(float *dst, const float *src, float mul,
int len); int len);
void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul, void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul,
int len); int len);
/* RISC-V vector (RVV) assembly routine installed as
 * AVFloatDSPContext.vector_fmul_window.
 * Reads len floats from each of src0 and src1 and 2 * len floats from win,
 * and writes 2 * len floats to dst. */
void ff_vector_fmul_window_rvv(float *dst, const float *src0,
const float *src1, const float *win, int len);
void ff_vector_fmul_add_rvv(float *dst, const float *src0, const float *src1, void ff_vector_fmul_add_rvv(float *dst, const float *src0, const float *src1,
const float *src2, int len); const float *src2, int len);
void ff_vector_fmul_reverse_rvv(float *dst, const float *src0, void ff_vector_fmul_reverse_rvv(float *dst, const float *src0,
@ -53,6 +55,7 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
fdsp->vector_fmul = ff_vector_fmul_rvv; fdsp->vector_fmul = ff_vector_fmul_rvv;
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_rvv; fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_rvv;
fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv; fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv;
fdsp->vector_fmul_window = ff_vector_fmul_window_rvv;
fdsp->vector_fmul_add = ff_vector_fmul_add_rvv; fdsp->vector_fmul_add = ff_vector_fmul_add_rvv;
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_rvv; fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_rvv;
fdsp->butterflies_float = ff_butterflies_float_rvv; fdsp->butterflies_float = ff_butterflies_float_rvv;

View File

@ -74,6 +74,39 @@ NOHWF mv a2, a3
ret ret
endfunc endfunc
// Windowed overlap multiply, RISC-V V implementation.
// With n = length, for every i in [0, n) and j = n - 1 - i this computes:
//   dst[i]     = src0[i] * win[n + j] - src1[j] * win[i]
//   dst[n + j] = src0[i] * win[i]     + src1[j] * win[n + j]
// src0 and src1 each supply n floats, win supplies 2n, dst receives 2n.
// src0, the low half of win and the low half of dst are walked forwards;
// src1, the high half of win and the high half of dst are walked backwards
// using strided accesses with a stride of -4 bytes.
// NOTE(review): sh2add is a Zba instruction, while only zve32f is named on
// the func line - confirm that Zba is guaranteed wherever this is selected.
func ff_vector_fmul_window_rvv, zve32f
// a0: dst, a1: src0, a2: src1, a3: window, a4: length
addi t0, a4, -1 // t0 = n - 1
add t1, t0, a4 // t1 = 2n - 1
sh2add a2, t0, a2 // a2 = &src1[n - 1] (last element of src1)
sh2add t0, t1, a0 // t0 = &dst[2n - 1] (last element of dst)
sh2add t3, t1, a3 // t3 = &win[2n - 1] (last element of window)
li t1, -4 // byte stride: step back one float per element
1:
// Process t2 elements per iteration, t2 being the vector length granted
// for the a4 elements that remain.
vsetvli t2, a4, e32, m1, ta, ma
vle32.v v16, (a1) // v16 = s0: src0, forwards
slli t4, t2, 2 // t4 = bytes consumed this iteration
vlse32.v v20, (a2), t1 // v20 = s1: src1, backwards
sub a4, a4, t2 // elements left to process
vle32.v v24, (a3) // v24 = wi: window low half, forwards
add a1, a1, t4 // advance src0
vlse32.v v28, (t3), t1 // v28 = wj: window high half, backwards
sub a2, a2, t4 // retreat src1
vfmul.vv v0, v16, v28 // v0 = s0 * wj
add a3, a3, t4 // advance forward window pointer
vfmul.vv v8, v16, v24 // v8 = s0 * wi
sub t3, t3, t4 // retreat backward window pointer
vfnmsac.vv v0, v20, v24 // v0 = s0 * wj - s1 * wi
vfmacc.vv v8, v20, v28 // v8 = s0 * wi + s1 * wj
vse32.v v0, (a0) // store to low half of dst, forwards
add a0, a0, t4 // advance forward dst pointer
vsse32.v v8, (t0), t1 // store to high half of dst, backwards
sub t0, t0, t4 // retreat backward dst pointer
bnez a4, 1b // loop until every element is consumed
ret
endfunc
// (a0) = (a1) * (a2) + (a3) [0..a4-1] // (a0) = (a1) * (a2) + (a3) [0..a4-1]
func ff_vector_fmul_add_rvv, zve32f func ff_vector_fmul_add_rvv, zve32f
1: 1: