1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2024-12-23 12:43:46 +02:00

lavu/fixed_dsp: R-V V vector_fmul_window

This commit is contained in:
Rémi Denis-Courmont 2023-10-04 19:13:13 +03:00
parent 10eb3b9c9f
commit f39a8790e1
2 changed files with 50 additions and 0 deletions

View File

@ -25,6 +25,9 @@
#include "libavutil/cpu.h"
#include "libavutil/fixed_dsp.h"
void ff_vector_fmul_window_fixed_rvv(int32_t *dst, const int32_t *src0,
const int32_t *src1, const int32_t *win,
int len);
void ff_vector_fmul_fixed_rvv(int *dst, const int *src0, const int *src1,
int len);
void ff_vector_fmul_reverse_fixed_rvv(int *dst, const int *src0,
@ -40,6 +43,9 @@ av_cold void ff_fixed_dsp_init_riscv(AVFixedDSPContext *fdsp)
int flags = av_get_cpu_flags();
if ((flags & AV_CPU_FLAG_RVV_I32) && (flags & AV_CPU_FLAG_RVB_ADDR)) {
if (flags & AV_CPU_FLAG_RVV_I64)
fdsp->vector_fmul_window = ff_vector_fmul_window_fixed_rvv;
fdsp->vector_fmul = ff_vector_fmul_fixed_rvv;
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_fixed_rvv;
fdsp->vector_fmul_add = ff_vector_fmul_add_fixed_rvv;

View File

@ -20,6 +20,50 @@
#include "asm.S"
func ff_vector_fmul_window_fixed_rvv, zve64x
csrwi vxrm, 0
vsetvli t0, zero, e16, m1, ta, ma
sh2add a2, a4, a2
vid.v v0
sh3add t3, a4, a3
vadd.vi v0, v0, 1
sh3add t0, a4, a0
1:
vsetvli t2, a4, e16, m1, ta, ma
slli t4, t2, 2
vrsub.vx v2, v0, t2
sub t3, t3, t4
vsetvli zero, zero, e32, m2, ta, ma
sub a2, a2, t4
vle32.v v8, (t3)
sub t0, t0, t4
vle32.v v4, (a2)
sub a4, a4, t2
vrgatherei16.vv v28, v8, v2
vle32.v v16, (a1)
add a1, a1, t4
vrgatherei16.vv v20, v4, v2
vle32.v v24, (a3)
add a3, a3, t4
vwmul.vv v12, v16, v28
vwmul.vv v8, v16, v24
// vwnmsac.vv does _not_ exist so multiply & subtract separately
vwmul.vv v4, v20, v24
vwmacc.vv v8, v20, v28
vsetvli zero, zero, e64, m4, ta, ma
vsub.vv v12, v12, v4
vsetvli zero, zero, e32, m2, ta, ma
vnclip.wi v16, v8, 31
vnclip.wi v20, v12, 31
vrgatherei16.vv v8, v16, v2
vse32.v v20, (a0)
add a0, a0, t4
vse32.v v8, (t0)
bnez a4, 1b
ret
endfunc
func ff_vector_fmul_fixed_rvv, zve32x
csrwi vxrm, 0
1: