mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
lavc/opusdsp: RISC-V V (256-bit) postfilter
This adds a variant of the postfilter for use with 256-bit vectors. As a single vector is then large enough to perform the scalar product, the group multiplier is reduced to just one at run-time. The different vector type is passed via register. Unfortunately, there is no VSETIVL instruction, so the constant vector size (5) also needs to be passed via a register.
This commit is contained in:
parent
f59a767ccd
commit
97d34befea
@ -26,6 +26,7 @@
|
||||
#include "libavcodec/opusdsp.h"
|
||||
|
||||
void ff_opus_postfilter_rvv_128(float *data, int period, float *g, int len);
|
||||
void ff_opus_postfilter_rvv_256(float *data, int period, float *g, int len);
|
||||
|
||||
av_cold void ff_opus_dsp_init_riscv(OpusDSP *d)
|
||||
{
|
||||
@ -37,6 +38,9 @@ av_cold void ff_opus_dsp_init_riscv(OpusDSP *d)
|
||||
case 16:
|
||||
d->postfilter = ff_opus_postfilter_rvv_128;
|
||||
break;
|
||||
case 32:
|
||||
d->postfilter = ff_opus_postfilter_rvv_256;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -21,30 +21,38 @@
|
||||
#include "libavutil/riscv/asm.S"
|
||||
|
||||
// Entry point selected for the 128-bit vector case. It only picks the vector
// type (32-bit elements, register group multiplier 2) and then continues in
// the code shared with ff_opus_postfilter_rvv_256, which reads the vector
// type back from a5 through its vsetvl instructions.
func ff_opus_postfilter_rvv_128, zve32f
|
||||
lvtypei a5, e32, m2, ta, ma // a5 = vector type for the shared body (lvtypei presumably comes from asm.S - confirm)
|
||||
j 1f // forward jump to the next local label "1", located in ff_opus_postfilter_rvv_256
|
||||
endfunc
|
||||
|
||||
func ff_opus_postfilter_rvv_256, zve32f
|
||||
lvtypei a5, e32, m1, ta, ma
|
||||
1:
|
||||
li a4, 5
|
||||
addi a1, a1, 2
|
||||
slli a1, a1, 2
|
||||
lw t1, 4(a2)
|
||||
vsetivli zero, 3, e32, m1, ta, ma
|
||||
vle32.v v24, (a2)
|
||||
sub a1, a0, a1 // a1 = &x4 = &data[-(period + 2)]
|
||||
vsetivli zero, 5, e32, m2, ta, ma
|
||||
vsetvl zero, a4, a5
|
||||
vslide1up.vx v8, v24, t1
|
||||
lw t2, 8(a2)
|
||||
vle32.v v16, (a1)
|
||||
vslide1up.vx v24, v8, t2 // v24 = { g[2], g[1], g[0], g[1], g[2] }
|
||||
2:
|
||||
vsetvli t0, a3, e32, m2, ta, ma
|
||||
vsetvl t0, a3, a5
|
||||
vle32.v v0, (a0)
|
||||
sub a3, a3, t0
|
||||
3:
|
||||
vsetivli zero, 5, e32, m2, ta, ma
|
||||
vsetvl zero, a4, a5
|
||||
lw t2, 20(a1)
|
||||
vfmul.vv v8, v24, v16
|
||||
addi a0, a0, 4
|
||||
vslide1down.vx v16, v16, t2
|
||||
addi a1, a1, 4
|
||||
vfredusum.vs v0, v8, v0
|
||||
vsetvli zero, t0, e32, m2, ta, ma
|
||||
vsetvl zero, t0, a5
|
||||
vmv.x.s t1, v0
|
||||
addi t0, t0, -1
|
||||
vslide1down.vx v0, v0, zero
|
||||
|
Loading…
Reference in New Issue
Block a user