
lavc/opusdsp: RISC-V V (256-bit) postfilter

This adds a variant of the postfilter for use with 256-bit vectors.
As a single vector is then large enough to perform the scalar product,
the group multiplier is reduced to just one at run-time.
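For reference, one vector group holds VLEN/32 × LMUL 32-bit elements: 128/32 × 2 = 8 lanes
with the existing LMUL=2 code, and 256/32 × 1 = 8 lanes with LMUL=1, so the 5-element scalar
product still fits in a single group either way.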

The different vector type is passed via a register. Unfortunately, there is
no VSETIVL instruction (one that would combine an immediate vector length
with a register-supplied vtype), so the constant vector length (5) also
needs to be passed via a register.
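As a minimal sketch of what that costs (using the same a4/a5 register assignment as in the
patch below; the surrounding code is omitted), the single immediate-operand setup turns into
a load plus the all-register vsetvl form:

    # vtype fixed at assembly time: the vector length 5 fits in the immediate
            vsetivli zero, 5, e32, m2, ta, ma

    # vtype only known at run time (held in a5): vsetvl takes both operands
    # from registers, so the constant 5 must be materialised first
            li      a4, 5
            vsetvl  zero, a4, a5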
Authored by Rémi Denis-Courmont on 2022-10-05 19:12:55 +03:00; committed by Lynne.
parent f59a767ccd
commit 97d34befea
2 changed files with 16 additions and 4 deletions

libavcodec/riscv/opusdsp_init.c

@@ -26,6 +26,7 @@
 #include "libavcodec/opusdsp.h"
 
 void ff_opus_postfilter_rvv_128(float *data, int period, float *g, int len);
+void ff_opus_postfilter_rvv_256(float *data, int period, float *g, int len);
 
 av_cold void ff_opus_dsp_init_riscv(OpusDSP *d)
 {
@@ -37,6 +38,9 @@ av_cold void ff_opus_dsp_init_riscv(OpusDSP *d)
         case 16:
             d->postfilter = ff_opus_postfilter_rvv_128;
             break;
+        case 32:
+            d->postfilter = ff_opus_postfilter_rvv_256;
+            break;
         }
 #endif
 }
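(The cases presumably key on the vector register length in bytes, so 16 selects the existing
128-bit variant and 32 the new 256-bit one.)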

libavcodec/riscv/opusdsp_rvv.S

@@ -21,30 +21,38 @@
 #include "libavutil/riscv/asm.S"
 
 func ff_opus_postfilter_rvv_128, zve32f
+        lvtypei a5, e32, m2, ta, ma
+        j       1f
+endfunc
+
+func ff_opus_postfilter_rvv_256, zve32f
+        lvtypei a5, e32, m1, ta, ma
+1:
+        li      a4, 5
         addi    a1, a1, 2
         slli    a1, a1, 2
         lw      t1, 4(a2)
         vsetivli zero, 3, e32, m1, ta, ma
         vle32.v v24, (a2)
         sub     a1, a0, a1 // a1 = &x4 = &data[-(period + 2)]
-        vsetivli zero, 5, e32, m2, ta, ma
+        vsetvl  zero, a4, a5
         vslide1up.vx v8, v24, t1
         lw      t2, 8(a2)
         vle32.v v16, (a1)
         vslide1up.vx v24, v8, t2 // v24 = { g[2], g[1], g[0], g[1], g[2] }
 2:
-        vsetvli t0, a3, e32, m2, ta, ma
+        vsetvl  t0, a3, a5
         vle32.v v0, (a0)
         sub     a3, a3, t0
 3:
-        vsetivli zero, 5, e32, m2, ta, ma
+        vsetvl  zero, a4, a5
         lw      t2, 20(a1)
         vfmul.vv v8, v24, v16
         addi    a0, a0, 4
         vslide1down.vx v16, v16, t2
         addi    a1, a1, 4
         vfredusum.vs v0, v8, v0
-        vsetvli zero, t0, e32, m2, ta, ma
+        vsetvl  zero, t0, a5
         vmv.x.s t1, v0
         addi    t0, t0, -1
         vslide1down.vx v0, v0, zero