You've already forked FFmpeg
							
							
				mirror of
				https://github.com/FFmpeg/FFmpeg.git
				synced 2025-10-30 23:18:11 +02:00 
			
		
		
		
	lavc/opusdsp: RISC-V V (256-bit) postfilter
This adds a variant of the postfilter for use with 256-bit vectors. As a single vector is then large enough to perform the scalar product, the group multiplier is reduced to just one at run-time. The different vector type is passed via a register. Unfortunately, there is no VSETIVL instruction, so the constant vector size (5) also needs to be passed via a register.
This commit is contained in:
		
				
					committed by
					
						 Lynne
						Lynne
					
				
			
			
				
	
			
			
			
						parent
						
							f59a767ccd
						
					
				
				
					commit
					97d34befea
				
			| @@ -26,6 +26,7 @@ | ||||
| #include "libavcodec/opusdsp.h" | ||||
|  | ||||
| void ff_opus_postfilter_rvv_128(float *data, int period, float *g, int len); | ||||
| void ff_opus_postfilter_rvv_256(float *data, int period, float *g, int len); | ||||
|  | ||||
| av_cold void ff_opus_dsp_init_riscv(OpusDSP *d) | ||||
| { | ||||
| @@ -37,6 +38,9 @@ av_cold void ff_opus_dsp_init_riscv(OpusDSP *d) | ||||
|         case 16: | ||||
|             d->postfilter = ff_opus_postfilter_rvv_128; | ||||
|             break; | ||||
|         case 32: | ||||
|             d->postfilter = ff_opus_postfilter_rvv_256; | ||||
|             break; | ||||
|         } | ||||
| #endif | ||||
| } | ||||
|   | ||||
| @@ -21,30 +21,38 @@ | ||||
| #include "libavutil/riscv/asm.S" | ||||
|  | ||||
| func ff_opus_postfilter_rvv_128, zve32f | ||||
|         lvtypei a5, e32, m2, ta, ma | ||||
|         j       1f | ||||
| endfunc | ||||
|  | ||||
| func ff_opus_postfilter_rvv_256, zve32f | ||||
|         lvtypei a5, e32, m1, ta, ma | ||||
| 1: | ||||
|         li           a4, 5 | ||||
|         addi         a1, a1, 2 | ||||
|         slli         a1, a1, 2 | ||||
|         lw           t1, 4(a2) | ||||
|         vsetivli     zero, 3, e32, m1, ta, ma | ||||
|         vle32.v      v24, (a2) | ||||
|         sub          a1, a0, a1      // a1 = &x4 = &data[-(period + 2)] | ||||
|         vsetivli     zero, 5, e32, m2, ta, ma | ||||
|         vsetvl       zero, a4, a5 | ||||
|         vslide1up.vx v8, v24, t1 | ||||
|         lw           t2, 8(a2) | ||||
|         vle32.v      v16, (a1) | ||||
|         vslide1up.vx v24, v8, t2     // v24 = { g[2], g[1], g[0], g[1], g[2] } | ||||
| 2: | ||||
|         vsetvli t0, a3, e32, m2, ta, ma | ||||
|         vsetvl  t0, a3, a5 | ||||
|         vle32.v v0, (a0) | ||||
|         sub     a3, a3, t0 | ||||
| 3: | ||||
|         vsetivli       zero, 5, e32, m2, ta, ma | ||||
|         vsetvl         zero, a4, a5 | ||||
|         lw             t2, 20(a1) | ||||
|         vfmul.vv       v8, v24, v16 | ||||
|         addi           a0, a0, 4 | ||||
|         vslide1down.vx v16, v16, t2 | ||||
|         addi           a1, a1, 4 | ||||
|         vfredusum.vs   v0, v8, v0 | ||||
|         vsetvli        zero, t0, e32, m2, ta, ma | ||||
|         vsetvl         zero, t0, a5 | ||||
|         vmv.x.s        t1, v0 | ||||
|         addi           t0, t0, -1 | ||||
|         vslide1down.vx v0, v0, zero | ||||
|   | ||||
		Reference in New Issue
	
	Block a user