You've already forked FFmpeg
							
							
				mirror of
				https://github.com/FFmpeg/FFmpeg.git
				synced 2025-10-30 23:18:11 +02:00 
			
		
		
		
	lavu/fixed_dsp: R-V V vector_fmul_window
This commit is contained in:
		| @@ -25,6 +25,9 @@ | ||||
| #include "libavutil/cpu.h" | ||||
| #include "libavutil/fixed_dsp.h" | ||||
|  | ||||
/* RISC-V Vector implementation of the fixed-point vector_fmul_window
 * operation. The body is written in assembly; the init function in this file
 * installs it as fdsp->vector_fmul_window only when both 32-bit and 64-bit
 * integer vector support are reported by the CPU flags. */
| void ff_vector_fmul_window_fixed_rvv(int32_t *dst, const int32_t *src0, | ||||
|                                      const int32_t *src1, const int32_t *win, | ||||
|                                      int len); | ||||
| void ff_vector_fmul_fixed_rvv(int *dst, const int *src0, const int *src1, | ||||
|                               int len); | ||||
| void ff_vector_fmul_reverse_fixed_rvv(int *dst, const int *src0, | ||||
| @@ -40,6 +43,9 @@ av_cold void ff_fixed_dsp_init_riscv(AVFixedDSPContext *fdsp) | ||||
|     int flags = av_get_cpu_flags(); | ||||
|  | ||||
|     if ((flags & AV_CPU_FLAG_RVV_I32) && (flags & AV_CPU_FLAG_RVB_ADDR)) { | ||||
|         if (flags & AV_CPU_FLAG_RVV_I64) | ||||
|             fdsp->vector_fmul_window = ff_vector_fmul_window_fixed_rvv; | ||||
|  | ||||
|         fdsp->vector_fmul = ff_vector_fmul_fixed_rvv; | ||||
|         fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_fixed_rvv; | ||||
|         fdsp->vector_fmul_add = ff_vector_fmul_add_fixed_rvv; | ||||
|   | ||||
| @@ -20,6 +20,50 @@ | ||||
|  | ||||
| #include "asm.S" | ||||
|  | ||||
// Fixed-point vector_fmul_window using the RISC-V Vector extension.
//
// Register arguments (standard RISC-V calling convention, in the order of the
// C prototype): a0 = dst, a1 = src0, a2 = src1, a3 = win, a4 = len.
//
// For every k in [0, len), with j = 2 * len - 1 - k, the loop computes
//   dst[k] = sat32(round((src0[k] * win[j] - src1[len - 1 - k] * win[k]) >> 31))
//   dst[j] = sat32(round((src0[k] * win[k] + src1[len - 1 - k] * win[j]) >> 31))
// using 64-bit intermediate products. It therefore reads len elements from
// src0 and src1, 2 * len elements from win, and writes 2 * len elements to dst.
//
// NOTE(review): the `func` macro and its `zve64x` argument come from asm.S;
// presumably the argument names the vector extension the body requires
// (64-bit elements are used below) - confirm against asm.S.
| func ff_vector_fmul_window_fixed_rvv, zve64x | ||||
          // vxrm = 0 selects round-to-nearest-up for the vnclip narrowing below.
|         csrwi   vxrm, 0 | ||||
          // Build v0 = {1, 2, 3, ...} over VLMAX 16-bit lanes; it is turned into
          // reversal indices inside the loop. The scalar instructions interleaved
          // here set up the end-relative pointers:
          //   a2 = src1 + len, t3 = win + 2 * len, t0 = dst + 2 * len
          // (elements are 4 bytes, hence sh2add for len and sh3add for 2 * len).
|         vsetvli t0, zero, e16, m1, ta, ma | ||||
|         sh2add  a2, a4, a2 | ||||
|         vid.v   v0 | ||||
|         sh3add  t3, a4, a3 | ||||
|         vadd.vi v0, v0, 1 | ||||
|         sh3add  t0, a4, a0 | ||||
| 1: | ||||
          // t2 = number of elements handled this iteration, t4 = same in bytes.
|         vsetvli t2, a4, e16, m1, ta, ma | ||||
|         slli    t4, t2, 2 | ||||
          // v2[i] = t2 - (i + 1) = t2 - 1 - i: 16-bit indices that reverse a chunk.
|         vrsub.vx v2, v0, t2 | ||||
|         sub     t3, t3, t4 | ||||
          // Same SEW/LMUL ratio as e16/m1, so vl is preserved.
|         vsetvli zero, zero, e32, m2, ta, ma | ||||
|         sub     a2, a2, t4 | ||||
          // v8 = chunk from the tail of win, v4 = chunk from the tail of src1
          // (both pointers were stepped back by one chunk first).
|         vle32.v v8, (t3) | ||||
|         sub     t0, t0, t4 | ||||
|         vle32.v v4, (a2) | ||||
|         sub     a4, a4, t2 | ||||
          // v28 = tail window chunk reversed, v20 = tail src1 chunk reversed;
          // v16 = src0 chunk and v24 = head window chunk, both in forward order.
|         vrgatherei16.vv v28, v8, v2 | ||||
|         vle32.v v16, (a1) | ||||
|         add     a1, a1, t4 | ||||
|         vrgatherei16.vv v20, v4, v2 | ||||
|         vle32.v v24, (a3) | ||||
|         add     a3, a3, t4 | ||||
          // Widening 32x32 -> 64-bit products. v8 and v4 are reused as destination
          // groups here; their previous contents were already gathered into v28
          // and v20.
          //   v12 = src0 * reversed tail window
          //   v8  = src0 * head window
|         vwmul.vv v12, v16, v28 | ||||
|         vwmul.vv v8, v16, v24 | ||||
|         // vwnmsac.vv does _not_ exist so multiply & subtract separately | ||||
          //   v4  = reversed src1 * head window
          //   v8 += reversed src1 * reversed tail window
|         vwmul.vv v4, v20, v24 | ||||
|         vwmacc.vv v8, v20, v28 | ||||
          // The 64-bit subtraction needs e64/m4 (same ratio again, vl unchanged).
|         vsetvli zero, zero, e64, m4, ta, ma | ||||
|         vsub.vv v12, v12, v4 | ||||
|         vsetvli zero, zero, e32, m2, ta, ma | ||||
          // Narrow to 32 bits: shift right by 31 with vxrm rounding and signed
          // saturation. v16 holds the back-half outputs, v20 the front-half outputs.
|         vnclip.wi v16, v8, 31 | ||||
|         vnclip.wi v20, v12, 31 | ||||
          // The back-half outputs are stored at a descending address (t0), so the
          // chunk is reversed before the store.
|         vrgatherei16.vv v8, v16, v2 | ||||
|         vse32.v v20, (a0) | ||||
|         add     a0, a0, t4 | ||||
|         vse32.v v8, (t0) | ||||
          // a4 holds the remaining element count.
|         bnez    a4, 1b | ||||
|  | ||||
|         ret | ||||
| endfunc | ||||
|  | ||||
| func ff_vector_fmul_fixed_rvv, zve32x | ||||
|         csrwi   vxrm, 0 | ||||
| 1: | ||||
|   | ||||
		Reference in New Issue
	
	Block a user