You've already forked FFmpeg
							
							
				mirror of
				https://github.com/FFmpeg/FFmpeg.git
				synced 2025-10-30 23:18:11 +02:00 
			
		
		
		
	af_afir: RISC-V V fcmul_add
Segmented loads are slow, so here we use unit-strided loads and narrowing shifts.

c910:
  fcmul_add_c: 2179
  fcmul_add_rvv_f64: 1652
c908:
  fcmul_add_c: 4891.2
  fcmul_add_rvv_f64: 2399.5

Signed-off-by: Rémi Denis-Courmont <remi@remlab.net>
This commit is contained in:
		
				
					committed by
					
						 Rémi Denis-Courmont
						Rémi Denis-Courmont
					
				
			
			
				
	
			
			
			
						parent
						
							d076517056
						
					
				
				
					commit
					afb967b81e
				
			| @@ -33,6 +33,7 @@ typedef struct AudioFIRDSPContext { | ||||
|                       ptrdiff_t len); | ||||
| } AudioFIRDSPContext; | ||||
|  | ||||
| void ff_afir_init_riscv(AudioFIRDSPContext *s); | ||||
| void ff_afir_init_x86(AudioFIRDSPContext *s); | ||||
|  | ||||
| static void fcmul_add_c(float *sum, const float *t, const float *c, ptrdiff_t len) | ||||
| @@ -74,7 +75,9 @@ static av_unused void ff_afir_init(AudioFIRDSPContext *dsp) | ||||
|     dsp->fcmul_add = fcmul_add_c; | ||||
|     dsp->dcmul_add = dcmul_add_c; | ||||
|  | ||||
| #if ARCH_X86 | ||||
| #if ARCH_RISCV | ||||
|     ff_afir_init_riscv(dsp); | ||||
| #elif ARCH_X86 | ||||
|     ff_afir_init_x86(dsp); | ||||
| #endif | ||||
| } | ||||
|   | ||||
							
								
								
									
										2
									
								
								libavfilter/riscv/Makefile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								libavfilter/riscv/Makefile
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,2 @@ | ||||
| OBJS += riscv/af_afir_init.o | ||||
| RVV-OBJS += riscv/af_afir_rvv.o | ||||
							
								
								
									
										42
									
								
								libavfilter/riscv/af_afir_init.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								libavfilter/riscv/af_afir_init.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,42 @@ | ||||
| /* | ||||
|  * Copyright (c) 2023 Institute of Software Chinese Academy of Sciences (ISCAS). | ||||
|  * | ||||
|  * This file is part of FFmpeg. | ||||
|  * | ||||
|  * FFmpeg is free software; you can redistribute it and/or | ||||
|  * modify it under the terms of the GNU Lesser General Public | ||||
|  * License as published by the Free Software Foundation; either | ||||
|  * version 2.1 of the License, or (at your option) any later version. | ||||
|  * | ||||
|  * FFmpeg is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
|  * Lesser General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU Lesser General Public | ||||
|  * License along with FFmpeg; if not, write to the Free Software | ||||
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
|  */ | ||||
|  | ||||
| #include <stdint.h> | ||||
|  | ||||
| #include "config.h" | ||||
| #include "libavutil/attributes.h" | ||||
| #include "libavutil/cpu.h" | ||||
| #include "libavfilter/af_afirdsp.h" | ||||
|  | ||||
| /* RISC-V vector version of fcmul_add, implemented in assembly in af_afir_rvv.S. */ | ||||
| void ff_fcmul_add_rvv(float *sum, const float *t, const float *c, | ||||
|                        ptrdiff_t len); | ||||
|  | ||||
| /* | ||||
|  * Install the RISC-V optimised routines into the afir DSP context. | ||||
|  * | ||||
|  * The body is only compiled when HAVE_RVV is set; otherwise this is a no-op | ||||
|  * and the function pointers already stored in *s are left untouched. | ||||
|  */ | ||||
| av_cold void ff_afir_init_riscv(AudioFIRDSPContext *s) | ||||
| { | ||||
| #if HAVE_RVV | ||||
|     const int cpu_flags = av_get_cpu_flags(); | ||||
|  | ||||
|     /* Both capabilities must be reported before the vector routine is used. | ||||
|      * NOTE(review): presumably RVV_F64 covers the 64-bit element loads and | ||||
|      * RVB_ADDR the sh3add address arithmetic in the assembly - confirm. */ | ||||
|     if ((cpu_flags & AV_CPU_FLAG_RVV_F64) && (cpu_flags & AV_CPU_FLAG_RVB_ADDR)) | ||||
|         s->fcmul_add = ff_fcmul_add_rvv; | ||||
| #endif | ||||
| } | ||||
							
								
								
									
										55
									
								
								libavfilter/riscv/af_afir_rvv.S
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								libavfilter/riscv/af_afir_rvv.S
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,55 @@ | ||||
| /* | ||||
|  * Copyright (c) 2023 Institute of Software Chinese Academy of Sciences (ISCAS). | ||||
|  * | ||||
|  * This file is part of FFmpeg. | ||||
|  * | ||||
|  * FFmpeg is free software; you can redistribute it and/or | ||||
|  * modify it under the terms of the GNU Lesser General Public | ||||
|  * License as published by the Free Software Foundation; either | ||||
|  * version 2.1 of the License, or (at your option) any later version. | ||||
|  * | ||||
|  * FFmpeg is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
|  * Lesser General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU Lesser General Public | ||||
|  * License along with FFmpeg; if not, write to the Free Software | ||||
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
|  */ | ||||
|  | ||||
| #include "libavutil/riscv/asm.S" | ||||
|  | ||||
| // void ff_fcmul_add_rvv(float *sum, const float *t, const float *c, | ||||
| //                       ptrdiff_t len) | ||||
| // | ||||
| // Complex multiply-accumulate over interleaved (re, im) float pairs: | ||||
| //     sum[i] += t[i] * c[i]    for each of the len pairs, | ||||
| // followed by one extra scalar step on the float right after the last pair: | ||||
| //     sum[2*len] += t[2*len] * c[2*len] | ||||
| // | ||||
| // Arguments: a0 = sum, a1 = t, a2 = c, a3 = len (number of pairs). | ||||
| // Vector register groups (LMUL=4 each): | ||||
| //     v16 = sum.re   v20 = sum.im | ||||
| //     v0  = t.re     v4  = t.im | ||||
| //     v8  = c.re     v12 = c.im | ||||
| //     v24 = scratch for the 64-bit loads (8-register group, v24-v31) | ||||
| // | ||||
| // Segmented loads are slow, so each pair is loaded as a single 64-bit | ||||
| // element with a unit-strided load and then split with narrowing shifts. | ||||
| func ff_fcmul_add_rvv, zve64f | ||||
|         // t1 = shift count used to extract the upper 32 bits of each pair | ||||
|         li          t1, 32 | ||||
| 1: | ||||
|         // t0 = number of pairs handled in this iteration | ||||
|         vsetvli     t0, a3, e32, m4, ta, ma | ||||
|         // Load t0 pairs of sum as 64-bit elements | ||||
|         vle64.v     v24, (a0) | ||||
|         sub         a3, a3, t0 | ||||
|         // Low 32 bits -> v16 (first float of the pair), high 32 bits -> v20 | ||||
|         vnsrl.wx    v16, v24, zero | ||||
|         vnsrl.wx    v20, v24, t1 | ||||
|         // Same load-and-split for t, then advance a1 by t0 * 8 bytes | ||||
|         vle64.v     v24, (a1) | ||||
|         sh3add      a1, t0, a1 | ||||
|         vnsrl.wx    v0, v24, zero | ||||
|         vnsrl.wx    v4, v24, t1 | ||||
|         // Same load-and-split for c, then advance a2 by t0 * 8 bytes | ||||
|         vle64.v     v24, (a2) | ||||
|         sh3add      a2, t0, a2 | ||||
|         vnsrl.wx    v8, v24, zero | ||||
|         vnsrl.wx    v12, v24, t1 | ||||
|         // sum.re += t.re * c.re | ||||
|         vfmacc.vv   v16, v0, v8 | ||||
|         // sum.im += t.im * c.re | ||||
|         vfmacc.vv   v20, v4, v8 | ||||
|         // sum.re -= t.im * c.im | ||||
|         vfnmsac.vv  v16, v4, v12 | ||||
|         // sum.im += t.re * c.im | ||||
|         vfmacc.vv   v20, v0, v12 | ||||
|         // Store v16/v20 back to sum as interleaved two-field segments | ||||
|         vsseg2e32.v v16, (a0) | ||||
|         sh3add      a0, t0, a0 | ||||
|         // Loop while pairs remain | ||||
|         bgtz        a3, 1b | ||||
|  | ||||
|         // Scalar tail: a0/a1/a2 now point just past the processed pairs; | ||||
|         // do one single-float multiply-add there (*a0 += *a1 * *a2). | ||||
|         flw         fa0, 0(a1) | ||||
|         flw         fa1, 0(a2) | ||||
|         flw         fa2, 0(a0) | ||||
|         fmadd.s     fa2, fa0, fa1, fa2 | ||||
|         fsw         fa2, 0(a0) | ||||
|  | ||||
|         ret | ||||
| endfunc | ||||
		Reference in New Issue
	
	Block a user