You've already forked FFmpeg
							
							
				mirror of
				https://github.com/FFmpeg/FFmpeg.git
				synced 2025-10-30 23:18:11 +02:00 
			
		
		
		
	avfilter: add anlmdn filter x86 SIMD optimizations
This commit is contained in:
		| @@ -27,6 +27,8 @@ | ||||
| #include "audio.h" | ||||
| #include "formats.h" | ||||
|  | ||||
| #include "af_anlmdndsp.h" | ||||
|  | ||||
| #define SQR(x) ((x) * (x)) | ||||
|  | ||||
| typedef struct AudioNLMeansContext { | ||||
| @@ -49,7 +51,7 @@ typedef struct AudioNLMeansContext { | ||||
|  | ||||
|     AVAudioFifo *fifo; | ||||
|  | ||||
|     float (*compute_distance)(const float *f1, const float *f2, int K); | ||||
|     AudioNLMDNDSPContext dsp; | ||||
| } AudioNLMeansContext; | ||||
|  | ||||
| #define OFFSET(x) offsetof(AudioNLMeansContext, x) | ||||
| @@ -93,7 +95,7 @@ static int query_formats(AVFilterContext *ctx) | ||||
|     return ff_set_common_samplerates(ctx, formats); | ||||
| } | ||||
|  | ||||
| static float compute_distance_ssd(const float *f1, const float *f2, int K) | ||||
| static float compute_distance_ssd_c(const float *f1, const float *f2, ptrdiff_t K) | ||||
| { | ||||
|     float distance = 0.; | ||||
|  | ||||
| @@ -103,6 +105,25 @@ static float compute_distance_ssd(const float *f1, const float *f2, int K) | ||||
|     return distance; | ||||
| } | ||||
|  | ||||
| static void compute_cache_c(float *cache, const float *f, /* C reference: updates cache[0..S-1] in place using samples read from f */ | ||||
|                             ptrdiff_t S, ptrdiff_t K, /* S: number of cache entries updated; K: sample offset applied around i and j */ | ||||
|                             ptrdiff_t i, ptrdiff_t jj) /* i: fixed index into f; jj: first of the S consecutive indices compared against i */ | ||||
| { | ||||
|     int v = 0; /* cache slot written for the current j */ | ||||
|  | ||||
|     for (int j = jj; j < jj + S; j++, v++) /* NOTE(review): jj is ptrdiff_t but j and v are int - confirm the values always fit */ | ||||
|         cache[v] += -SQR(f[i - K - 1] - f[j - K - 1]) + SQR(f[i + K] - f[j + K]); /* subtract the squared difference at offset -(K + 1), add the one at offset +K */ | ||||
| } | ||||
|  | ||||
| void ff_anlmdn_init(AudioNLMDNDSPContext *dsp) /* fills dsp with the C implementations, then gives the x86 init a chance to override them */ | ||||
| { | ||||
|     dsp->compute_distance_ssd = compute_distance_ssd_c; /* C default */ | ||||
|     dsp->compute_cache        = compute_cache_c; /* C default */ | ||||
|  | ||||
|     if (ARCH_X86) /* ARCH_X86 is defined outside this view; presumably a build-time 0/1 constant - confirm */ | ||||
|         ff_anlmdn_init_x86(dsp); /* called after the defaults are set, so anything it assigns takes precedence */ | ||||
| } | ||||
|  | ||||
| static int config_output(AVFilterLink *outlink) | ||||
| { | ||||
|     AVFilterContext *ctx = outlink->src; | ||||
| @@ -129,7 +150,7 @@ static int config_output(AVFilterLink *outlink) | ||||
|     if (!s->fifo) | ||||
|         return AVERROR(ENOMEM); | ||||
|  | ||||
|     s->compute_distance = compute_distance_ssd; | ||||
|     ff_anlmdn_init(&s->dsp); | ||||
|  | ||||
|     return 0; | ||||
| } | ||||
| @@ -153,17 +174,14 @@ static int filter_channel(AVFilterContext *ctx, void *arg, int ch, int nb_jobs) | ||||
|             for (int j = i - S; j <= i + S; j++) { | ||||
|                 if (i == j) | ||||
|                     continue; | ||||
|                 cache[v++] = s->compute_distance(f + i, f + j, K); | ||||
|                 cache[v++] = s->dsp.compute_distance_ssd(f + i, f + j, K); | ||||
|             } | ||||
|         } else { | ||||
|             for (int j = i - S; j < i; j++, v++) | ||||
|                 cache[v] = cache[v] - SQR(f[i - K - 1] - f[j - K - 1]) + SQR(f[i + K] - f[j + K]); | ||||
|  | ||||
|             for (int j = i + 1; j <= i + S; j++, v++) | ||||
|                 cache[v] = cache[v] - SQR(f[i - K - 1] - f[j - K - 1]) + SQR(f[i + K] - f[j + K]); | ||||
|             s->dsp.compute_cache(cache, f, S, K, i, i - S); | ||||
|             s->dsp.compute_cache(cache + S, f, S, K, i, i + 1); | ||||
|         } | ||||
|  | ||||
|         for (int j = 0; j < v; j++) { | ||||
|         for (int j = 0; j < 2 * S; j++) { | ||||
|             const float distance = cache[j]; | ||||
|             float w; | ||||
|  | ||||
|   | ||||
							
								
								
									
										40
									
								
								libavfilter/af_anlmdndsp.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										40
									
								
								libavfilter/af_anlmdndsp.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,40 @@ | ||||
| /* | ||||
|  * Copyright (c) 2019 Paul B Mahol | ||||
|  * | ||||
|  * This file is part of FFmpeg. | ||||
|  * | ||||
|  * FFmpeg is free software; you can redistribute it and/or | ||||
|  * modify it under the terms of the GNU Lesser General Public | ||||
|  * License as published by the Free Software Foundation; either | ||||
|  * version 2.1 of the License, or (at your option) any later version. | ||||
|  * | ||||
|  * FFmpeg is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
|  * Lesser General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU Lesser General Public | ||||
|  * License along with FFmpeg; if not, write to the Free Software | ||||
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
|  */ | ||||
|  | ||||
| #ifndef AVFILTER_ANLMDNDSP_H | ||||
| #define AVFILTER_ANLMDNDSP_H | ||||
|  | ||||
| #include "libavutil/common.h" | ||||
|  | ||||
| #include "audio.h" | ||||
| #include "avfilter.h" | ||||
| #include "formats.h" | ||||
| #include "internal.h" | ||||
|  | ||||
| typedef struct AudioNLMDNDSPContext { /* function-pointer table filled in by ff_anlmdn_init() and ff_anlmdn_init_x86() */ | ||||
|     float (*compute_distance_ssd)(const float *f1, const float *f2, ptrdiff_t K); /* returns a float distance computed from f1, f2 and K; presumably a sum of squared differences, per the name - confirm against the implementations */ | ||||
|     void (*compute_cache)(float *cache, const float *f, ptrdiff_t S, ptrdiff_t K, /* updates cache in place from samples in f; returns nothing */ | ||||
|                           ptrdiff_t i, ptrdiff_t jj); /* i and jj are indices into f; S and K are counts/offsets - exact meaning is set by the implementation */ | ||||
| } AudioNLMDNDSPContext; | ||||
|  | ||||
| void ff_anlmdn_init(AudioNLMDNDSPContext *s); | ||||
| void ff_anlmdn_init_x86(AudioNLMDNDSPContext *s); | ||||
|  | ||||
| #endif /* AVFILTER_ANLMDNDSP_H */ | ||||
| @@ -1,6 +1,7 @@ | ||||
| OBJS-$(CONFIG_SCENE_SAD)                     += x86/scene_sad_init.o | ||||
|  | ||||
| OBJS-$(CONFIG_AFIR_FILTER)                   += x86/af_afir_init.o | ||||
| OBJS-$(CONFIG_ANLMDN_FILTER)                 += x86/af_anlmdn_init.o | ||||
| OBJS-$(CONFIG_BLEND_FILTER)                  += x86/vf_blend_init.o | ||||
| OBJS-$(CONFIG_BWDIF_FILTER)                  += x86/vf_bwdif_init.o | ||||
| OBJS-$(CONFIG_COLORSPACE_FILTER)             += x86/colorspacedsp_init.o | ||||
| @@ -34,6 +35,7 @@ OBJS-$(CONFIG_YADIF_FILTER)                  += x86/vf_yadif_init.o | ||||
| X86ASM-OBJS-$(CONFIG_SCENE_SAD)              += x86/scene_sad.o | ||||
|  | ||||
| X86ASM-OBJS-$(CONFIG_AFIR_FILTER)            += x86/af_afir.o | ||||
| X86ASM-OBJS-$(CONFIG_ANLMDN_FILTER)          += x86/af_anlmdn.o | ||||
| X86ASM-OBJS-$(CONFIG_BLEND_FILTER)           += x86/vf_blend.o | ||||
| X86ASM-OBJS-$(CONFIG_BWDIF_FILTER)           += x86/vf_bwdif.o | ||||
| X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER)      += x86/colorspacedsp.o | ||||
|   | ||||
							
								
								
									
										80
									
								
								libavfilter/x86/af_anlmdn.asm
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										80
									
								
								libavfilter/x86/af_anlmdn.asm
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,80 @@ | ||||
| ;***************************************************************************** | ||||
| ;* x86-optimized functions for anlmdn filter | ||||
| ;* Copyright (c) 2017 Paul B Mahol | ||||
| ;* | ||||
| ;* This file is part of FFmpeg. | ||||
| ;* | ||||
| ;* FFmpeg is free software; you can redistribute it and/or | ||||
| ;* modify it under the terms of the GNU Lesser General Public | ||||
| ;* License as published by the Free Software Foundation; either | ||||
| ;* version 2.1 of the License, or (at your option) any later version. | ||||
| ;* | ||||
| ;* FFmpeg is distributed in the hope that it will be useful, | ||||
| ;* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
| ;* Lesser General Public License for more details. | ||||
| ;* | ||||
| ;* You should have received a copy of the GNU Lesser General Public | ||||
| ;* License along with FFmpeg; if not, write to the Free Software | ||||
| ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
| ;****************************************************************************** | ||||
|  | ||||
| %include "libavutil/x86/x86util.asm" | ||||
|  | ||||
| SECTION .text | ||||
|  | ||||
| ;------------------------------------------------------------------------------ | ||||
| ; float ff_compute_distance_ssd(const float *f1, const float *f2, ptrdiff_t len) | ||||
| ; | ||||
| ; Sums the squared differences of 2 * len + 1 floats, starting len floats | ||||
| ; before f1 and before f2. All vector loads are unaligned (movups). | ||||
| ;------------------------------------------------------------------------------ | ||||
|  | ||||
| INIT_XMM sse | ||||
| cglobal compute_distance_ssd, 3,5,3, f1, f2, len, r, x | ||||
|     ; x = -len * 4 bytes: move both pointers back by len floats | ||||
|     mov       xq, lenq | ||||
|     shl       xq, 2 | ||||
|     neg       xq | ||||
|     add       f1q, xq | ||||
|     add       f2q, xq | ||||
|     ; x becomes the running byte offset, starting at 0 | ||||
|     xor       xq, xq | ||||
|     ; len = (2 * len + 1) * 4: total number of bytes to process | ||||
|     shl       lenq, 1 | ||||
|     add       lenq, 1 | ||||
|     shl       lenq, 2 | ||||
|     ; r = bytes left over after the last full vector | ||||
|     mov       rq, lenq | ||||
|     and       rq, mmsize - 1 | ||||
|     ; m0 is the accumulator | ||||
|     xorps     m0, m0 | ||||
|     ; less than one full vector: handle everything in the scalar loop | ||||
|     cmp       lenq, mmsize | ||||
|     jl .loop1 | ||||
|     ; limit the vector loop to whole vectors | ||||
|     sub       lenq, rq | ||||
| ALIGN 16 | ||||
|     .loop0: | ||||
|         movups    m1, [f1q + xq] | ||||
|         movups    m2, [f2q + xq] | ||||
|         subps     m1, m2 | ||||
|         mulps     m1, m1 | ||||
|         addps     m0, m1 | ||||
|         add       xq, mmsize | ||||
|         cmp       xq, lenq | ||||
|         jl .loop0 | ||||
|  | ||||
|     ; horizontal add: fold the four lanes of xmm0 into its lowest lane | ||||
|     movhlps   xmm1, xmm0 | ||||
|     addps     xmm0, xmm1 | ||||
|     movss     xmm1, xmm0 | ||||
|     shufps    xmm0, xmm0, 1 | ||||
|     addss     xmm0, xmm1 | ||||
|  | ||||
|     ; no leftover bytes: done; otherwise restore the full byte count | ||||
|     cmp       rq, 0 | ||||
|     je .end | ||||
|     add       lenq, rq | ||||
|     ; scalar loop: one float per iteration until x reaches len | ||||
|     .loop1: | ||||
|         movss    xm1, [f1q + xq] | ||||
|         subss    xm1, [f2q + xq] | ||||
|         mulss    xm1, xm1 | ||||
|         addss    xm0, xm1 | ||||
|         add       xq, 4 | ||||
|         cmp       xq, lenq | ||||
|         jl .loop1 | ||||
|     .end: | ||||
|     ; non-x86-64 builds: store the result to r0m and load it onto the x87 | ||||
|     ; stack (presumably where the 32-bit ABI returns floats - confirm) | ||||
| %if ARCH_X86_64 == 0 | ||||
|     movss     r0m, xm0 | ||||
|     fld dword r0m | ||||
| %endif | ||||
|     RET | ||||
							
								
								
									
										35
									
								
								libavfilter/x86/af_anlmdn_init.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										35
									
								
								libavfilter/x86/af_anlmdn_init.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,35 @@ | ||||
| /* | ||||
|  * This file is part of FFmpeg. | ||||
|  * | ||||
|  * FFmpeg is free software; you can redistribute it and/or | ||||
|  * modify it under the terms of the GNU Lesser General Public | ||||
|  * License as published by the Free Software Foundation; either | ||||
|  * version 2.1 of the License, or (at your option) any later version. | ||||
|  * | ||||
|  * FFmpeg is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||
|  * Lesser General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU Lesser General Public | ||||
|  * License along with FFmpeg; if not, write to the Free Software | ||||
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
|  */ | ||||
|  | ||||
| #include "config.h" | ||||
| #include "libavutil/attributes.h" | ||||
| #include "libavutil/cpu.h" | ||||
| #include "libavutil/x86/cpu.h" | ||||
| #include "libavfilter/af_anlmdndsp.h" | ||||
|  | ||||
| float ff_compute_distance_ssd_sse(const float *f1, const float *f2, | ||||
|                                   ptrdiff_t len); | ||||
|  | ||||
| av_cold void ff_anlmdn_init_x86(AudioNLMDNDSPContext *s) /* installs the SSE distance routine in s when the CPU-flag check below passes */ | ||||
| { | ||||
|     int cpu_flags = av_get_cpu_flags(); /* flags are queried on every call to this init function */ | ||||
|  | ||||
|     if (EXTERNAL_SSE(cpu_flags)) { /* EXTERNAL_SSE is defined outside this view; presumably true when external SSE assembly is usable - confirm */ | ||||
|         s->compute_distance_ssd = ff_compute_distance_ssd_sse; /* only this pointer is replaced; compute_cache is left as the caller set it */ | ||||
|     } | ||||
| } | ||||
		Reference in New Issue
	
	Block a user