From a54d6b1d91ba17f6e1316997dd5f0ced4cee8ee5 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt Date: Tue, 23 Sep 2025 05:01:41 +0200 Subject: [PATCH] avcodec/x86/rnd_template: Merge into hpeldsp_init.c It is now only included exactly once. Reviewed-by: Lynne Signed-off-by: Andreas Rheinhardt --- libavcodec/x86/hpeldsp_init.c | 74 +++++++++++++++++++++++--- libavcodec/x86/rnd_template.c | 98 ----------------------------------- 2 files changed, 67 insertions(+), 105 deletions(-) delete mode 100644 libavcodec/x86/rnd_template.c diff --git a/libavcodec/x86/hpeldsp_init.c b/libavcodec/x86/hpeldsp_init.c index 66ed886ea9..cb47cb7752 100644 --- a/libavcodec/x86/hpeldsp_init.c +++ b/libavcodec/x86/hpeldsp_init.c @@ -33,6 +33,7 @@ #include "libavcodec/pixels.h" #include "fpel.h" #include "hpeldsp.h" +#include "inline_asm.h" void ff_put_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); @@ -73,15 +74,74 @@ void ff_avg_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels, /***********************************/ /* MMX no rounding */ -#define DEF(x, y) x ## _no_rnd_ ## y ## _mmx -#define SET_RND MOVQ_WONE -#define STATIC static -#include "rnd_template.c" +// put_pixels +static void put_no_rnd_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels, + ptrdiff_t line_size, int h) +{ + MOVQ_ZERO(mm7); + MOVQ_WONE(mm6); // =1 for no_rnd version + __asm__ volatile( + "movq (%1), %%mm0 \n\t" + "movq 1(%1), %%mm4 \n\t" + "movq %%mm0, %%mm1 \n\t" + "movq %%mm4, %%mm5 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpcklbw %%mm7, %%mm4 \n\t" + "punpckhbw %%mm7, %%mm1 \n\t" + "punpckhbw %%mm7, %%mm5 \n\t" + "paddusw %%mm0, %%mm4 \n\t" + "paddusw %%mm1, %%mm5 \n\t" + "xor %%"FF_REG_a", %%"FF_REG_a" \n\t" + "add %3, %1 \n\t" + ".p2align 3 \n\t" + "1: \n\t" + "movq (%1, %%"FF_REG_a"), %%mm0 \n\t" + "movq 1(%1, %%"FF_REG_a"), %%mm2 \n\t" + "movq %%mm0, %%mm1 \n\t" + "movq %%mm2, %%mm3 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" + "punpckhbw %%mm7, %%mm1 \n\t" + "punpckhbw %%mm7, %%mm3 \n\t" + "paddusw %%mm2, %%mm0 \n\t" + "paddusw %%mm3, %%mm1 \n\t" + "paddusw %%mm6, %%mm4 \n\t" + "paddusw %%mm6, %%mm5 \n\t" + "paddusw %%mm0, %%mm4 \n\t" + "paddusw %%mm1, %%mm5 \n\t" + "psrlw $2, %%mm4 \n\t" + "psrlw $2, %%mm5 \n\t" + "packuswb %%mm5, %%mm4 \n\t" + "movq %%mm4, (%2, %%"FF_REG_a") \n\t" + "add %3, %%"FF_REG_a" \n\t" -#undef DEF -#undef SET_RND -#undef STATIC + "movq (%1, %%"FF_REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3 + "movq 1(%1, %%"FF_REG_a"), %%mm4 \n\t" + "movq %%mm2, %%mm3 \n\t" + "movq %%mm4, %%mm5 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" + "punpcklbw %%mm7, %%mm4 \n\t" + "punpckhbw %%mm7, %%mm3 \n\t" + "punpckhbw %%mm7, %%mm5 \n\t" + "paddusw %%mm2, %%mm4 \n\t" + "paddusw %%mm3, %%mm5 \n\t" + "paddusw %%mm6, %%mm0 \n\t" + "paddusw %%mm6, %%mm1 \n\t" + "paddusw %%mm4, %%mm0 \n\t" + "paddusw %%mm5, %%mm1 \n\t" + "psrlw $2, %%mm0 \n\t" + "psrlw $2, %%mm1 \n\t" + "packuswb %%mm1, %%mm0 \n\t" + "movq %%mm0, (%2, %%"FF_REG_a") \n\t" + "add %3, %%"FF_REG_a" \n\t" + + "subl $2, %0 \n\t" + "jnz 1b \n\t" + :"+g"(h), "+S"(pixels) + :"D"(block), "r"((x86_reg)line_size) + :FF_REG_a, "memory"); +} // this routine is 'slightly' suboptimal but mostly unused static void avg_no_rnd_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels, diff --git a/libavcodec/x86/rnd_template.c b/libavcodec/x86/rnd_template.c deleted file mode 100644 index 4590aeddf0..0000000000 --- a/libavcodec/x86/rnd_template.c +++ /dev/null @@ -1,98 +0,0 @@ -/* - * SIMD-optimized halfpel functions are compiled twice for rnd/no_rnd - * Copyright (c) 2000, 2001 Fabrice Bellard - * Copyright (c) 2003-2004 Michael Niedermayer - * - * MMX optimization by Nick Kurshev - * mostly rewritten by Michael Niedermayer - * and improved by Zdenek Kabelac - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include -#include - -#include "inline_asm.h" - -// put_pixels -av_unused STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, - ptrdiff_t line_size, int h) -{ - MOVQ_ZERO(mm7); - SET_RND(mm6); // =2 for rnd and =1 for no_rnd version - __asm__ volatile( - "movq (%1), %%mm0 \n\t" - "movq 1(%1), %%mm4 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm4, %%mm5 \n\t" - "punpcklbw %%mm7, %%mm0 \n\t" - "punpcklbw %%mm7, %%mm4 \n\t" - "punpckhbw %%mm7, %%mm1 \n\t" - "punpckhbw %%mm7, %%mm5 \n\t" - "paddusw %%mm0, %%mm4 \n\t" - "paddusw %%mm1, %%mm5 \n\t" - "xor %%"FF_REG_a", %%"FF_REG_a" \n\t" - "add %3, %1 \n\t" - ".p2align 3 \n\t" - "1: \n\t" - "movq (%1, %%"FF_REG_a"), %%mm0 \n\t" - "movq 1(%1, %%"FF_REG_a"), %%mm2 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm2, %%mm3 \n\t" - "punpcklbw %%mm7, %%mm0 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "punpckhbw %%mm7, %%mm1 \n\t" - "punpckhbw %%mm7, %%mm3 \n\t" - "paddusw %%mm2, %%mm0 \n\t" - "paddusw %%mm3, %%mm1 \n\t" - "paddusw %%mm6, %%mm4 \n\t" - "paddusw %%mm6, %%mm5 \n\t" - "paddusw %%mm0, %%mm4 \n\t" - "paddusw %%mm1, %%mm5 \n\t" - "psrlw $2, %%mm4 \n\t" - "psrlw $2, %%mm5 \n\t" - "packuswb %%mm5, %%mm4 \n\t" - "movq %%mm4, (%2, %%"FF_REG_a") \n\t" - "add %3, %%"FF_REG_a" \n\t" - - "movq (%1, %%"FF_REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3 - "movq 1(%1, %%"FF_REG_a"), %%mm4 \n\t" - "movq %%mm2, %%mm3 \n\t" - "movq %%mm4, %%mm5 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm4 \n\t" - "punpckhbw %%mm7, %%mm3 \n\t" - "punpckhbw %%mm7, %%mm5 \n\t" - "paddusw %%mm2, %%mm4 \n\t" - "paddusw %%mm3, %%mm5 \n\t" - "paddusw %%mm6, %%mm0 \n\t" - "paddusw %%mm6, %%mm1 \n\t" - "paddusw %%mm4, %%mm0 \n\t" - "paddusw %%mm5, %%mm1 \n\t" - "psrlw $2, %%mm0 \n\t" - "psrlw $2, %%mm1 \n\t" - "packuswb %%mm1, %%mm0 \n\t" - "movq %%mm0, (%2, %%"FF_REG_a") \n\t" - "add %3, %%"FF_REG_a" \n\t" - - "subl $2, %0 \n\t" - "jnz 1b \n\t" - :"+g"(h), "+S"(pixels) - :"D"(block), "r"((x86_reg)line_size) - :FF_REG_a, "memory"); -}