1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-08-04 22:03:09 +02:00

avcodec/x86/mpegvideoenc_template: Remove remnants of MMX

Forgotten in 7284ab789d.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
Andreas Rheinhardt
2025-03-25 03:07:55 +01:00
parent 8c7b00ba3a
commit 1422f0057c
2 changed files with 53 additions and 92 deletions

View File

@ -43,26 +43,16 @@ DECLARE_ALIGNED(16, static const uint16_t, inv_zigzag_direct16)[64] = {
#if HAVE_6REGS #if HAVE_6REGS
#if HAVE_SSE2_INLINE #if HAVE_SSE2_INLINE
#undef COMPILE_TEMPLATE_SSE2
#undef COMPILE_TEMPLATE_SSSE3
#define COMPILE_TEMPLATE_SSE2 1
#define COMPILE_TEMPLATE_SSSE3 0 #define COMPILE_TEMPLATE_SSSE3 0
#undef RENAME
#undef RENAME_FDCT
#define RENAME(a) a ## _sse2 #define RENAME(a) a ## _sse2
#define RENAME_FDCT(a) a ## _sse2
#include "mpegvideoenc_template.c" #include "mpegvideoenc_template.c"
#endif /* HAVE_SSE2_INLINE */ #endif /* HAVE_SSE2_INLINE */
#if HAVE_SSSE3_INLINE #if HAVE_SSSE3_INLINE
#undef COMPILE_TEMPLATE_SSE2
#undef COMPILE_TEMPLATE_SSSE3 #undef COMPILE_TEMPLATE_SSSE3
#define COMPILE_TEMPLATE_SSE2 1
#define COMPILE_TEMPLATE_SSSE3 1 #define COMPILE_TEMPLATE_SSSE3 1
#undef RENAME #undef RENAME
#undef RENAME_FDCT
#define RENAME(a) a ## _ssse3 #define RENAME(a) a ## _ssse3
#define RENAME_FDCT(a) a ## _sse2
#include "mpegvideoenc_template.c" #include "mpegvideoenc_template.c"
#endif /* HAVE_SSSE3_INLINE */ #endif /* HAVE_SSSE3_INLINE */

View File

@ -29,49 +29,22 @@
#include "libavcodec/mpegvideoenc.h" #include "libavcodec/mpegvideoenc.h"
#include "fdct.h" #include "fdct.h"
#undef MMREG_WIDTH
#undef MM
#undef MOVQ
#undef SPREADW #undef SPREADW
#undef PMAXW #undef PMAXW
#undef PMAX #undef PMAX
#undef SAVE_SIGN #undef SAVE_SIGN
#undef RESTORE_SIGN #undef RESTORE_SIGN
#if COMPILE_TEMPLATE_SSE2
#define MMREG_WIDTH "16"
#define MM "%%xmm"
#define MOVQ "movdqa"
#define SPREADW(a) \ #define SPREADW(a) \
"pshuflw $0, "a", "a" \n\t"\ "pshuflw $0, "a", "a" \n\t"\
"punpcklwd "a", "a" \n\t" "punpcklwd "a", "a" \n\t"
#define PMAXW(a,b) "pmaxsw "a", "b" \n\t"
#define PMAX(a,b) \ #define PMAX(a,b) \
"movhlps "a", "b" \n\t"\ "movhlps "a", "b" \n\t"\
PMAXW(b, a)\ "pmaxsw "b", "a" \n\t"\
"pshuflw $0x0E, "a", "b" \n\t"\ "pshuflw $0x0E, "a", "b" \n\t"\
PMAXW(b, a)\ "pmaxsw "b", "a" \n\t"\
"pshuflw $0x01, "a", "b" \n\t"\ "pshuflw $0x01, "a", "b" \n\t"\
PMAXW(b, a) "pmaxsw "b", "a" \n\t"
#else
#define MMREG_WIDTH "8"
#define MM "%%mm"
#define MOVQ "movq"
#define SPREADW(a) \
"punpcklwd "a", "a" \n\t"\
"punpcklwd "a", "a" \n\t"
#define PMAXW(a,b) \
"psubusw "a", "b" \n\t"\
"paddw "a", "b" \n\t"
#define PMAX(a,b) \
"movq "a", "b" \n\t"\
"psrlq $32, "a" \n\t"\
PMAXW(b, a)\
"movq "a", "b" \n\t"\
"psrlq $16, "a" \n\t"\
PMAXW(b, a)
#endif
#if COMPILE_TEMPLATE_SSSE3 #if COMPILE_TEMPLATE_SSSE3
#define SAVE_SIGN(a,b) \ #define SAVE_SIGN(a,b) \
@ -100,7 +73,7 @@ static int RENAME(dct_quantize)(MPVEncContext *const s,
LOCAL_ALIGNED_16(int16_t, temp_block, [64]); LOCAL_ALIGNED_16(int16_t, temp_block, [64]);
//s->fdct (block); //s->fdct (block);
RENAME_FDCT(ff_fdct)(block); // cannot be anything else ... ff_fdct_sse2(block); // cannot be anything else ...
if(s->dct_error_sum) if(s->dct_error_sum)
s->denoise_dct(s, block); s->denoise_dct(s, block);
@ -138,32 +111,32 @@ static int RENAME(dct_quantize)(MPVEncContext *const s,
if ((s->c.out_format == FMT_H263 || s->c.out_format == FMT_H261) && !s->c.mpeg_quant) { if ((s->c.out_format == FMT_H263 || s->c.out_format == FMT_H261) && !s->c.mpeg_quant) {
__asm__ volatile( __asm__ volatile(
"movd %%"FF_REG_a", "MM"3 \n\t" // last_non_zero_p1 "movd %%"FF_REG_a", %%xmm3 \n\t" // last_non_zero_p1
SPREADW(MM"3") SPREADW("%%xmm3")
"pxor "MM"7, "MM"7 \n\t" // 0 "pxor %%xmm7, %%xmm7 \n\t" // 0
"pxor "MM"4, "MM"4 \n\t" // 0 "pxor %%xmm4, %%xmm4 \n\t" // 0
MOVQ" (%2), "MM"5 \n\t" // qmat[0] "movdqa (%2), %%xmm5 \n\t" // qmat[0]
"pxor "MM"6, "MM"6 \n\t" "pxor %%xmm6, %%xmm6 \n\t"
"psubw (%3), "MM"6 \n\t" // -bias[0] "psubw (%3), %%xmm6 \n\t" // -bias[0]
"mov $-128, %%"FF_REG_a" \n\t" "mov $-128, %%"FF_REG_a" \n\t"
".p2align 4 \n\t" ".p2align 4 \n\t"
"1: \n\t" "1: \n\t"
MOVQ" (%1, %%"FF_REG_a"), "MM"0 \n\t" // block[i] "movdqa (%1, %%"FF_REG_a"), %%xmm0 \n\t" // block[i]
SAVE_SIGN(MM"1", MM"0") // ABS(block[i]) SAVE_SIGN("%%xmm1", "%%xmm0") // ABS(block[i])
"psubusw "MM"6, "MM"0 \n\t" // ABS(block[i]) + bias[0] "psubusw %%xmm6, %%xmm0 \n\t" // ABS(block[i]) + bias[0]
"pmulhw "MM"5, "MM"0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16 "pmulhw %%xmm5, %%xmm0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16
"por "MM"0, "MM"4 \n\t" "por %%xmm0, %%xmm4 \n\t"
RESTORE_SIGN(MM"1", MM"0") // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) RESTORE_SIGN("%%xmm1", "%%xmm0") // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
MOVQ" "MM"0, (%5, %%"FF_REG_a") \n\t" "movdqa %%xmm0, (%5, %%"FF_REG_a") \n\t"
"pcmpeqw "MM"7, "MM"0 \n\t" // out==0 ? 0xFF : 0x00 "pcmpeqw %%xmm7, %%xmm0 \n\t" // out==0 ? 0xFF : 0x00
MOVQ" (%4, %%"FF_REG_a"), "MM"1 \n\t" "movdqa (%4, %%"FF_REG_a"), %%xmm1 \n\t"
MOVQ" "MM"7, (%1, %%"FF_REG_a") \n\t" // 0 "movdqa %%xmm7, (%1, %%"FF_REG_a") \n\t" // 0
"pandn "MM"1, "MM"0 \n\t" "pandn %%xmm1, %%xmm0 \n\t"
PMAXW(MM"0", MM"3") "pmaxsw %%xmm0, %%xmm3 \n\t"
"add $"MMREG_WIDTH", %%"FF_REG_a" \n\t" "add $16, %%"FF_REG_a" \n\t"
" js 1b \n\t" " js 1b \n\t"
PMAX(MM"3", MM"0") PMAX("%%xmm3", "%%xmm0")
"movd "MM"3, %%"FF_REG_a" \n\t" "movd %%xmm3, %%"FF_REG_a" \n\t"
"movzbl %%al, %%eax \n\t" // last_non_zero_p1 "movzbl %%al, %%eax \n\t" // last_non_zero_p1
: "+a" (last_non_zero_p1) : "+a" (last_non_zero_p1)
: "r" (block+64), "r" (qmat), "r" (bias), : "r" (block+64), "r" (qmat), "r" (bias),
@ -173,31 +146,31 @@ static int RENAME(dct_quantize)(MPVEncContext *const s,
); );
}else{ // FMT_H263 }else{ // FMT_H263
__asm__ volatile( __asm__ volatile(
"movd %%"FF_REG_a", "MM"3 \n\t" // last_non_zero_p1 "movd %%"FF_REG_a", %%xmm3 \n\t" // last_non_zero_p1
SPREADW(MM"3") SPREADW("%%xmm3")
"pxor "MM"7, "MM"7 \n\t" // 0 "pxor %%xmm7, %%xmm7 \n\t" // 0
"pxor "MM"4, "MM"4 \n\t" // 0 "pxor %%xmm4, %%xmm4 \n\t" // 0
"mov $-128, %%"FF_REG_a" \n\t" "mov $-128, %%"FF_REG_a" \n\t"
".p2align 4 \n\t" ".p2align 4 \n\t"
"1: \n\t" "1: \n\t"
MOVQ" (%1, %%"FF_REG_a"), "MM"0 \n\t" // block[i] "movdqa (%1, %%"FF_REG_a"), %%xmm0 \n\t" // block[i]
SAVE_SIGN(MM"1", MM"0") // ABS(block[i]) SAVE_SIGN("%%xmm1", "%%xmm0") // ABS(block[i])
MOVQ" (%3, %%"FF_REG_a"), "MM"6 \n\t" // bias[0] "movdqa (%3, %%"FF_REG_a"), %%xmm6 \n\t" // bias[0]
"paddusw "MM"6, "MM"0 \n\t" // ABS(block[i]) + bias[0] "paddusw %%xmm6, %%xmm0 \n\t" // ABS(block[i]) + bias[0]
MOVQ" (%2, %%"FF_REG_a"), "MM"5 \n\t" // qmat[i] "movdqa (%2, %%"FF_REG_a"), %%xmm5 \n\t" // qmat[i]
"pmulhw "MM"5, "MM"0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16 "pmulhw %%xmm5, %%xmm0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16
"por "MM"0, "MM"4 \n\t" "por %%xmm0, %%xmm4 \n\t"
RESTORE_SIGN(MM"1", MM"0") // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) RESTORE_SIGN("%%xmm1", "%%xmm0") // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
MOVQ" "MM"0, (%5, %%"FF_REG_a") \n\t" "movdqa %%xmm0, (%5, %%"FF_REG_a") \n\t"
"pcmpeqw "MM"7, "MM"0 \n\t" // out==0 ? 0xFF : 0x00 "pcmpeqw %%xmm7, %%xmm0 \n\t" // out==0 ? 0xFF : 0x00
MOVQ" (%4, %%"FF_REG_a"), "MM"1 \n\t" "movdqa (%4, %%"FF_REG_a"), %%xmm1 \n\t"
MOVQ" "MM"7, (%1, %%"FF_REG_a") \n\t" // 0 "movdqa %%xmm7, (%1, %%"FF_REG_a") \n\t" // 0
"pandn "MM"1, "MM"0 \n\t" "pandn %%xmm1, %%xmm0 \n\t"
PMAXW(MM"0", MM"3") "pmaxsw %%xmm0, %%xmm3 \n\t"
"add $"MMREG_WIDTH", %%"FF_REG_a" \n\t" "add $16, %%"FF_REG_a" \n\t"
" js 1b \n\t" " js 1b \n\t"
PMAX(MM"3", MM"0") PMAX("%%xmm3", "%%xmm0")
"movd "MM"3, %%"FF_REG_a" \n\t" "movd %%xmm3, %%"FF_REG_a" \n\t"
"movzbl %%al, %%eax \n\t" // last_non_zero_p1 "movzbl %%al, %%eax \n\t" // last_non_zero_p1
: "+a" (last_non_zero_p1) : "+a" (last_non_zero_p1)
: "r" (block+64), "r" (qmat+64), "r" (bias+64), : "r" (block+64), "r" (qmat+64), "r" (bias+64),
@ -207,14 +180,12 @@ static int RENAME(dct_quantize)(MPVEncContext *const s,
); );
} }
__asm__ volatile( __asm__ volatile(
"movd %1, "MM"1 \n\t" // max_qcoeff "movd %1, %%xmm1 \n\t" // max_qcoeff
SPREADW(MM"1") SPREADW("%%xmm1")
"psubusw "MM"1, "MM"4 \n\t" "psubusw %%xmm1, %%xmm4 \n\t"
"packuswb "MM"4, "MM"4 \n\t" "packuswb %%xmm4, %%xmm4 \n\t"
#if COMPILE_TEMPLATE_SSE2 "packsswb %%xmm4, %%xmm4 \n\t"
"packsswb "MM"4, "MM"4 \n\t" "movd %%xmm4, %0 \n\t" // *overflow
#endif
"movd "MM"4, %0 \n\t" // *overflow
: "=g" (*overflow) : "=g" (*overflow)
: "g" (s->max_qcoeff) : "g" (s->max_qcoeff)
); );