1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2024-12-23 12:43:46 +02:00

x86: lavc: share more constant through defines

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
Christophe Gisquet 2015-02-04 11:41:45 +01:00 committed by Michael Niedermayer
parent ac923ed470
commit ed450d4acf
12 changed files with 63 additions and 38 deletions

View File

@ -35,21 +35,30 @@ DECLARE_ALIGNED(8, const uint64_t, ff_pw_15) = 0x000F000F000F000FULL;
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_16) = { 0x0010001000100010ULL, 0x0010001000100010ULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_17) = { 0x0011001100110011ULL, 0x0011001100110011ULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_18) = { 0x0012001200120012ULL, 0x0012001200120012ULL };
DECLARE_ALIGNED(8, const uint64_t, ff_pw_20) = 0x0014001400140014ULL;
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_20) = { 0x0014001400140014ULL, 0x0014001400140014ULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_32) = { 0x0020002000200020ULL, 0x0020002000200020ULL };
DECLARE_ALIGNED(8, const uint64_t, ff_pw_42) = 0x002A002A002A002AULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pw_53) = 0x0035003500350035ULL;
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_64) = { 0x0040004000400040ULL, 0x0040004000400040ULL };
DECLARE_ALIGNED(8, const uint64_t, ff_pw_96) = 0x0060006000600060ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL;
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_255) = { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL };
DECLARE_ALIGNED(32, const ymm_reg, ff_pw_255) = { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL,
0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL };
DECLARE_ALIGNED(32, const ymm_reg, ff_pw_256) = { 0x0100010001000100ULL, 0x0100010001000100ULL,
0x0100010001000100ULL, 0x0100010001000100ULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_512) = { 0x0200020002000200ULL, 0x0200020002000200ULL };
DECLARE_ALIGNED(32, const ymm_reg, ff_pw_512) = { 0x0200020002000200ULL, 0x0200020002000200ULL,
0x0200020002000200ULL, 0x0200020002000200ULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_1019) = { 0x03FB03FB03FB03FBULL, 0x03FB03FB03FB03FBULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_1024) = { 0x0400040004000400ULL, 0x0400040004000400ULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_2048) = { 0x0800080008000800ULL, 0x0800080008000800ULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_8192) = { 0x2000200020002000ULL, 0x2000200020002000ULL };
DECLARE_ALIGNED(32, const ymm_reg, ff_pw_1023) = { 0x03ff03ff03ff03ffULL, 0x03ff03ff03ff03ffULL,
0x03ff03ff03ff03ffULL, 0x03ff03ff03ff03ffULL};
DECLARE_ALIGNED(32, const ymm_reg, ff_pw_1024) = { 0x0400040004000400ULL, 0x0400040004000400ULL,
0x0400040004000400ULL, 0x0400040004000400ULL};
DECLARE_ALIGNED(32, const ymm_reg, ff_pw_2048) = { 0x0800080008000800ULL, 0x0800080008000800ULL,
0x0800080008000800ULL, 0x0800080008000800ULL };
DECLARE_ALIGNED(32, const ymm_reg, ff_pw_4096) = { 0x1000100010001000ULL, 0x1000100010001000ULL,
0x1000100010001000ULL, 0x1000100010001000ULL };
DECLARE_ALIGNED(32, const ymm_reg, ff_pw_8192) = { 0x2000200020002000ULL, 0x2000200020002000ULL,
0x2000200020002000ULL, 0x2000200020002000ULL };
DECLARE_ALIGNED(32, const ymm_reg, ff_pw_m1) = { 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL };
@ -63,6 +72,7 @@ DECLARE_ALIGNED(32, const ymm_reg, ff_pb_3) = { 0x0303030303030303ULL, 0x030
0x0303030303030303ULL, 0x0303030303030303ULL };
DECLARE_ALIGNED(32, const xmm_reg, ff_pb_15) = { 0x0F0F0F0F0F0F0F0FULL, 0x0F0F0F0F0F0F0F0FULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pb_80) = { 0x8080808080808080ULL, 0x8080808080808080ULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pb_FE) = { 0xFEFEFEFEFEFEFEFEULL, 0xFEFEFEFEFEFEFEFEULL };
DECLARE_ALIGNED(8, const uint64_t, ff_pb_FC) = 0xFCFCFCFCFCFCFCFCULL;
DECLARE_ALIGNED(16, const xmm_reg, ff_ps_neg) = { 0x8000000080000000ULL, 0x8000000080000000ULL };

View File

@ -35,18 +35,20 @@ extern const xmm_reg ff_pw_9;
extern const uint64_t ff_pw_15;
extern const xmm_reg ff_pw_16;
extern const xmm_reg ff_pw_18;
extern const uint64_t ff_pw_20;
extern const xmm_reg ff_pw_20;
extern const xmm_reg ff_pw_32;
extern const uint64_t ff_pw_42;
extern const uint64_t ff_pw_53;
extern const xmm_reg ff_pw_64;
extern const uint64_t ff_pw_96;
extern const uint64_t ff_pw_128;
extern const xmm_reg ff_pw_255;
extern const xmm_reg ff_pw_512;
extern const xmm_reg ff_pw_1024;
extern const xmm_reg ff_pw_2048;
extern const xmm_reg ff_pw_8192;
extern const ymm_reg ff_pw_255;
extern const ymm_reg ff_pw_512;
extern const ymm_reg ff_pw_1023;
extern const ymm_reg ff_pw_1024;
extern const ymm_reg ff_pw_2048;
extern const ymm_reg ff_pw_4096;
extern const ymm_reg ff_pw_8192;
extern const ymm_reg ff_pw_m1;
extern const ymm_reg ff_pb_0;
@ -54,7 +56,7 @@ extern const ymm_reg ff_pb_1;
extern const ymm_reg ff_pb_2;
extern const ymm_reg ff_pb_3;
extern const xmm_reg ff_pb_80;
extern const xmm_reg ff_pb_F8;
extern const xmm_reg ff_pb_FE;
extern const uint64_t ff_pb_FC;
extern const xmm_reg ff_ps_neg;

View File

@ -26,15 +26,13 @@
%include "libavutil/x86/x86util.asm"
SECTION_RODATA
pw_pixel_max: times 8 dw ((1 << 10)-1)
SECTION .text
cextern pw_2
cextern pw_3
cextern pw_4
cextern pw_1023
%define pw_pixel_max pw_1023
; out: %4 = |%1-%2|-%3
; clobbers: %5

View File

@ -26,11 +26,13 @@
SECTION_RODATA
pw_pixel_max: times 8 dw ((1 << 10)-1)
pd_32: times 4 dd 32
SECTION .text
cextern pw_1023
%define pw_pixel_max pw_1023
;-----------------------------------------------------------------------------
; void ff_h264_idct_add_10(pixel *dst, int16_t *block, int stride)
;-----------------------------------------------------------------------------

View File

@ -26,6 +26,8 @@
SECTION_RODATA
cextern pw_1023
%define pw_pixel_max pw_1023
cextern pw_512
cextern pw_16
cextern pw_8
@ -35,7 +37,6 @@ cextern pw_1
pw_m32101234: dw -3, -2, -1, 0, 1, 2, 3, 4
pw_m3: times 8 dw -3
pw_pixel_max: times 8 dw ((1 << 10)-1)
pd_17: times 4 dd 17
pd_16: times 4 dd 16

View File

@ -26,12 +26,12 @@
SECTION_RODATA 32
cextern pw_1023
%define pw_pixel_max pw_1023
cextern pw_16
cextern pw_1
cextern pb_0
pw_pixel_max: times 8 dw ((1 << 10)-1)
pad10: times 8 dw 10*1023
pad20: times 8 dw 20*1023
pad30: times 8 dw 30*1023

View File

@ -26,11 +26,12 @@
SECTION_RODATA 32
pw_pixel_max: times 8 dw ((1 << 10)-1)
sq_1: dq 1
dq 0
cextern pw_1
cextern pw_1023
%define pw_pixel_max pw_1023
SECTION .text

View File

@ -26,8 +26,9 @@
SECTION_RODATA
cextern pw_1023
%define pw_pixel_max_10 pw_1023
pw_pixel_max_12: times 8 dw ((1 << 12)-1)
pw_pixel_max_10: times 8 dw ((1 << 10)-1)
pw_m2: times 8 dw -2
pd_1 : times 4 dd 1

View File

@ -21,14 +21,21 @@
%include "libavutil/x86/x86util.asm"
SECTION_RODATA 32
pw_8: times 16 dw (1 << 9)
pw_10: times 16 dw (1 << 11)
pw_12: times 16 dw (1 << 13)
cextern pw_255
cextern pw_512
cextern pw_2048
cextern pw_8192
cextern pw_1023
cextern pw_1024
cextern pw_4096
%define pw_8 pw_512
%define pw_10 pw_2048
%define pw_12 pw_8192
%define pw_bi_10 pw_1024
%define pw_bi_12 pw_4096
%define max_pixels_8 pw_255
%define max_pixels_10 pw_1023
pw_bi_8: times 16 dw (1 << 8)
pw_bi_10: times 16 dw (1 << 10)
pw_bi_12: times 16 dw (1 << 12)
max_pixels_8: times 16 dw ((1 << 8)-1)
max_pixels_10: times 16 dw ((1 << 10)-1)
max_pixels_12: times 16 dw ((1 << 12)-1)
cextern pd_1
cextern pb_0

View File

@ -20,12 +20,12 @@
; */
%include "libavutil/x86/x86util.asm"
SECTION_RODATA 32
max_pixels_10: times 16 dw ((1 << 10)-1)
SECTION .text
cextern pw_1023
%define max_pixels_10 pw_1023
;the tr_add macros and functions were largely inspired by x264 project's code in the h264_idct.asm file
%macro TR_ADD_MMX_4_8 0
mova m2, [r1]

View File

@ -23,7 +23,8 @@
SECTION_RODATA
v210_enc_min_10: times 8 dw 0x4
cextern pw_4
%define v210_enc_min_10 pw_4
v210_enc_max_10: times 8 dw 0x3fb
v210_enc_luma_mult_10: dw 4,1,16,4,1,16,0,0
@ -32,8 +33,10 @@ v210_enc_luma_shuf_10: db -1,0,1,-1,2,3,4,5,-1,6,7,-1,8,9,10,11
v210_enc_chroma_mult_10: dw 1,4,16,0,16,1,4,0
v210_enc_chroma_shuf_10: db 0,1,8,9,-1,2,3,-1,10,11,4,5,-1,12,13,-1
v210_enc_min_8: times 16 db 0x1
v210_enc_max_8: times 16 db 0xfe
cextern pb_1
%define v210_enc_min_8 pb_1
cextern pb_FE
%define v210_enc_max_8 pb_FE
v210_enc_luma_shuf_8: db 6,-1,7,-1,8,-1,9,-1,10,-1,11,-1,-1,-1,-1,-1
v210_enc_luma_mult_8: dw 16,4,64,16,4,64,0,0

View File

@ -36,11 +36,11 @@ vp3_idct_data: times 8 dw 64277
pb_7: times 8 db 0x07
pb_1F: times 8 db 0x1f
pb_81: times 8 db 0x81
pb_FE: times 8 db 0xFE
cextern pb_1
cextern pb_3
cextern pb_80
cextern pb_FE
cextern pw_8