mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
ac3enc_fixed: drop unnecessary fixed-point DSP code
This commit is contained in:
parent
238b2d4155
commit
9e05421dbe
@ -46,49 +46,6 @@ static void ac3_exponent_min_c(uint8_t *exp, int num_reuse_blocks, int nb_coefs)
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * OR together the absolute values of every element of an int16_t array.
 *
 * The result is not the maximum itself; it is a value whose most significant
 * set bit matches that of the largest absolute value in the array.
 *
 * @param src input array
 * @param len number of values in the array; no element is read when len <= 0
 * @return bitwise OR of abs(src[k]) over all k in [0, len)
 */
static int ac3_max_msb_abs_int16_c(const int16_t *src, int len)
{
    int merged = 0;
    int pos;

    for (pos = 0; pos < len; pos++) {
        /* the sample is promoted to int, so negating it cannot overflow */
        const int sample = src[pos];

        merged |= sample < 0 ? -sample : sample;
    }

    return merged;
}
|
||||
|
||||
/**
 * Left-shift each value in an array of int16_t in place.
 *
 * Every element is treated as an independent 16-bit lane: bits shifted past
 * bit 15 are discarded and never spill into the neighbouring element.
 *
 * The buffer is accessed only through its own int16_t type, so no particular
 * alignment is needed and exactly len elements are touched, whatever len is.
 *
 * @param src   array to shift in place
 * @param len   number of int16_t values in the array (0 is a no-op)
 * @param shift left shift amount, range [0,15]
 */
static void ac3_lshift_int16_c(int16_t *src, unsigned int len,
                               unsigned int shift)
{
    unsigned int i;

    for (i = 0; i < len; i++) {
        /* Shift the raw 16-bit pattern in unsigned arithmetic: left-shifting
         * a negative signed value would be undefined behaviour. */
        const uint32_t lane = (uint16_t)src[i];

        /* Truncate back to 16 bits to drop whatever left the lane. */
        src[i] = (int16_t)(uint16_t)(lane << shift);
    }
}
|
||||
|
||||
/**
 * Right-shift each value in an array of int32_t in place.
 *
 * Exactly len elements are modified. A zero length is a no-op and lengths
 * that are not a multiple of 8 are handled without touching memory past the
 * end of the array.
 *
 * @param src   array to shift in place
 * @param len   number of int32_t values in the array
 * @param shift right shift amount, range [0,31]
 */
static void ac3_rshift_int32_c(int32_t *src, unsigned int len,
                               unsigned int shift)
{
    unsigned int i;

    /* len is unsigned, so it must never be counted down past zero: doing so
     * would wrap around and walk far beyond the buffer. */
    for (i = 0; i < len; i++)
        src[i] >>= shift;
}
|
||||
|
||||
static void float_to_fixed24_c(int32_t *dst, const float *src, unsigned int len)
|
||||
{
|
||||
const float scale = 1 << 24;
|
||||
@ -376,19 +333,6 @@ void ff_ac3dsp_downmix_fixed(AC3DSPContext *c, int32_t **samples, int16_t **matr
|
||||
ac3_downmix_c_fixed(samples, matrix, out_ch, in_ch, len);
|
||||
}
|
||||
|
||||
/**
 * Apply a symmetric window to 16-bit samples.
 *
 * Only the first half of the window is stored; coefficient k scales both
 * sample k and its mirror sample len-1-k. Each product has 1 << 14 added to
 * it before being shifted right by 15.
 *
 * @param output destination array, len elements
 * @param input  source array, len elements
 * @param window window coefficients, at least len/2 elements
 * @param len    full window length; with an odd len the middle sample is
 *               not written
 */
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    const int rounding = 1 << 14;
    const int half     = len >> 1;
    int front;

    for (front = 0; front < half; front++) {
        /* mirror position of 'front', counted from the end of the block */
        const unsigned int back  = len - front - 1;
        const int16_t      coeff = window[front];

        output[front] = (MUL16(input[front], coeff) + rounding) >> 15;
        output[back]  = (MUL16(input[back],  coeff) + rounding) >> 15;
    }
}
|
||||
|
||||
void ff_ac3dsp_downmix(AC3DSPContext *c, float **samples, float **matrix,
|
||||
int out_ch, int in_ch, int len)
|
||||
{
|
||||
@ -424,9 +368,6 @@ void ff_ac3dsp_downmix(AC3DSPContext *c, float **samples, float **matrix,
|
||||
av_cold void ff_ac3dsp_init(AC3DSPContext *c, int bit_exact)
|
||||
{
|
||||
c->ac3_exponent_min = ac3_exponent_min_c;
|
||||
c->ac3_max_msb_abs_int16 = ac3_max_msb_abs_int16_c;
|
||||
c->ac3_lshift_int16 = ac3_lshift_int16_c;
|
||||
c->ac3_rshift_int32 = ac3_rshift_int32_c;
|
||||
c->float_to_fixed24 = float_to_fixed24_c;
|
||||
c->bit_alloc_calc_bap = ac3_bit_alloc_calc_bap_c;
|
||||
c->update_bap_counts = ac3_update_bap_counts_c;
|
||||
@ -438,7 +379,6 @@ av_cold void ff_ac3dsp_init(AC3DSPContext *c, int bit_exact)
|
||||
c->out_channels = 0;
|
||||
c->downmix = NULL;
|
||||
c->downmix_fixed = NULL;
|
||||
c->apply_window_int16 = apply_window_int16_c;
|
||||
|
||||
if (ARCH_ARM)
|
||||
ff_ac3dsp_init_arm(c, bit_exact);
|
||||
|
@ -42,39 +42,6 @@ typedef struct AC3DSPContext {
|
||||
*/
|
||||
void (*ac3_exponent_min)(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
|
||||
|
||||
/**
|
||||
* Calculate the maximum MSB of the absolute value of each element in an
|
||||
* array of int16_t.
|
||||
* @param src input array
|
||||
* constraints: align 16. values must be in range [-32767,32767]
|
||||
* @param len number of values in the array
|
||||
* constraints: multiple of 16 greater than 0
|
||||
* @return a value with the same MSB as max(abs(src[]))
|
||||
*/
|
||||
int (*ac3_max_msb_abs_int16)(const int16_t *src, int len);
|
||||
|
||||
/**
|
||||
* Left-shift each value in an array of int16_t by a specified amount.
|
||||
* @param src input array
|
||||
* constraints: align 16
|
||||
* @param len number of values in the array
|
||||
* constraints: multiple of 32 greater than 0
|
||||
* @param shift left shift amount
|
||||
* constraints: range [0,15]
|
||||
*/
|
||||
void (*ac3_lshift_int16)(int16_t *src, unsigned int len, unsigned int shift);
|
||||
|
||||
/**
|
||||
* Right-shift each value in an array of int32_t by a specified amount.
|
||||
* @param src input array
|
||||
* constraints: align 16
|
||||
* @param len number of values in the array
|
||||
* constraints: multiple of 16 greater than 0
|
||||
* @param shift right shift amount
|
||||
* constraints: range [0,31]
|
||||
*/
|
||||
void (*ac3_rshift_int32)(int32_t *src, unsigned int len, unsigned int shift);
|
||||
|
||||
/**
|
||||
* Convert an array of float in range [-1.0,1.0] to int32_t with range
|
||||
* [-(1<<24),(1<<24)]
|
||||
@ -136,20 +103,6 @@ typedef struct AC3DSPContext {
|
||||
int in_channels;
|
||||
void (*downmix)(float **samples, float **matrix, int len);
|
||||
void (*downmix_fixed)(int32_t **samples, int16_t **matrix, int len);
|
||||
|
||||
/**
|
||||
* Apply symmetric window in 16-bit fixed-point.
|
||||
* @param output destination array
|
||||
* constraints: 16-byte aligned
|
||||
* @param input source array
|
||||
* constraints: 16-byte aligned
|
||||
* @param window window array
|
||||
* constraints: 16-byte aligned, at least len/2 elements
|
||||
* @param len full window length
|
||||
* constraints: greater than zero and a multiple of an implementation-specific block size (the exact multiple is not documented here)
|
||||
*/
|
||||
void (*apply_window_int16)(int16_t *output, const int16_t *input,
|
||||
const int16_t *window, unsigned int len);
|
||||
} AC3DSPContext;
|
||||
|
||||
void ff_ac3dsp_init (AC3DSPContext *c, int bit_exact);
|
||||
|
@ -147,44 +147,6 @@ const uint8_t ff_eac3_default_cpl_band_struct[18] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1
|
||||
};
|
||||
|
||||
/* AC-3 MDCT window */
|
||||
|
||||
/* int16_t coefficients for the first half of the window (AC3_WINDOW_SIZE/2 entries), 16-byte aligned */
|
||||
DECLARE_ALIGNED(16, const int16_t, ff_ac3_window)[AC3_WINDOW_SIZE/2] = {
|
||||
4, 7, 12, 16, 21, 28, 34, 42,
|
||||
51, 61, 72, 84, 97, 111, 127, 145,
|
||||
164, 184, 207, 231, 257, 285, 315, 347,
|
||||
382, 419, 458, 500, 544, 591, 641, 694,
|
||||
750, 810, 872, 937, 1007, 1079, 1155, 1235,
|
||||
1318, 1406, 1497, 1593, 1692, 1796, 1903, 2016,
|
||||
2132, 2253, 2379, 2509, 2644, 2783, 2927, 3076,
|
||||
3230, 3389, 3552, 3721, 3894, 4072, 4255, 4444,
|
||||
4637, 4835, 5038, 5246, 5459, 5677, 5899, 6127,
|
||||
6359, 6596, 6837, 7083, 7334, 7589, 7848, 8112,
|
||||
8380, 8652, 8927, 9207, 9491, 9778,10069,10363,
|
||||
10660,10960,11264,11570,11879,12190,12504,12820,
|
||||
13138,13458,13780,14103,14427,14753,15079,15407,
|
||||
15735,16063,16392,16720,17049,17377,17705,18032,
|
||||
18358,18683,19007,19330,19651,19970,20287,20602,
|
||||
20914,21225,21532,21837,22139,22438,22733,23025,
|
||||
23314,23599,23880,24157,24430,24699,24964,25225,
|
||||
25481,25732,25979,26221,26459,26691,26919,27142,
|
||||
27359,27572,27780,27983,28180,28373,28560,28742,
|
||||
28919,29091,29258,29420,29577,29729,29876,30018,
|
||||
30155,30288,30415,30538,30657,30771,30880,30985,
|
||||
31086,31182,31274,31363,31447,31528,31605,31678,
|
||||
31747,31814,31877,31936,31993,32046,32097,32145,
|
||||
32190,32232,32272,32310,32345,32378,32409,32438,
|
||||
32465,32490,32513,32535,32556,32574,32592,32608,
|
||||
32623,32636,32649,32661,32671,32681,32690,32698,
|
||||
32705,32712,32718,32724,32729,32733,32737,32741,
|
||||
32744,32747,32750,32752,32754,32756,32757,32759,
|
||||
32760,32761,32762,32763,32764,32764,32765,32765,
|
||||
32766,32766,32766,32766,32767,32767,32767,32767,
|
||||
32767,32767,32767,32767,32767,32767,32767,32767,
|
||||
32767,32767,32767,32767,32767,32767,32767,32767,
|
||||
};
|
||||
|
||||
const uint8_t ff_ac3_log_add_tab[260]= {
|
||||
0x40,0x3f,0x3e,0x3d,0x3c,0x3b,0x3a,0x39,0x38,0x37,
|
||||
0x36,0x35,0x34,0x34,0x33,0x32,0x31,0x30,0x2f,0x2f,
|
||||
|
@ -37,7 +37,6 @@ extern const int ff_ac3_sample_rate_tab[];
|
||||
extern const uint16_t ff_ac3_bitrate_tab[19];
|
||||
extern const uint8_t ff_ac3_rematrix_band_tab[5];
|
||||
extern const uint8_t ff_eac3_default_cpl_band_struct[18];
|
||||
extern const int16_t ff_ac3_window[AC3_WINDOW_SIZE/2];
|
||||
extern const uint8_t ff_ac3_log_add_tab[260];
|
||||
extern const uint16_t ff_ac3_hearing_threshold_tab[AC3_CRITICAL_BANDS][3];
|
||||
extern const uint8_t ff_ac3_bap_tab[64];
|
||||
|
@ -26,13 +26,8 @@
|
||||
#include "config.h"
|
||||
|
||||
void ff_ac3_exponent_min_neon(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
|
||||
int ff_ac3_max_msb_abs_int16_neon(const int16_t *src, int len);
|
||||
void ff_ac3_lshift_int16_neon(int16_t *src, unsigned len, unsigned shift);
|
||||
void ff_ac3_rshift_int32_neon(int32_t *src, unsigned len, unsigned shift);
|
||||
void ff_float_to_fixed24_neon(int32_t *dst, const float *src, unsigned int len);
|
||||
void ff_ac3_extract_exponents_neon(uint8_t *exp, int32_t *coef, int nb_coefs);
|
||||
void ff_apply_window_int16_neon(int16_t *dst, const int16_t *src,
|
||||
const int16_t *window, unsigned n);
|
||||
void ff_ac3_sum_square_butterfly_int32_neon(int64_t sum[4],
|
||||
const int32_t *coef0,
|
||||
const int32_t *coef1,
|
||||
@ -61,12 +56,8 @@ av_cold void ff_ac3dsp_init_arm(AC3DSPContext *c, int bit_exact)
|
||||
|
||||
if (have_neon(cpu_flags)) {
|
||||
c->ac3_exponent_min = ff_ac3_exponent_min_neon;
|
||||
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_neon;
|
||||
c->ac3_lshift_int16 = ff_ac3_lshift_int16_neon;
|
||||
c->ac3_rshift_int32 = ff_ac3_rshift_int32_neon;
|
||||
c->float_to_fixed24 = ff_float_to_fixed24_neon;
|
||||
c->extract_exponents = ff_ac3_extract_exponents_neon;
|
||||
c->apply_window_int16 = ff_apply_window_int16_neon;
|
||||
c->sum_square_butterfly_int32 = ff_ac3_sum_square_butterfly_int32_neon;
|
||||
c->sum_square_butterfly_float = ff_ac3_sum_square_butterfly_float_neon;
|
||||
}
|
||||
|
@ -35,10 +35,6 @@ pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7
|
||||
cextern pd_1
|
||||
pd_151: times 4 dd 151
|
||||
|
||||
; used in ff_apply_window_int16()
|
||||
pb_revwords: SHUFFLE_MASK_W 7, 6, 5, 4, 3, 2, 1, 0
|
||||
pd_16384: times 4 dd 16384
|
||||
|
||||
SECTION .text
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
@ -81,133 +77,6 @@ AC3_EXPONENT_MIN
|
||||
%endif
|
||||
%undef LOOP_ALIGN
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; int ff_ac3_max_msb_abs_int16(const int16_t *src, int len)
|
||||
;
|
||||
; This function uses 2 different methods to calculate a valid result.
|
||||
; 1) logical 'or' of abs of each element
|
||||
; This is used for ssse3 because of the pabsw instruction.
|
||||
; It is also used for mmx because of the lack of min/max instructions.
|
||||
; 2) calculate min/max for the array, then or(abs(min),abs(max))
|
||||
; This is used for mmxext and sse2 because they have pminsw/pmaxsw.
|
||||
;-----------------------------------------------------------------------------
|
||||
|
||||
; Horizontal logical 'or': combine all 4 (mmx) or 8 (xmm) words of a register
; into its low word.
;   %1 = src register; on exit its low word holds the result, the remaining
;        words hold partial results and must be ignored by the caller
;   %2 = scratch register, clobbered
|
||||
%macro OR_WORDS_HORIZ 2 ; src, tmp
|
||||
%if cpuflag(sse2)
|
||||
; fold the high 64 bits onto the low 64 bits
movhlps %2, %1
|
||||
por %1, %2
|
||||
; fold words 2-3 onto words 0-1
pshuflw %2, %1, q0032
|
||||
por %1, %2
|
||||
; fold word 1 onto word 0
pshuflw %2, %1, q0001
|
||||
por %1, %2
|
||||
%elif cpuflag(mmxext)
|
||||
; same two folding steps as above, on a 64-bit mmx register
pshufw %2, %1, q0032
|
||||
por %1, %2
|
||||
pshufw %2, %1, q0001
|
||||
por %1, %2
|
||||
%else ; mmx
|
||||
; no word shuffle is used on plain mmx: fold with 64-bit logical shifts instead
movq %2, %1
|
||||
psrlq %2, 32
|
||||
por %1, %2
|
||||
movq %2, %1
|
||||
psrlq %2, 16
|
||||
por %1, %2
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
; Emit ac3_max_msb_abs_int16 for the current instruction set.
;   %1 = method: 'min_max' tracks per-word minimum and maximum and takes
;        their absolute values after the loop; anything else ('or_abs')
;        ORs the absolute value of every word as it is loaded.
; The result is returned in eax, masked to 16 bits.
%macro AC3_MAX_MSB_ABS_INT16 1
|
||||
cglobal ac3_max_msb_abs_int16, 2,2,5, src, len
|
||||
; m2 and m3 start at zero: running min / max for min_max, m2 is the OR
; accumulator for or_abs
pxor m2, m2
|
||||
pxor m3, m3
|
||||
.loop:
|
||||
%ifidn %1, min_max
|
||||
mova m0, [srcq]
|
||||
|
||||
mova m1, [srcq+mmsize]
|
||||
pminsw m2, m0
|
||||
pminsw m2, m1
|
||||
pmaxsw m3, m0
|
||||
pmaxsw m3, m1
|
||||
%else ; or_abs
|
||||
%if notcpuflag(ssse3)
|
||||
mova m0, [srcq]
|
||||
|
||||
mova m1, [srcq+mmsize]
|
||||
; NOTE(review): m3/m4 look like scratch registers for ABS2 here - confirm
; against the ABS2 definition, which is outside this view
ABS2 m0, m1, m3, m4
|
||||
%else ; ssse3
|
||||
; using memory args is faster for ssse3
|
||||
pabsw m0, [srcq]
|
||||
pabsw m1, [srcq+mmsize]
|
||||
%endif
|
||||
por m2, m0
|
||||
por m2, m1
|
||||
%endif
|
||||
; each iteration consumes two registers = 2*mmsize bytes = mmsize int16 values,
; hence the pointer advances by mmsize*2 while len drops by mmsize
add srcq, mmsize*2
|
||||
sub lend, mmsize
|
||||
ja .loop
|
||||
%ifidn %1, min_max
|
||||
; turn the accumulated min and max into absolute values and merge them
ABS2 m2, m3, m0, m1
|
||||
por m2, m3
|
||||
%endif
|
||||
; collapse all words of m2 into its low word, then keep only that word
OR_WORDS_HORIZ m2, m0
|
||||
movd eax, m2
|
||||
and eax, 0xFFFF
|
||||
RET
|
||||
%endmacro
|
||||
|
||||
INIT_MMX mmx
|
||||
AC3_MAX_MSB_ABS_INT16 or_abs
|
||||
INIT_MMX mmxext
|
||||
AC3_MAX_MSB_ABS_INT16 min_max
|
||||
INIT_XMM sse2
|
||||
AC3_MAX_MSB_ABS_INT16 min_max
|
||||
INIT_XMM ssse3
|
||||
AC3_MAX_MSB_ABS_INT16 or_abs
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; macro used for ff_ac3_lshift_int16() and ff_ac3_rshift_int32()
|
||||
;-----------------------------------------------------------------------------
|
||||
|
||||
; Emit an in-place array shift function named ac3_<%1>shift_int<%2>.
;   %1 = direction letter used in the function name (l or r)
;   %2 = element width in bits (16 or 32)
;   %3 = packed shift instruction applied to every register
; The instruction set comes from the preceding INIT_MMX/INIT_XMM, not from a
; macro argument.
%macro AC3_SHIFT 3 ; l/r, 16/32, shift instruction
|
||||
cglobal ac3_%1shift_int%2, 3, 3, 5, src, len, shift
|
||||
; the shift count is applied from a register operand
movd m0, shiftd
|
||||
.loop:
|
||||
; load, shift and store four registers (4*mmsize bytes) per iteration
mova m1, [srcq ]
|
||||
mova m2, [srcq+mmsize ]
|
||||
mova m3, [srcq+mmsize*2]
|
||||
mova m4, [srcq+mmsize*3]
|
||||
%3 m1, m0
|
||||
%3 m2, m0
|
||||
%3 m3, m0
|
||||
%3 m4, m0
|
||||
mova [srcq ], m1
|
||||
mova [srcq+mmsize ], m2
|
||||
mova [srcq+mmsize*2], m3
|
||||
mova [srcq+mmsize*3], m4
|
||||
add srcq, mmsize*4
|
||||
; mmsize*32/%2 = number of %2-bit elements held in 4*mmsize bytes
sub lend, mmsize*32/%2
|
||||
ja .loop
|
||||
.end:
|
||||
REP_RET
|
||||
%endmacro
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; void ff_ac3_lshift_int16(int16_t *src, unsigned int len, unsigned int shift)
|
||||
;-----------------------------------------------------------------------------
|
||||
|
||||
INIT_MMX mmx
|
||||
AC3_SHIFT l, 16, psllw
|
||||
INIT_XMM sse2
|
||||
AC3_SHIFT l, 16, psllw
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; void ff_ac3_rshift_int32(int32_t *src, unsigned int len, unsigned int shift)
|
||||
;-----------------------------------------------------------------------------
|
||||
|
||||
INIT_MMX mmx
|
||||
AC3_SHIFT r, 32, psrad
|
||||
INIT_XMM sse2
|
||||
AC3_SHIFT r, 32, psrad
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; void ff_float_to_fixed24(int32_t *dst, const float *src, unsigned int len)
|
||||
;-----------------------------------------------------------------------------
|
||||
@ -423,130 +292,3 @@ AC3_EXTRACT_EXPONENTS
|
||||
INIT_XMM ssse3
|
||||
AC3_EXTRACT_EXPONENTS
|
||||
%endif
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; void ff_apply_window_int16(int16_t *output, const int16_t *input,
|
||||
; const int16_t *window, unsigned int len)
|
||||
;-----------------------------------------------------------------------------
|
||||
|
||||
; Reverse the order of the 16-bit words in a register, in place.
;   %1 = register to reverse
;   %2 = (optional) register holding a byte-shuffle mask; only read on the
;        ssse3 non-atom path
; No instructions are emitted when none of the cpuflag conditions below hold.
%macro REVERSE_WORDS 1-2
|
||||
%if cpuflag(ssse3) && notcpuflag(atom)
|
||||
pshufb %1, %2
|
||||
%elif cpuflag(sse2)
|
||||
; 0x1B reverses the four words inside each 64-bit half ...
pshuflw %1, %1, 0x1B
|
||||
pshufhw %1, %1, 0x1B
|
||||
; ... and 0x4E then swaps the two 64-bit halves
pshufd %1, %1, 0x4E
|
||||
%elif cpuflag(mmxext)
|
||||
pshufw %1, %1, 0x1B
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
; Multiply packed signed 16-bit words and scale the product down by 15 bits.
;   %1 = dst (multiplicand in, result out)
;   %2 = src multiplier
;   %3 = scratch register, only used (and clobbered) on the mmxext path
; The ssse3 path rounds, the mmxext path truncates, so the two paths can
; differ in the lowest bit of the result.
%macro MUL16FIXED 3
|
||||
%if cpuflag(ssse3) ; dst, src, unused
|
||||
; dst = ((dst * src) + (1<<14)) >> 15
|
||||
pmulhrsw %1, %2
|
||||
%elif cpuflag(mmxext) ; dst, src, temp
|
||||
; dst = (dst * src) >> 15
|
||||
; pmulhw cuts off the bottom bit, so we have to lshift by 1 and add it back
|
||||
; in from the pmullw result.
|
||||
mova %3, %1
|
||||
pmulhw %1, %2
|
||||
pmullw %3, %2
|
||||
; isolate bit 15 of the low half of the product ...
psrlw %3, 15
|
||||
; ... and insert it as bit 0 of the doubled high half
psllw %1, 1
|
||||
por %1, %3
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
; Emit a symmetric 16-bit windowing function for the current instruction set.
;   %1 != 0 -> function is named apply_window_int16
;   %1 == 0 -> function is named apply_window_int16_round
; The 4th argument (len in the C prototype) is used directly as the byte
; offset 'offset'; 'offset2' starts one register below it. The loop walks
; offset2 downwards and offset upwards, applying each loaded window register
; at offset2 as-is and at offset with its words reversed.
%macro APPLY_WINDOW_INT16 1 ; %1 bitexact version
|
||||
%if %1
|
||||
cglobal apply_window_int16, 4,5,6, output, input, window, offset, offset2
|
||||
%else
|
||||
cglobal apply_window_int16_round, 4,5,6, output, input, window, offset, offset2
|
||||
%endif
|
||||
lea offset2q, [offsetq-mmsize]
|
||||
; m5 is a loop constant: the word-reversal shuffle mask on ssse3 (non-atom),
; otherwise the 32-bit rounding constant 16384 when %1 is set
%if cpuflag(ssse3) && notcpuflag(atom)
|
||||
mova m5, [pb_revwords]
|
||||
ALIGN 16
|
||||
%elif %1
|
||||
mova m5, [pd_16384]
|
||||
%endif
|
||||
.loop:
|
||||
%if cpuflag(ssse3)
|
||||
; This version does the 16x16->16 multiplication in-place without expanding
|
||||
; to 32-bit. The ssse3 version is bit-identical.
|
||||
mova m0, [windowq+offset2q]
|
||||
mova m1, [ inputq+offset2q]
|
||||
pmulhrsw m1, m0
|
||||
REVERSE_WORDS m0, m5
|
||||
pmulhrsw m0, [ inputq+offsetq ]
|
||||
mova [outputq+offset2q], m1
|
||||
mova [outputq+offsetq ], m0
|
||||
%elif %1
|
||||
; This version expands 16-bit to 32-bit, multiplies by the window,
|
||||
; adds 16384 for rounding, right shifts 15, then repacks back to words to
|
||||
; save to the output. The window is reversed for the second half.
|
||||
mova m3, [windowq+offset2q]
|
||||
mova m4, [ inputq+offset2q]
|
||||
; Interleaving with a zeroed m0 puts a zero in the low word and the window
; coefficient in the high word of every dword. m1 is deliberately not
; cleared: its stale words land in the low word of every dword, where
; pmaddwd multiplies them by that zero, so they do not affect the sum.
pxor m0, m0
|
||||
punpcklwd m0, m3
|
||||
punpcklwd m1, m4
|
||||
pmaddwd m0, m1
|
||||
paddd m0, m5
|
||||
psrad m0, 15
|
||||
pxor m2, m2
|
||||
punpckhwd m2, m3
|
||||
punpckhwd m1, m4
|
||||
pmaddwd m2, m1
|
||||
paddd m2, m5
|
||||
psrad m2, 15
|
||||
packssdw m0, m2
|
||||
mova [outputq+offset2q], m0
|
||||
; second half: same sequence with the window words reversed
REVERSE_WORDS m3
|
||||
mova m4, [ inputq+offsetq]
|
||||
pxor m0, m0
|
||||
punpcklwd m0, m3
|
||||
punpcklwd m1, m4
|
||||
pmaddwd m0, m1
|
||||
paddd m0, m5
|
||||
psrad m0, 15
|
||||
pxor m2, m2
|
||||
punpckhwd m2, m3
|
||||
punpckhwd m1, m4
|
||||
pmaddwd m2, m1
|
||||
paddd m2, m5
|
||||
psrad m2, 15
|
||||
packssdw m0, m2
|
||||
mova [outputq+offsetq], m0
|
||||
%else
|
||||
; This version does the 16x16->16 multiplication in-place without expanding
|
||||
; to 32-bit. The mmxext and sse2 versions do not use rounding, and
|
||||
; therefore are not bit-identical to the C version.
|
||||
mova m0, [windowq+offset2q]
|
||||
mova m1, [ inputq+offset2q]
|
||||
mova m2, [ inputq+offsetq ]
|
||||
MUL16FIXED m1, m0, m3
|
||||
REVERSE_WORDS m0
|
||||
MUL16FIXED m2, m0, m3
|
||||
mova [outputq+offset2q], m1
|
||||
mova [outputq+offsetq ], m2
|
||||
%endif
|
||||
; move both offsets one register outwards; the sub sets the carry flag once
; offset2 would drop below zero, which is what ends the jae loop
add offsetd, mmsize
|
||||
sub offset2d, mmsize
|
||||
jae .loop
|
||||
REP_RET
|
||||
%endmacro
|
||||
|
||||
INIT_MMX mmxext
|
||||
APPLY_WINDOW_INT16 0
|
||||
INIT_XMM sse2
|
||||
APPLY_WINDOW_INT16 0
|
||||
|
||||
INIT_MMX mmxext
|
||||
APPLY_WINDOW_INT16 1
|
||||
INIT_XMM sse2
|
||||
APPLY_WINDOW_INT16 1
|
||||
INIT_XMM ssse3
|
||||
APPLY_WINDOW_INT16 1
|
||||
INIT_XMM ssse3, atom
|
||||
APPLY_WINDOW_INT16 1
|
||||
|
@ -30,17 +30,6 @@ void ff_ac3_exponent_min_mmx (uint8_t *exp, int num_reuse_blocks, int nb_coefs
|
||||
void ff_ac3_exponent_min_mmxext(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
|
||||
void ff_ac3_exponent_min_sse2 (uint8_t *exp, int num_reuse_blocks, int nb_coefs);
|
||||
|
||||
int ff_ac3_max_msb_abs_int16_mmx (const int16_t *src, int len);
|
||||
int ff_ac3_max_msb_abs_int16_mmxext(const int16_t *src, int len);
|
||||
int ff_ac3_max_msb_abs_int16_sse2 (const int16_t *src, int len);
|
||||
int ff_ac3_max_msb_abs_int16_ssse3(const int16_t *src, int len);
|
||||
|
||||
void ff_ac3_lshift_int16_mmx (int16_t *src, unsigned int len, unsigned int shift);
|
||||
void ff_ac3_lshift_int16_sse2(int16_t *src, unsigned int len, unsigned int shift);
|
||||
|
||||
void ff_ac3_rshift_int32_mmx (int32_t *src, unsigned int len, unsigned int shift);
|
||||
void ff_ac3_rshift_int32_sse2(int32_t *src, unsigned int len, unsigned int shift);
|
||||
|
||||
void ff_float_to_fixed24_3dnow(int32_t *dst, const float *src, unsigned int len);
|
||||
void ff_float_to_fixed24_sse (int32_t *dst, const float *src, unsigned int len);
|
||||
void ff_float_to_fixed24_sse2 (int32_t *dst, const float *src, unsigned int len);
|
||||
@ -50,28 +39,12 @@ int ff_ac3_compute_mantissa_size_sse2(uint16_t mant_cnt[6][16]);
|
||||
void ff_ac3_extract_exponents_sse2 (uint8_t *exp, int32_t *coef, int nb_coefs);
|
||||
void ff_ac3_extract_exponents_ssse3(uint8_t *exp, int32_t *coef, int nb_coefs);
|
||||
|
||||
void ff_apply_window_int16_round_mmxext(int16_t *output, const int16_t *input,
|
||||
const int16_t *window, unsigned int len);
|
||||
void ff_apply_window_int16_round_sse2(int16_t *output, const int16_t *input,
|
||||
const int16_t *window, unsigned int len);
|
||||
void ff_apply_window_int16_mmxext(int16_t *output, const int16_t *input,
|
||||
const int16_t *window, unsigned int len);
|
||||
void ff_apply_window_int16_sse2(int16_t *output, const int16_t *input,
|
||||
const int16_t *window, unsigned int len);
|
||||
void ff_apply_window_int16_ssse3(int16_t *output, const int16_t *input,
|
||||
const int16_t *window, unsigned int len);
|
||||
void ff_apply_window_int16_ssse3_atom(int16_t *output, const int16_t *input,
|
||||
const int16_t *window, unsigned int len);
|
||||
|
||||
av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
|
||||
{
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
|
||||
if (EXTERNAL_MMX(cpu_flags)) {
|
||||
c->ac3_exponent_min = ff_ac3_exponent_min_mmx;
|
||||
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx;
|
||||
c->ac3_lshift_int16 = ff_ac3_lshift_int16_mmx;
|
||||
c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx;
|
||||
}
|
||||
if (EXTERNAL_AMD3DNOW(cpu_flags)) {
|
||||
if (!bit_exact) {
|
||||
@ -80,43 +53,20 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
|
||||
}
|
||||
if (EXTERNAL_MMXEXT(cpu_flags)) {
|
||||
c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
|
||||
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmxext;
|
||||
if (bit_exact) {
|
||||
c->apply_window_int16 = ff_apply_window_int16_mmxext;
|
||||
} else {
|
||||
c->apply_window_int16 = ff_apply_window_int16_round_mmxext;
|
||||
}
|
||||
}
|
||||
if (EXTERNAL_SSE(cpu_flags)) {
|
||||
c->float_to_fixed24 = ff_float_to_fixed24_sse;
|
||||
}
|
||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||
c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
|
||||
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
|
||||
c->float_to_fixed24 = ff_float_to_fixed24_sse2;
|
||||
c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
|
||||
c->extract_exponents = ff_ac3_extract_exponents_sse2;
|
||||
if (bit_exact) {
|
||||
c->apply_window_int16 = ff_apply_window_int16_sse2;
|
||||
}
|
||||
}
|
||||
|
||||
if (EXTERNAL_SSE2_FAST(cpu_flags)) {
|
||||
c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
|
||||
c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
|
||||
if (!bit_exact) {
|
||||
c->apply_window_int16 = ff_apply_window_int16_round_sse2;
|
||||
}
|
||||
}
|
||||
|
||||
if (EXTERNAL_SSSE3(cpu_flags)) {
|
||||
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
|
||||
if (cpu_flags & AV_CPU_FLAG_ATOM) {
|
||||
c->apply_window_int16 = ff_apply_window_int16_ssse3_atom;
|
||||
} else {
|
||||
if (!(cpu_flags & AV_CPU_FLAG_ATOM))
|
||||
c->extract_exponents = ff_ac3_extract_exponents_ssse3;
|
||||
c->apply_window_int16 = ff_apply_window_int16_ssse3;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user