mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-11-26 19:01:44 +02:00
x86/aacencdsp: add AVX version of quantize_bands
quant_bands_signed_c: 1928.0
quant_bands_signed_sse2: 406.0
quant_bands_signed_avx: 207.0
quant_bands_unsigned_c: 1702.0
quant_bands_unsigned_sse2: 404.0
quant_bands_unsigned_avx: 209.0

Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
parent
7a3369398f
commit
91b9af0058
@ -242,7 +242,7 @@ typedef struct AACEncContext {
|
||||
enum RawDataBlockType cur_type; ///< channel group type cur_channel belongs to
|
||||
|
||||
AudioFrameQueue afq;
|
||||
DECLARE_ALIGNED(16, int, qcoefs)[96]; ///< quantized coefficients
|
||||
DECLARE_ALIGNED(32, int, qcoefs)[96]; ///< quantized coefficients
|
||||
DECLARE_ALIGNED(32, float, scoefs)[1024]; ///< scaled coefficients
|
||||
|
||||
uint16_t quantize_band_cost_cache_generation;
|
||||
|
@ -53,8 +53,19 @@ cglobal abs_pow34, 3, 3, 3, out, in, size
|
||||
; int size, int is_signed, int maxval, const float Q34,
|
||||
; const float rounding)
|
||||
;*******************************************************************
|
||||
INIT_XMM sse2
|
||||
%macro AAC_QUANTIZE_BANDS 0
|
||||
cglobal aac_quantize_bands, 5, 5, 6, out, in, scaled, size, is_signed, maxval, Q34, rounding
|
||||
%if mmsize == 32
|
||||
vbroadcastss m0, Q34m
|
||||
vbroadcastss m1, roundingm
|
||||
%if UNIX64 == 0
|
||||
cvtsi2ss xm3, dword maxvalm
|
||||
%else
|
||||
cvtsi2ss xm3, maxvald
|
||||
%endif
|
||||
shufps xm3, xm3, xm3, 0
|
||||
vinsertf128 m3, m3, xm3, 1
|
||||
%else ; mmsize == 16
|
||||
%if UNIX64 == 0
|
||||
movss m0, Q34m
|
||||
movss m1, roundingm
|
||||
@ -65,9 +76,13 @@ cglobal aac_quantize_bands, 5, 5, 6, out, in, scaled, size, is_signed, maxval, Q
|
||||
shufps m0, m0, 0
|
||||
shufps m1, m1, 0
|
||||
shufps m3, m3, 0
|
||||
%endif
|
||||
shl is_signedd, 31
|
||||
movd m4, is_signedd
|
||||
shufps m4, m4, 0
|
||||
movd xm4, is_signedd
|
||||
shufps xm4, xm4, xm4, 0
|
||||
%if mmsize == 32
|
||||
vinsertf128 m4, m4, xm4, 1
|
||||
%endif
|
||||
shl sized, 2
|
||||
add inq, sizeq
|
||||
add outq, sizeq
|
||||
@ -84,3 +99,9 @@ cglobal aac_quantize_bands, 5, 5, 6, out, in, scaled, size, is_signed, maxval, Q
|
||||
add sizeq, mmsize
|
||||
jl .loop
|
||||
RET
|
||||
%endmacro
|
||||
|
||||
INIT_XMM sse2
|
||||
AAC_QUANTIZE_BANDS
|
||||
INIT_YMM avx
|
||||
AAC_QUANTIZE_BANDS
|
||||
|
@ -30,6 +30,9 @@ void ff_abs_pow34_sse(float *out, const float *in, const int size);
|
||||
void ff_aac_quantize_bands_sse2(int *out, const float *in, const float *scaled,
|
||||
int size, int is_signed, int maxval, const float Q34,
|
||||
const float rounding);
|
||||
void ff_aac_quantize_bands_avx(int *out, const float *in, const float *scaled,
|
||||
int size, int is_signed, int maxval, const float Q34,
|
||||
const float rounding);
|
||||
|
||||
av_cold void ff_aacenc_dsp_init_x86(AACEncDSPContext *s)
|
||||
{
|
||||
@ -40,4 +43,7 @@ av_cold void ff_aacenc_dsp_init_x86(AACEncDSPContext *s)
|
||||
|
||||
if (EXTERNAL_SSE2(cpu_flags))
|
||||
s->quant_bands = ff_aac_quantize_bands_sse2;
|
||||
|
||||
if (EXTERNAL_AVX_FAST(cpu_flags))
|
||||
s->quant_bands = ff_aac_quantize_bands_avx;
|
||||
}
|
||||
|
@ -81,8 +81,8 @@ static void test_quant_bands(AACEncDSPContext *s)
|
||||
for (int sign = 0; sign <= 1; sign++) {
|
||||
if (check_func(s->quant_bands, "quant_bands_%s",
|
||||
sign ? "signed" : "unsigned")) {
|
||||
LOCAL_ALIGNED_16(int, out, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED_16(int, out2, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED_32(int, out, [BUF_SIZE]);
|
||||
LOCAL_ALIGNED_32(int, out2, [BUF_SIZE]);
|
||||
|
||||
call_ref(out, in, scaled, BUF_SIZE, sign, maxval, q34, rounding);
|
||||
call_new(out2, in, scaled, BUF_SIZE, sign, maxval, q34, rounding);
|
||||
|
Loading…
Reference in New Issue
Block a user