mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
Merge commit '12004a9a7f20e44f4da2ee6c372d5e1794c8d6c5'
* commit '12004a9a7f20e44f4da2ee6c372d5e1794c8d6c5':
audiodsp/x86: yasmify vector_clipf_sse
audiodsp: reorder arguments for vector_clipf
Merged the version from Libav after a discussion with James Almer on
IRC:
19:22 <ubitux> jamrial: opinion on 12004a9a7f20e44f4da2ee6c372d5e1794c8d6c5?
19:23 <ubitux> it was apparently yasmified differently
19:23 <ubitux> (it depends on the previous commit arg shuffle)
19:24 <ubitux> i don't see the magic movsxdifnidn in your port btw
19:24 <ubitux> it's a port from 1d36defe94
19:25 <jamrial> seems better thanks to said arg shuffle
19:25 <jamrial> the loop is the same, but init is simpler
19:25 <jamrial> probably worth merging
19:25 <ubitux> OK
19:25 <ubitux> thanks
19:26 <jamrial> curious they didn't make len ptrdiff_t after the previous bunch of commits, heh
19:26 <ubitux> yeah indeed
Both commits are merged at the same time to prevent a conflict with our
existing yasmified ff_vector_clipf_sse.
Merged-by: Clément Bœsch <u@pkh.me>
This commit is contained in:
commit
83cd80d10a
@@ -121,7 +121,7 @@ static void sum_square_butterfly(AC3EncodeContext *s, float sum[4],
|
||||
static void clip_coefficients(AudioDSPContext *adsp, float *coef,
|
||||
unsigned int len)
|
||||
{
|
||||
adsp->vector_clipf(coef, coef, COEF_MIN, COEF_MAX, len);
|
||||
adsp->vector_clipf(coef, coef, len, COEF_MIN, COEF_MAX);
|
||||
}
|
||||
|
||||
|
||||
|
@@ -25,8 +25,7 @@
|
||||
#include "libavcodec/audiodsp.h"
|
||||
#include "audiodsp_arm.h"
|
||||
|
||||
void ff_vector_clipf_neon(float *dst, const float *src, float min, float max,
|
||||
int len);
|
||||
void ff_vector_clipf_neon(float *dst, const float *src, int len, float min, float max);
|
||||
void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min,
|
||||
int32_t max, unsigned int len);
|
||||
|
||||
|
@@ -24,9 +24,8 @@
|
||||
function ff_vector_clipf_neon, export=1
|
||||
VFP vdup.32 q1, d0[1]
|
||||
VFP vdup.32 q0, d0[0]
|
||||
NOVFP vdup.32 q0, r2
|
||||
NOVFP vdup.32 q1, r3
|
||||
NOVFP ldr r2, [sp]
|
||||
NOVFP vdup.32 q0, r3
|
||||
NOVFP vld1.32 {d2[],d3[]}, [sp]
|
||||
vld1.f32 {q2},[r1,:128]!
|
||||
vmin.f32 q10, q2, q1
|
||||
vld1.f32 {q3},[r1,:128]!
|
||||
|
@@ -55,8 +55,8 @@ static void vector_clipf_c_opposite_sign(float *dst, const float *src,
|
||||
}
|
||||
}
|
||||
|
||||
static void vector_clipf_c(float *dst, const float *src,
|
||||
float min, float max, int len)
|
||||
static void vector_clipf_c(float *dst, const float *src, int len,
|
||||
float min, float max)
|
||||
{
|
||||
int i;
|
||||
|
||||
|
@@ -48,7 +48,8 @@ typedef struct AudioDSPContext {
|
||||
/* assume len is a multiple of 16, and arrays are 16-byte aligned */
|
||||
void (*vector_clipf)(float *dst /* align 16 */,
|
||||
const float *src /* align 16 */,
|
||||
float min, float max, int len /* align 16 */);
|
||||
int len /* align 16 */,
|
||||
float min, float max);
|
||||
} AudioDSPContext;
|
||||
|
||||
void ff_audiodsp_init(AudioDSPContext *c);
|
||||
|
@@ -882,7 +882,7 @@ static inline void decode_bytes_and_gain(COOKContext *q, COOKSubpacket *p,
|
||||
static void saturate_output_float(COOKContext *q, float *out)
|
||||
{
|
||||
q->adsp.vector_clipf(out, q->mono_mdct_output + q->samples_per_channel,
|
||||
-1.0f, 1.0f, FFALIGN(q->samples_per_channel, 8));
|
||||
FFALIGN(q->samples_per_channel, 8), -1.0f, 1.0f);
|
||||
}
|
||||
|
||||
|
||||
|
@@ -132,46 +132,45 @@ VECTOR_CLIP_INT32 11, 1, 1, 0
|
||||
VECTOR_CLIP_INT32 6, 1, 0, 0
|
||||
%endif
|
||||
|
||||
;-----------------------------------------------------
|
||||
;void ff_vector_clipf(float *dst, const float *src,
|
||||
; float min, float max, int len)
|
||||
;-----------------------------------------------------
|
||||
; void ff_vector_clipf_sse(float *dst, const float *src,
|
||||
; int len, float min, float max)
|
||||
INIT_XMM sse
|
||||
%if UNIX64
|
||||
cglobal vector_clipf, 3,3,6, dst, src, len
|
||||
%else
|
||||
cglobal vector_clipf, 5,5,6, dst, src, min, max, len
|
||||
cglobal vector_clipf, 3, 3, 6, dst, src, len, min, max
|
||||
%if ARCH_X86_32
|
||||
VBROADCASTSS m0, minm
|
||||
VBROADCASTSS m1, maxm
|
||||
%elif WIN64
|
||||
VBROADCASTSS m0, m3
|
||||
VBROADCASTSS m1, maxm
|
||||
%else ; 64bit sysv
|
||||
VBROADCASTSS m0, m0
|
||||
VBROADCASTSS m1, m1
|
||||
%endif
|
||||
%if WIN64
|
||||
SWAP 0, 2
|
||||
SWAP 1, 3
|
||||
%elif ARCH_X86_32
|
||||
movss m0, minm
|
||||
movss m1, maxm
|
||||
%endif
|
||||
SPLATD m0
|
||||
SPLATD m1
|
||||
shl lend, 2
|
||||
add srcq, lenq
|
||||
add dstq, lenq
|
||||
neg lenq
|
||||
.loop:
|
||||
mova m2, [srcq+lenq+mmsize*0]
|
||||
mova m3, [srcq+lenq+mmsize*1]
|
||||
mova m4, [srcq+lenq+mmsize*2]
|
||||
mova m5, [srcq+lenq+mmsize*3]
|
||||
maxps m2, m0
|
||||
maxps m3, m0
|
||||
maxps m4, m0
|
||||
maxps m5, m0
|
||||
minps m2, m1
|
||||
minps m3, m1
|
||||
minps m4, m1
|
||||
minps m5, m1
|
||||
mova [dstq+lenq+mmsize*0], m2
|
||||
mova [dstq+lenq+mmsize*1], m3
|
||||
mova [dstq+lenq+mmsize*2], m4
|
||||
mova [dstq+lenq+mmsize*3], m5
|
||||
add lenq, mmsize*4
|
||||
jl .loop
|
||||
REP_RET
|
||||
|
||||
movsxdifnidn lenq, lend
|
||||
|
||||
.loop
|
||||
mova m2, [srcq + 4 * lenq - 4 * mmsize]
|
||||
mova m3, [srcq + 4 * lenq - 3 * mmsize]
|
||||
mova m4, [srcq + 4 * lenq - 2 * mmsize]
|
||||
mova m5, [srcq + 4 * lenq - 1 * mmsize]
|
||||
|
||||
maxps m2, m0
|
||||
maxps m3, m0
|
||||
maxps m4, m0
|
||||
maxps m5, m0
|
||||
|
||||
minps m2, m1
|
||||
minps m3, m1
|
||||
minps m4, m1
|
||||
minps m5, m1
|
||||
|
||||
mova [dstq + 4 * lenq - 4 * mmsize], m2
|
||||
mova [dstq + 4 * lenq - 3 * mmsize], m3
|
||||
mova [dstq + 4 * lenq - 2 * mmsize], m4
|
||||
mova [dstq + 4 * lenq - 1 * mmsize], m5
|
||||
|
||||
sub lenq, mmsize
|
||||
jg .loop
|
||||
|
||||
RET
|
||||
|
@@ -38,7 +38,7 @@ void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src,
|
||||
void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src,
|
||||
int32_t min, int32_t max, unsigned int len);
|
||||
void ff_vector_clipf_sse(float *dst, const float *src,
|
||||
float min, float max, int len);
|
||||
int len, float min, float max);
|
||||
|
||||
av_cold void ff_audiodsp_init_x86(AudioDSPContext *c)
|
||||
{
|
||||
|
@@ -120,7 +120,7 @@ void checkasm_check_audiodsp(void)
|
||||
int i, len;
|
||||
|
||||
declare_func_emms(AV_CPU_FLAG_MMX, void, float *dst, const float *src,
|
||||
float min, float max, unsigned int len);
|
||||
int len, float min, float max);
|
||||
|
||||
val1 = (float)rnd() / (UINT_MAX >> 1) - 1.0f;
|
||||
val2 = (float)rnd() / (UINT_MAX >> 1) - 1.0f;
|
||||
@@ -133,13 +133,13 @@ void checkasm_check_audiodsp(void)
|
||||
len = rnd() % 128;
|
||||
len = 16 * FFMAX(len, 1);
|
||||
|
||||
call_ref(dst0, src, min, max, len);
|
||||
call_new(dst1, src, min, max, len);
|
||||
call_ref(dst0, src, len, min, max);
|
||||
call_new(dst1, src, len, min, max);
|
||||
for (i = 0; i < len; i++) {
|
||||
if (!float_near_ulp_array(dst0, dst1, 3, len))
|
||||
fail();
|
||||
}
|
||||
bench_new(dst1, src, min, max, MAX_SIZE);
|
||||
bench_new(dst1, src, MAX_SIZE, min, max);
|
||||
}
|
||||
|
||||
report("audiodsp");
|
||||
|
Loading…
Reference in New Issue
Block a user