1
0
mirror of https://github.com/FFmpeg/FFmpeg.git synced 2025-01-08 13:22:53 +02:00

mips: port optimizations to mips n64

This mainly consists of replacing all the pointer arithmetic 'addiu'
instructions with PTR_ADDIU which will handle the differences in pointer
sizes when compiled on 64-bit MIPS systems.

The header asmdefs.h contains the PTR_ macros which expand to the correct MIPS
instructions to manipulate registers containing pointers.

Signed-off-by: James Cowgill <james410@cowgill.org.uk>
Reviewed-by: Nedeljko Babic <Nedeljko.Babic@imgtec.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
James Cowgill 2015-03-05 17:40:15 +00:00 committed by Michael Niedermayer
parent eae13eae9d
commit 157d6f0d5b
20 changed files with 247 additions and 178 deletions

View File

@ -56,6 +56,7 @@
#include "aacdec_mips.h"
#include "libavcodec/aactab.h"
#include "libavcodec/sinewin.h"
#include "libavutil/mips/asmdefs.h"
#if HAVE_INLINE_ASM
static av_always_inline void float_copy(float *dst, const float *src, int count)
@ -80,7 +81,7 @@ static av_always_inline void float_copy(float *dst, const float *src, int count)
"lw %[temp5], 20(%[src]) \n\t"
"lw %[temp6], 24(%[src]) \n\t"
"lw %[temp7], 28(%[src]) \n\t"
"addiu %[src], %[src], 32 \n\t"
PTR_ADDIU "%[src], %[src], 32 \n\t"
"sw %[temp0], 0(%[dst]) \n\t"
"sw %[temp1], 4(%[dst]) \n\t"
"sw %[temp2], 8(%[dst]) \n\t"
@ -90,7 +91,7 @@ static av_always_inline void float_copy(float *dst, const float *src, int count)
"sw %[temp6], 24(%[dst]) \n\t"
"sw %[temp7], 28(%[dst]) \n\t"
"bne %[src], %[loop_end], 1b \n\t"
"addiu %[dst], %[dst], 32 \n\t"
PTR_ADDIU "%[dst], %[dst], 32 \n\t"
".set pop \n\t"
: [temp0]"=&r"(temp[0]), [temp1]"=&r"(temp[1]),
@ -250,7 +251,7 @@ static void apply_ltp_mips(AACContext *ac, SingleChannelElement *sce)
"sw $0, 4(%[p_predTime]) \n\t"
"sw $0, 8(%[p_predTime]) \n\t"
"sw $0, 12(%[p_predTime]) \n\t"
"addiu %[p_predTime], %[p_predTime], 16 \n\t"
PTR_ADDIU "%[p_predTime], %[p_predTime], 16 \n\t"
: [p_predTime]"+r"(p_predTime)
:
@ -261,7 +262,7 @@ static void apply_ltp_mips(AACContext *ac, SingleChannelElement *sce)
__asm__ volatile (
"sw $0, 0(%[p_predTime]) \n\t"
"addiu %[p_predTime], %[p_predTime], 4 \n\t"
PTR_ADDIU "%[p_predTime], %[p_predTime], 4 \n\t"
: [p_predTime]"+r"(p_predTime)
:
@ -315,9 +316,9 @@ static av_always_inline void fmul_and_reverse(float *dst, const float *src0, con
"swc1 %[temp9], 4(%[ptr1]) \n\t"
"swc1 %[temp10], 8(%[ptr1]) \n\t"
"swc1 %[temp11], 12(%[ptr1]) \n\t"
"addiu %[ptr1], %[ptr1], 16 \n\t"
"addiu %[ptr2], %[ptr2], -16 \n\t"
"addiu %[ptr3], %[ptr3], -16 \n\t"
PTR_ADDIU "%[ptr1], %[ptr1], 16 \n\t"
PTR_ADDIU "%[ptr2], %[ptr2], -16 \n\t"
PTR_ADDIU "%[ptr3], %[ptr3], -16 \n\t"
: [temp0]"=&f"(temp[0]), [temp1]"=&f"(temp[1]),
[temp2]"=&f"(temp[2]), [temp3]"=&f"(temp[3]),
@ -358,7 +359,7 @@ static void update_ltp_mips(AACContext *ac, SingleChannelElement *sce)
"sw $0, 20(%[p_saved_ltp]) \n\t"
"sw $0, 24(%[p_saved_ltp]) \n\t"
"sw $0, 28(%[p_saved_ltp]) \n\t"
"addiu %[p_saved_ltp], %[p_saved_ltp], 32 \n\t"
PTR_ADDIU "%[p_saved_ltp],%[p_saved_ltp], 32 \n\t"
"bne %[p_saved_ltp], %[loop_end1], 1b \n\t"
: [p_saved_ltp]"+r"(p_saved_ltp)
@ -386,7 +387,7 @@ static void update_ltp_mips(AACContext *ac, SingleChannelElement *sce)
"lw %[temp5], 20(%[src]) \n\t"
"lw %[temp6], 24(%[src]) \n\t"
"lw %[temp7], 28(%[src]) \n\t"
"addiu %[src], %[src], 32 \n\t"
PTR_ADDIU "%[src], %[src], 32 \n\t"
"sw %[temp0], 0(%[dst]) \n\t"
"sw %[temp1], 4(%[dst]) \n\t"
"sw %[temp2], 8(%[dst]) \n\t"
@ -404,7 +405,7 @@ static void update_ltp_mips(AACContext *ac, SingleChannelElement *sce)
"sw $0, 2328(%[dst]) \n\t"
"sw $0, 2332(%[dst]) \n\t"
"bne %[src], %[loop_end], 1b \n\t"
" addiu %[dst], %[dst], 32 \n\t"
PTR_ADDIU "%[dst], %[dst], 32 \n\t"
".set pop \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),

View File

@ -58,6 +58,7 @@
#define AVCODEC_MIPS_AACDEC_FLOAT_H
#include "libavcodec/aac.h"
#include "libavutil/mips/asmdefs.h"
#if HAVE_INLINE_ASM && HAVE_MIPSFPU
static inline float *VMUL2_mips(float *dst, const float *v, unsigned idx,
@ -77,7 +78,7 @@ static inline float *VMUL2_mips(float *dst, const float *v, unsigned idx,
"lwxc1 %[temp1], %[temp4](%[v]) \n\t"
"mul.s %[temp0], %[temp0], %[temp2] \n\t"
"mul.s %[temp1], %[temp1], %[temp2] \n\t"
"addiu %[ret], %[dst], 8 \n\t"
PTR_ADDIU "%[ret], %[dst], 8 \n\t"
"swc1 %[temp0], 0(%[dst]) \n\t"
"swc1 %[temp1], 4(%[dst]) \n\t"
@ -115,7 +116,7 @@ static inline float *VMUL4_mips(float *dst, const float *v, unsigned idx,
"mul.s %[temp6], %[temp6], %[temp4] \n\t"
"mul.s %[temp7], %[temp7], %[temp4] \n\t"
"mul.s %[temp8], %[temp8], %[temp4] \n\t"
"addiu %[ret], %[dst], 16 \n\t"
PTR_ADDIU "%[ret], %[dst], 16 \n\t"
"swc1 %[temp5], 0(%[dst]) \n\t"
"swc1 %[temp6], 4(%[dst]) \n\t"
"swc1 %[temp7], 8(%[dst]) \n\t"
@ -157,7 +158,7 @@ static inline float *VMUL2S_mips(float *dst, const float *v, unsigned idx,
"mtc1 %[temp4], %[temp7] \n\t"
"mul.s %[temp8], %[temp8], %[temp6] \n\t"
"mul.s %[temp9], %[temp9], %[temp7] \n\t"
"addiu %[ret], %[dst], 8 \n\t"
PTR_ADDIU "%[ret], %[dst], 8 \n\t"
"swc1 %[temp8], 0(%[dst]) \n\t"
"swc1 %[temp9], 4(%[dst]) \n\t"
@ -220,7 +221,7 @@ static inline float *VMUL4S_mips(float *dst, const float *v, unsigned idx,
"mul.s %[temp11], %[temp11], %[temp15] \n\t"
"mul.s %[temp12], %[temp12], %[temp16] \n\t"
"mul.s %[temp13], %[temp13], %[temp17] \n\t"
"addiu %[ret], %[dst], 16 \n\t"
PTR_ADDIU "%[ret], %[dst], 16 \n\t"
"swc1 %[temp10], 0(%[dst]) \n\t"
"swc1 %[temp11], 4(%[dst]) \n\t"
"swc1 %[temp12], 8(%[dst]) \n\t"

View File

@ -54,6 +54,7 @@
#include "config.h"
#include "libavcodec/aacpsdsp.h"
#include "libavutil/mips/asmdefs.h"
#if HAVE_INLINE_ASM
static void ps_hybrid_analysis_ileave_mips(float (*out)[32][2], float L[2][38][64],
@ -86,8 +87,8 @@ static void ps_hybrid_analysis_ileave_mips(float (*out)[32][2], float L[2][38][6
"sw %[temp5], 20(%[out1]) \n\t"
"sw %[temp6], 24(%[out1]) \n\t"
"sw %[temp7], 28(%[out1]) \n\t"
"addiu %[out1], %[out1], 32 \n\t"
"addiu %[L1], %[L1], 1024 \n\t"
PTR_ADDIU "%[out1], %[out1], 32 \n\t"
PTR_ADDIU "%[L1], %[L1], 1024 \n\t"
"bne %[out1], %[j], 1b \n\t"
: [out1]"+r"(out1), [L1]"+r"(L1), [j]"+r"(j),
@ -128,10 +129,10 @@ static void ps_hybrid_synthesis_deint_mips(float out[2][38][64],
"lw %[temp5], 16(%[in2]) \n\t"
"lw %[temp6], 24(%[in1]) \n\t"
"lw %[temp7], 24(%[in2]) \n\t"
"addiu %[out1], %[out1], 1024 \n\t"
"addiu %[out2], %[out2], 1024 \n\t"
"addiu %[in1], %[in1], 32 \n\t"
"addiu %[in2], %[in2], 32 \n\t"
PTR_ADDIU "%[out1], %[out1], 1024 \n\t"
PTR_ADDIU "%[out2], %[out2], 1024 \n\t"
PTR_ADDIU "%[in1], %[in1], 32 \n\t"
PTR_ADDIU "%[in2], %[in2], 32 \n\t"
"sw %[temp0], -1024(%[out1]) \n\t"
"sw %[temp1], -1024(%[out2]) \n\t"
"sw %[temp2], -768(%[out1]) \n\t"
@ -161,10 +162,10 @@ static void ps_hybrid_synthesis_deint_mips(float out[2][38][64],
"lw %[temp5], 16(%[in2]) \n\t"
"lw %[temp6], 24(%[in1]) \n\t"
"lw %[temp7], 24(%[in2]) \n\t"
"addiu %[out1], %[out1], -7164 \n\t"
"addiu %[out2], %[out2], -7164 \n\t"
"addiu %[in1], %[in1], 32 \n\t"
"addiu %[in2], %[in2], 32 \n\t"
PTR_ADDIU "%[out1], %[out1], -7164 \n\t"
PTR_ADDIU "%[out2], %[out2], -7164 \n\t"
PTR_ADDIU "%[in1], %[in1], 32 \n\t"
PTR_ADDIU "%[in2], %[in2], 32 \n\t"
"sw %[temp0], 7164(%[out1]) \n\t"
"sw %[temp1], 7164(%[out2]) \n\t"
"sw %[temp2], 7420(%[out1]) \n\t"
@ -226,8 +227,8 @@ static void ps_add_squares_mips(float *dst, const float (*src)[2], int n)
"swc1 %[temp2], 4(%[dst0]) \n\t"
"swc1 %[temp4], 8(%[dst0]) \n\t"
"swc1 %[temp6], 12(%[dst0]) \n\t"
"addiu %[dst0], %[dst0], 16 \n\t"
"addiu %[src0], %[src0], 32 \n\t"
PTR_ADDIU "%[dst0], %[dst0], 16 \n\t"
PTR_ADDIU "%[src0], %[src0], 32 \n\t"
: [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
[temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
@ -257,14 +258,14 @@ static void ps_mul_pair_single_mips(float (*dst)[2], float (*src0)[2], float *sr
"lwc1 %[temp2], 0(%[p_s1]) \n\t"
"lwc1 %[temp0], 0(%[p_s0]) \n\t"
"lwc1 %[temp1], 4(%[p_s0]) \n\t"
"addiu %[p_d], %[p_d], 8 \n\t"
PTR_ADDIU "%[p_d], %[p_d], 8 \n\t"
"mul.s %[temp0], %[temp0], %[temp2] \n\t"
"mul.s %[temp1], %[temp1], %[temp2] \n\t"
"addiu %[p_s0], %[p_s0], 8 \n\t"
PTR_ADDIU "%[p_s0], %[p_s0], 8 \n\t"
"swc1 %[temp0], -8(%[p_d]) \n\t"
"swc1 %[temp1], -4(%[p_d]) \n\t"
"bne %[p_s1], %[end], 1b \n\t"
" addiu %[p_s1], %[p_s1], 4 \n\t"
PTR_ADDIU "%[p_s1], %[p_s1], 4 \n\t"
".set pop \n\t"
: [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
@ -355,13 +356,13 @@ static void ps_decorrelate_mips(float (*out)[2], float (*delay)[2],
"mul.s %[temp1], %[ag2], %[temp3] \n\t"
"lwc1 %[temp4], 0(%[p_t_gain]) \n\t"
"sub.s %[temp0], %[temp8], %[temp0] \n\t"
"addiu %[p_ap_delay], %[p_ap_delay], 8 \n\t"
PTR_ADDIU "%[p_ap_delay], %[p_ap_delay], 8 \n\t"
"sub.s %[temp1], %[temp9], %[temp1] \n\t"
"addiu %[p_t_gain], %[p_t_gain], 4 \n\t"
PTR_ADDIU "%[p_t_gain], %[p_t_gain], 4 \n\t"
"madd.s %[temp2], %[temp2], %[ag2], %[temp0] \n\t"
"addiu %[p_delay], %[p_delay], 8 \n\t"
PTR_ADDIU "%[p_delay], %[p_delay], 8 \n\t"
"madd.s %[temp3], %[temp3], %[ag2], %[temp1] \n\t"
"addiu %[p_out], %[p_out], 8 \n\t"
PTR_ADDIU "%[p_out], %[p_out], 8 \n\t"
"mul.s %[temp5], %[temp4], %[temp0] \n\t"
"mul.s %[temp6], %[temp4], %[temp1] \n\t"
"swc1 %[temp2], 624(%[p_ap_delay]) \n\t"
@ -414,9 +415,9 @@ static void ps_stereo_interpolate_mips(float (*l)[2], float (*r)[2],
"add.s %[h3], %[h3], %[hs3] \n\t"
"lwc1 %[r_im], 4(%[r]) \n\t"
"mul.s %[temp0], %[h0], %[l_re] \n\t"
"addiu %[l], %[l], 8 \n\t"
PTR_ADDIU "%[l], %[l], 8 \n\t"
"mul.s %[temp2], %[h1], %[l_re] \n\t"
"addiu %[r], %[r], 8 \n\t"
PTR_ADDIU "%[r], %[r], 8 \n\t"
"madd.s %[temp0], %[temp0], %[h2], %[r_re] \n\t"
"madd.s %[temp2], %[temp2], %[h3], %[r_re] \n\t"
"mul.s %[temp1], %[h0], %[l_im] \n\t"

View File

@ -56,6 +56,8 @@
#ifndef AVCODEC_MIPS_AACPSY_MIPS_H
#define AVCODEC_MIPS_AACPSY_MIPS_H
#include "libavutil/mips/asmdefs.h"
#if HAVE_INLINE_ASM && HAVE_MIPSFPU && ( PSY_LAME_FIR_LEN == 21 )
static void calc_thr_3gpp_mips(const FFPsyWindowInfo *wi, const int num_bands,
AacPsyChannel *pch, const uint8_t *band_sizes,
@ -185,7 +187,7 @@ static void psy_hp_filter_mips(const float *firbuf, float *hpfsmpl, const float
"madd.s %[sum1], %[sum1], $f8, %[coeff3] \n\t"
"madd.s %[sum2], %[sum2], $f11, %[coeff3] \n\t"
"lwc1 $f1, 36(%[fb]) \n\t"
"addiu %[fb], %[fb], 16 \n\t"
PTR_ADDIU "%[fb], %[fb], 16 \n\t"
"madd.s %[sum4], %[sum4], $f0, %[coeff3] \n\t"
"madd.s %[sum3], %[sum3], $f1, %[coeff3] \n\t"
"madd.s %[sum1], %[sum1], $f1, %[coeff4] \n\t"
@ -207,7 +209,7 @@ static void psy_hp_filter_mips(const float *firbuf, float *hpfsmpl, const float
"swc1 %[sum4], 12(%[hp]) \n\t"
"swc1 %[sum3], 8(%[hp]) \n\t"
"bne %[fb], %[fb_end], 1b \n\t"
" addiu %[hp], %[hp], 16 \n\t"
PTR_ADDIU "%[hp], %[hp], 16 \n\t"
".set pop \n\t"

View File

@ -53,6 +53,7 @@
#include "libavcodec/aac.h"
#include "libavcodec/aacsbr.h"
#include "libavutil/mips/asmdefs.h"
#define ENVELOPE_ADJUSTMENT_OFFSET 2
@ -81,9 +82,9 @@ static int sbr_lf_gen_mips(AACContext *ac, SpectralBandReplication *sbr,
"sw $0, 20(%[p_x1_low]) \n\t"
"sw $0, 24(%[p_x1_low]) \n\t"
"sw $0, 28(%[p_x1_low]) \n\t"
"addiu %[p_x1_low], %[p_x1_low], 32 \n\t"
PTR_ADDIU "%[p_x1_low],%[p_x1_low], 32 \n\t"
"bne %[p_x1_low], %[loop_end], 1b \n\t"
"addiu %[p_x1_low], %[p_x1_low], -10240 \n\t"
PTR_ADDIU "%[p_x1_low],%[p_x1_low], -10240 \n\t"
: [p_x1_low]"+r"(p_x1_low)
: [loop_end]"r"(loop_end)
@ -110,8 +111,8 @@ static int sbr_lf_gen_mips(AACContext *ac, SpectralBandReplication *sbr,
"sw %[temp5], 20(%[p_x_low]) \n\t"
"sw %[temp6], 24(%[p_x_low]) \n\t"
"sw %[temp7], 28(%[p_x_low]) \n\t"
"addiu %[p_x_low], %[p_x_low], 32 \n\t"
"addiu %[p_w], %[p_w], 1024 \n\t"
PTR_ADDIU "%[p_x_low], %[p_x_low], 32 \n\t"
PTR_ADDIU "%[p_w], %[p_w], 1024 \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
[temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
@ -147,8 +148,8 @@ static int sbr_lf_gen_mips(AACContext *ac, SpectralBandReplication *sbr,
"sw %[temp5], 20(%[p_x1_low]) \n\t"
"sw %[temp6], 24(%[p_x1_low]) \n\t"
"sw %[temp7], 28(%[p_x1_low]) \n\t"
"addiu %[p_x1_low], %[p_x1_low], 32 \n\t"
"addiu %[p_w1], %[p_w1], 1024 \n\t"
PTR_ADDIU "%[p_x1_low], %[p_x1_low], 32 \n\t"
PTR_ADDIU "%[p_w1], %[p_w1], 1024 \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
[temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
@ -188,9 +189,9 @@ static int sbr_x_gen_mips(SpectralBandReplication *sbr, float X[2][38][64],
"sw $0, 20(%[x1]) \n\t"
"sw $0, 24(%[x1]) \n\t"
"sw $0, 28(%[x1]) \n\t"
"addiu %[x1], %[x1], 32 \n\t"
PTR_ADDIU "%[x1],%[x1], 32 \n\t"
"bne %[x1], %[j], 1b \n\t"
"addiu %[x1], %[x1], -19456 \n\t"
PTR_ADDIU "%[x1],%[x1], -19456 \n\t"
: [x1]"+r"(x1)
: [j]"r"(j)
@ -210,8 +211,8 @@ static int sbr_x_gen_mips(SpectralBandReplication *sbr, float X[2][38][64],
"lw %[temp1], 4(%[X_low1]) \n\t"
"sw %[temp0], 0(%[x1]) \n\t"
"sw %[temp1], 9728(%[x1]) \n\t"
"addiu %[x1], %[x1], 256 \n\t"
"addiu %[X_low1], %[X_low1], 8 \n\t"
PTR_ADDIU "%[x1], %[x1], 256 \n\t"
PTR_ADDIU "%[X_low1], %[X_low1], 8 \n\t"
"addiu %[i], %[i], 1 \n\t"
"bne %[i], %[i_Temp], 2b \n\t"
@ -235,8 +236,8 @@ static int sbr_x_gen_mips(SpectralBandReplication *sbr, float X[2][38][64],
"lw %[temp1], 4(%[Y01]) \n\t"
"sw %[temp0], 0(%[x1]) \n\t"
"sw %[temp1], 9728(%[x1]) \n\t"
"addiu %[x1], %[x1], 256 \n\t"
"addiu %[Y01], %[Y01], 512 \n\t"
PTR_ADDIU "%[x1], %[x1], 256 \n\t"
PTR_ADDIU "%[Y01], %[Y01], 512 \n\t"
"addiu %[i], %[i], 1 \n\t"
"bne %[i], %[i_Temp], 3b \n\t"
@ -263,8 +264,8 @@ static int sbr_x_gen_mips(SpectralBandReplication *sbr, float X[2][38][64],
"lw %[temp1], 4(%[X_low1]) \n\t"
"sw %[temp0], 0(%[x1]) \n\t"
"sw %[temp1], 9728(%[x1]) \n\t"
"addiu %[x1], %[x1], 256 \n\t"
"addiu %[X_low1], %[X_low1], 8 \n\t"
PTR_ADDIU "%[x1], %[x1], 256 \n\t"
PTR_ADDIU "%[X_low1],%[X_low1], 8 \n\t"
"addiu %[i], %[i], 1 \n\t"
"bne %[i], %[temp3], 4b \n\t"
@ -291,8 +292,8 @@ static int sbr_x_gen_mips(SpectralBandReplication *sbr, float X[2][38][64],
"lw %[temp1], 4(%[Y11]) \n\t"
"sw %[temp0], 0(%[x1]) \n\t"
"sw %[temp1], 9728(%[x1]) \n\t"
"addiu %[x1], %[x1], 256 \n\t"
"addiu %[Y11], %[Y11], 512 \n\t"
PTR_ADDIU "%[x1], %[x1], 256 \n\t"
PTR_ADDIU "%[Y11], %[Y11], 512 \n\t"
"addiu %[i], %[i], 1 \n\t"
"bne %[i], %[temp2], 5b \n\t"
@ -370,10 +371,10 @@ static void sbr_hf_assemble_mips(float Y1[38][64][2],
"sw %[temp2], 4(%[q_temp1]) \n\t"
"sw %[temp3], 8(%[q_temp1]) \n\t"
"sw %[temp4], 12(%[q_temp1]) \n\t"
"addiu %[pok], %[pok], 16 \n\t"
"addiu %[g_temp1], %[g_temp1], 16 \n\t"
"addiu %[pok1], %[pok1], 16 \n\t"
"addiu %[q_temp1], %[q_temp1], 16 \n\t"
PTR_ADDIU "%[pok], %[pok], 16 \n\t"
PTR_ADDIU "%[g_temp1], %[g_temp1], 16 \n\t"
PTR_ADDIU "%[pok1], %[pok1], 16 \n\t"
PTR_ADDIU "%[q_temp1], %[q_temp1], 16 \n\t"
: [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
@ -390,10 +391,10 @@ static void sbr_hf_assemble_mips(float Y1[38][64][2],
"lw %[temp2], 0(%[pok1]) \n\t"
"sw %[temp1], 0(%[g_temp1]) \n\t"
"sw %[temp2], 0(%[q_temp1]) \n\t"
"addiu %[pok], %[pok], 4 \n\t"
"addiu %[g_temp1], %[g_temp1], 4 \n\t"
"addiu %[pok1], %[pok1], 4 \n\t"
"addiu %[q_temp1], %[q_temp1], 4 \n\t"
PTR_ADDIU "%[pok], %[pok], 4 \n\t"
PTR_ADDIU "%[g_temp1], %[g_temp1], 4 \n\t"
PTR_ADDIU "%[pok1], %[pok1], 4 \n\t"
PTR_ADDIU "%[q_temp1], %[q_temp1], 4 \n\t"
: [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
@ -460,8 +461,8 @@ static void sbr_hf_assemble_mips(float Y1[38][64][2],
"madd.s %[temp5], %[temp3], %[temp1], %[B_f] \n\t"
"swc1 %[temp4], 0(%[out]) \n\t"
"swc1 %[temp5], 8(%[out]) \n\t"
"addiu %[in], %[in], 8 \n\t"
"addiu %[out], %[out], 16 \n\t"
PTR_ADDIU "%[in], %[in], 8 \n\t"
PTR_ADDIU "%[out], %[out], 16 \n\t"
: [temp0]"=&f" (temp0), [temp1]"=&f"(temp1),
[temp4]"=&f" (temp4), [temp5]"=&f"(temp5),

View File

@ -56,6 +56,7 @@
#include "libavcodec/aac.h"
#include "libavcodec/sbr.h"
#include "libavutil/mips/asmdefs.h"
#if HAVE_INLINE_ASM
static void sbr_qmf_analysis_mips(AVFloatDSPContext *fdsp, FFTContext *mdct,
@ -89,8 +90,8 @@ static void sbr_qmf_analysis_mips(AVFloatDSPContext *fdsp, FFTContext *mdct,
"sw %[temp5], 20(%[w0]) \n\t"
"sw %[temp6], 24(%[w0]) \n\t"
"sw %[temp7], 28(%[w0]) \n\t"
"addiu %[w0], %[w0], 32 \n\t"
"addiu %[w1], %[w1], 32 \n\t"
PTR_ADDIU " %[w0], %[w0], 32 \n\t"
PTR_ADDIU " %[w1], %[w1], 32 \n\t"
: [w0]"+r"(w0), [w1]"+r"(w1),
[temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
@ -124,8 +125,8 @@ static void sbr_qmf_analysis_mips(AVFloatDSPContext *fdsp, FFTContext *mdct,
"sw %[temp5], 20(%[w0]) \n\t"
"sw %[temp6], 24(%[w0]) \n\t"
"sw %[temp7], 28(%[w0]) \n\t"
"addiu %[w0], %[w0], 32 \n\t"
"addiu %[w1], %[w1], 32 \n\t"
PTR_ADDIU " %[w0], %[w0], 32 \n\t"
PTR_ADDIU " %[w1], %[w1], 32 \n\t"
: [w0]"+r"(w0), [w1]"+r"(w1),
[temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
@ -298,13 +299,13 @@ static void sbr_qmf_synthesis_mips(FFTContext *mdct,
"lwc1 %[temp7], 2052(%[s0]) \n\t"
"madd.s %[temp0], %[temp0], %[temp12], %[temp13] \n\t"
"lwc1 %[temp8], 4104(%[v0]) \n\t"
"addiu %[dst], %[dst], 16 \n\t"
PTR_ADDIU "%[dst], %[dst], 16 \n\t"
"madd.s %[temp1], %[temp1], %[temp14], %[temp15] \n\t"
"lwc1 %[temp9], 2056(%[s0]) \n\t"
"addiu %[s0], %[s0], 16 \n\t"
PTR_ADDIU " %[s0], %[s0], 16 \n\t"
"madd.s %[temp2], %[temp2], %[temp16], %[temp17] \n\t"
"lwc1 %[temp10], 4108(%[v0]) \n\t"
"addiu %[v0], %[v0], 16 \n\t"
PTR_ADDIU " %[v0], %[v0], 16 \n\t"
"madd.s %[temp3], %[temp3], %[temp18], %[temp19] \n\t"
"lwc1 %[temp11], 2044(%[s0]) \n\t"
"lwc1 %[temp12], 4848(%[v0]) \n\t"
@ -445,7 +446,7 @@ static void sbr_qmf_synthesis_mips(FFTContext *mdct,
"madd.s %[temp3], %[temp3], %[temp10], %[temp11] \n\t"
"lwc1 %[temp19], 2316(%[s0]) \n\t"
"madd.s %[temp0], %[temp0], %[temp12], %[temp13] \n\t"
"addiu %[dst], %[dst], 16 \n\t"
PTR_ADDIU "%[dst], %[dst], 16 \n\t"
"madd.s %[temp1], %[temp1], %[temp14], %[temp15] \n\t"
"madd.s %[temp2], %[temp2], %[temp16], %[temp17] \n\t"
"madd.s %[temp3], %[temp3], %[temp18], %[temp19] \n\t"

View File

@ -56,7 +56,7 @@
#include "config.h"
#include "libavcodec/ac3dsp.h"
#include "libavcodec/ac3.h"
#include "libavutil/mips/asmdefs.h"
#if HAVE_INLINE_ASM
#if HAVE_MIPSDSPR1
@ -91,7 +91,7 @@ static void ac3_bit_alloc_calc_bap_mips(int16_t *mask, int16_t *psd,
"2: \n\t"
"lh %[address1], 0(%[psd1]) \n\t"
"lh %[address2], 2(%[psd1]) \n\t"
"addiu %[psd1], %[psd1], 4 \n\t"
PTR_ADDIU " %[psd1], %[psd1], 4 \n\t"
"subu %[address1], %[address1], %[m] \n\t"
"sra %[address1], %[address1], 5 \n\t"
"addiu %[address1], %[address1], -32 \n\t"
@ -109,14 +109,14 @@ static void ac3_bit_alloc_calc_bap_mips(int16_t *mask, int16_t *psd,
"addiu %[address2], %[address2], 32 \n\t"
"lbux %[address2], %[address2](%[bap_tab]) \n\t"
"sb %[address2], 1(%[bap1]) \n\t"
"addiu %[bap1], %[bap1], 2 \n\t"
PTR_ADDIU " %[bap1], %[bap1], 2 \n\t"
"bnez %[cond], 2b \n\t"
"addiu %[psd_end], %[psd_end], 2 \n\t"
PTR_ADDIU " %[psd_end], %[psd_end], 2 \n\t"
"slt %[cond], %[psd1], %[psd_end] \n\t"
"beqz %[cond], 3f \n\t"
"1: \n\t"
"lh %[address1], 0(%[psd1]) \n\t"
"addiu %[psd1], %[psd1], 2 \n\t"
PTR_ADDIU " %[psd1], %[psd1], 2 \n\t"
"subu %[address1], %[address1], %[m] \n\t"
"sra %[address1], %[address1], 5 \n\t"
"addiu %[address1], %[address1], -32 \n\t"
@ -125,7 +125,7 @@ static void ac3_bit_alloc_calc_bap_mips(int16_t *mask, int16_t *psd,
"addiu %[address1], %[address1], 32 \n\t"
"lbux %[address1], %[address1](%[bap_tab]) \n\t"
"sb %[address1], 0(%[bap1]) \n\t"
"addiu %[bap1], %[bap1], 1 \n\t"
PTR_ADDIU " %[bap1], %[bap1], 1 \n\t"
"3: \n\t"
: [address1]"=&r"(address1), [address2]"=&r"(address2),
@ -140,34 +140,35 @@ static void ac3_bit_alloc_calc_bap_mips(int16_t *mask, int16_t *psd,
static void ac3_update_bap_counts_mips(uint16_t mant_cnt[16], uint8_t *bap,
int len)
{
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
void *temp0, *temp2, *temp4, *temp5, *temp6, *temp7;
int temp1, temp3;
__asm__ volatile (
"andi %[temp3], %[len], 3 \n\t"
"addu %[temp2], %[bap], %[len] \n\t"
"addu %[temp4], %[bap], %[temp3] \n\t"
PTR_ADDU "%[temp2], %[bap], %[len] \n\t"
PTR_ADDU "%[temp4], %[bap], %[temp3] \n\t"
"beq %[temp2], %[temp4], 4f \n\t"
"1: \n\t"
"lbu %[temp0], -1(%[temp2]) \n\t"
"lbu %[temp5], -2(%[temp2]) \n\t"
"lbu %[temp6], -3(%[temp2]) \n\t"
"sll %[temp0], %[temp0], 1 \n\t"
"addu %[temp0], %[mant_cnt], %[temp0] \n\t"
PTR_ADDU "%[temp0], %[mant_cnt], %[temp0] \n\t"
"sll %[temp5], %[temp5], 1 \n\t"
"addu %[temp5], %[mant_cnt], %[temp5] \n\t"
PTR_ADDU "%[temp5], %[mant_cnt], %[temp5] \n\t"
"lhu %[temp1], 0(%[temp0]) \n\t"
"sll %[temp6], %[temp6], 1 \n\t"
"addu %[temp6], %[mant_cnt], %[temp6] \n\t"
PTR_ADDU "%[temp6], %[mant_cnt], %[temp6] \n\t"
"addiu %[temp1], %[temp1], 1 \n\t"
"sh %[temp1], 0(%[temp0]) \n\t"
"lhu %[temp1], 0(%[temp5]) \n\t"
"lbu %[temp7], -4(%[temp2]) \n\t"
"addiu %[temp2], %[temp2], -4 \n\t"
PTR_ADDIU "%[temp2],%[temp2], -4 \n\t"
"addiu %[temp1], %[temp1], 1 \n\t"
"sh %[temp1], 0(%[temp5]) \n\t"
"lhu %[temp1], 0(%[temp6]) \n\t"
"sll %[temp7], %[temp7], 1 \n\t"
"addu %[temp7], %[mant_cnt], %[temp7] \n\t"
PTR_ADDU "%[temp7], %[mant_cnt], %[temp7] \n\t"
"addiu %[temp1], %[temp1],1 \n\t"
"sh %[temp1], 0(%[temp6]) \n\t"
"lhu %[temp1], 0(%[temp7]) \n\t"
@ -179,9 +180,9 @@ static void ac3_update_bap_counts_mips(uint16_t mant_cnt[16], uint8_t *bap,
"3: \n\t"
"addiu %[temp3], %[temp3], -1 \n\t"
"lbu %[temp0], -1(%[temp2]) \n\t"
"addiu %[temp2], %[temp2], -1 \n\t"
PTR_ADDIU "%[temp2],%[temp2], -1 \n\t"
"sll %[temp0], %[temp0], 1 \n\t"
"addu %[temp0], %[mant_cnt], %[temp0] \n\t"
PTR_ADDU "%[temp0], %[mant_cnt], %[temp0] \n\t"
"lhu %[temp1], 0(%[temp0]) \n\t"
"addiu %[temp1], %[temp1], 1 \n\t"
"sh %[temp1], 0(%[temp0]) \n\t"
@ -274,7 +275,7 @@ static void ac3_downmix_mips(float **samples, float (*matrix)[2],
float v0, v1, v2, v3;
float v4, v5, v6, v7;
float samples0, samples1, samples2, samples3, matrix_j, matrix_j2;
float *samples_p,*matrix_p, **samples_x, **samples_end, **samples_sw;
float *samples_p, *samples_sw, *matrix_p, **samples_x, **samples_end;
__asm__ volatile(
".set push \n\t"
@ -283,7 +284,7 @@ static void ac3_downmix_mips(float **samples, float (*matrix)[2],
"li %[i1], 2 \n\t"
"sll %[len], 2 \n\t"
"move %[i], $zero \n\t"
"sll %[j], %[in_ch], 2 \n\t"
"sll %[j], %[in_ch], " PTRLOG " \n\t"
"bne %[out_ch], %[i1], 3f \n\t" // if (out_ch == 2)
" li %[i2], 1 \n\t"
@ -301,9 +302,9 @@ static void ac3_downmix_mips(float **samples, float (*matrix)[2],
"mtc1 $zero, %[v7] \n\t"
"addiu %[i1], %[i], 4 \n\t"
"addiu %[i2], %[i], 8 \n\t"
"lw %[samples_p], 0(%[samples_x]) \n\t"
PTR_L " %[samples_p], 0(%[samples_x]) \n\t"
"addiu %[i3], %[i], 12 \n\t"
"addu %[samples_end], %[samples_x], %[j] \n\t"
PTR_ADDU "%[samples_end],%[samples_x], %[j] \n\t"
"move %[samples_sw], %[samples_p] \n\t"
"1: \n\t" // start of the inner for loop (for (j = 0; j < in_ch; j++))
@ -313,8 +314,8 @@ static void ac3_downmix_mips(float **samples, float (*matrix)[2],
"lwxc1 %[samples1], %[i1](%[samples_p]) \n\t"
"lwxc1 %[samples2], %[i2](%[samples_p]) \n\t"
"lwxc1 %[samples3], %[i3](%[samples_p]) \n\t"
"addiu %[matrix_p], 8 \n\t"
"addiu %[samples_x], 4 \n\t"
PTR_ADDIU "%[matrix_p], 8 \n\t"
PTR_ADDIU "%[samples_x]," PTRSIZE " \n\t"
"madd.s %[v0], %[v0], %[samples0], %[matrix_j] \n\t"
"madd.s %[v1], %[v1], %[samples1], %[matrix_j] \n\t"
"madd.s %[v2], %[v2], %[samples2], %[matrix_j] \n\t"
@ -324,9 +325,9 @@ static void ac3_downmix_mips(float **samples, float (*matrix)[2],
"madd.s %[v6], %[v6], %[samples2], %[matrix_j2]\n\t"
"madd.s %[v7], %[v7], %[samples3], %[matrix_j2]\n\t"
"bne %[samples_x], %[samples_end], 1b \n\t"
" lw %[samples_p], 0(%[samples_x]) \n\t"
PTR_L " %[samples_p], 0(%[samples_x]) \n\t"
"lw %[samples_p], 4(%[samples]) \n\t"
PTR_L " %[samples_p], " PTRSIZE "(%[samples]) \n\t"
"swxc1 %[v0], %[i](%[samples_sw]) \n\t"
"swxc1 %[v1], %[i1](%[samples_sw]) \n\t"
"swxc1 %[v2], %[i2](%[samples_sw]) \n\t"
@ -351,9 +352,9 @@ static void ac3_downmix_mips(float **samples, float (*matrix)[2],
"mtc1 $zero, %[v3] \n\t"
"addiu %[i1], %[i], 4 \n\t"
"addiu %[i2], %[i], 8 \n\t"
"lw %[samples_p], 0(%[samples_x]) \n\t"
PTR_L " %[samples_p], 0(%[samples_x]) \n\t"
"addiu %[i3], %[i], 12 \n\t"
"addu %[samples_end], %[samples_x], %[j] \n\t"
PTR_ADDU "%[samples_end],%[samples_x], %[j] \n\t"
"move %[samples_sw], %[samples_p] \n\t"
"4: \n\t" // start of the inner for loop (for (j = 0; j < in_ch; j++))
@ -362,14 +363,14 @@ static void ac3_downmix_mips(float **samples, float (*matrix)[2],
"lwxc1 %[samples1], %[i1](%[samples_p]) \n\t"
"lwxc1 %[samples2], %[i2](%[samples_p]) \n\t"
"lwxc1 %[samples3], %[i3](%[samples_p]) \n\t"
"addiu %[matrix_p], 8 \n\t"
"addiu %[samples_x], 4 \n\t"
PTR_ADDIU "%[matrix_p], 8 \n\t"
PTR_ADDIU "%[samples_x]," PTRSIZE " \n\t"
"madd.s %[v0], %[v0], %[samples0], %[matrix_j] \n\t"
"madd.s %[v1], %[v1], %[samples1], %[matrix_j] \n\t"
"madd.s %[v2], %[v2], %[samples2], %[matrix_j] \n\t"
"madd.s %[v3], %[v3], %[samples3], %[matrix_j] \n\t"
"bne %[samples_x], %[samples_end], 4b \n\t"
" lw %[samples_p], 0(%[samples_x]) \n\t"
PTR_L " %[samples_p], 0(%[samples_x]) \n\t"
"swxc1 %[v0], %[i](%[samples_sw]) \n\t"
"addiu %[i], 16 \n\t"

View File

@ -54,6 +54,7 @@
#include "config.h"
#include "libavutil/attributes.h"
#include "libavcodec/acelp_filters.h"
#include "libavutil/mips/asmdefs.h"
#if HAVE_INLINE_ASM
static void ff_acelp_interpolatef_mips(float *out, const float *in,
@ -82,11 +83,11 @@ static void ff_acelp_interpolatef_mips(float *out, const float *in,
"lwc1 %[fc_val_p], 0(%[p_filter_coeffs_p]) \n\t"
"lwc1 %[in_val_m], 0(%[p_in_m]) \n\t"
"lwc1 %[fc_val_m], 0(%[p_filter_coeffs_m]) \n\t"
"addiu %[p_in_p], %[p_in_p], 4 \n\t"
PTR_ADDIU "%[p_in_p], %[p_in_p], 4 \n\t"
"madd.s %[v],%[v], %[in_val_p],%[fc_val_p] \n\t"
"addiu %[p_in_m], %[p_in_m], -4 \n\t"
"addu %[p_filter_coeffs_p], %[p_filter_coeffs_p], %[prec] \n\t"
"addu %[p_filter_coeffs_m], %[p_filter_coeffs_m], %[prec] \n\t"
PTR_ADDIU "%[p_in_m], %[p_in_m], -4 \n\t"
PTR_ADDU "%[p_filter_coeffs_p],%[p_filter_coeffs_p], %[prec] \n\t"
PTR_ADDU "%[p_filter_coeffs_m],%[p_filter_coeffs_m], %[prec] \n\t"
"madd.s %[v],%[v],%[in_val_m], %[fc_val_m] \n\t"
: [v] "+&f" (v),[p_in_p] "+r" (p_in_p), [p_in_m] "+r" (p_in_m),
@ -185,8 +186,8 @@ static void ff_acelp_apply_order_2_transfer_function_mips(float *out, const floa
"madd.s $f14, $f0, $f4, $f1 \n\t"
"madd.s $f14, $f14, $f5, $f13 \n\t"
"swc1 $f8, 24(%[out]) \n\t"
"addiu %[out], 32 \n\t"
"addiu %[in], 32 \n\t"
PTR_ADDIU "%[out], 32 \n\t"
PTR_ADDIU "%[in], 32 \n\t"
"addiu %[n], -8 \n\t"
"swc1 $f14, -4(%[out]) \n\t"
"bnez %[n], ff_acelp_apply_order_2_transfer_function_madd%= \n\t"

View File

@ -54,6 +54,7 @@
*/
#include "config.h"
#include "libavcodec/acelp_vectors.h"
#include "libavutil/mips/asmdefs.h"
#if HAVE_INLINE_ASM
static void ff_weighted_vector_sumf_mips(
@ -75,11 +76,11 @@ static void ff_weighted_vector_sumf_mips(
"mul.s $f5, %[weight_coeff_a], $f3 \n\t"
"madd.s $f2, $f2, %[weight_coeff_b], $f1 \n\t"
"madd.s $f5, $f5, %[weight_coeff_b], $f4 \n\t"
"addiu %[in_a], 8 \n\t"
"addiu %[in_b], 8 \n\t"
PTR_ADDIU "%[in_a],8 \n\t"
PTR_ADDIU "%[in_b],8 \n\t"
"swc1 $f2, 0(%[out]) \n\t"
"swc1 $f5, 4(%[out]) \n\t"
"addiu %[out], 8 \n\t"
PTR_ADDIU "%[out], 8 \n\t"
"bne %[in_a], %[a_end], ff_weighted_vector_sumf_madd%= \n\t"
"ff_weighted_vector_sumf_end%=: \n\t"

View File

@ -55,6 +55,7 @@
#include "libavutil/attributes.h"
#include "libavutil/common.h"
#include "libavcodec/celp_filters.h"
#include "libavutil/mips/asmdefs.h"
#if HAVE_INLINE_ASM
static void ff_celp_lp_synthesis_filterf_mips(float *out,
@ -118,8 +119,8 @@ static void ff_celp_lp_synthesis_filterf_mips(float *out,
__asm__ volatile(
"lwc1 %[old_out3], -20(%[p_out]) \n\t"
"lwc1 $f5, 16(%[p_filter_coeffs]) \n\t"
"addiu %[p_out], -8 \n\t"
"addiu %[p_filter_coeffs], 8 \n\t"
PTR_ADDIU "%[p_out], -8 \n\t"
PTR_ADDIU "%[p_filter_coeffs], 8 \n\t"
"nmsub.s %[out1], %[out1], $f5, %[old_out0] \n\t"
"nmsub.s %[out3], %[out3], $f5, %[old_out2] \n\t"
"lwc1 $f4, 12(%[p_filter_coeffs]) \n\t"
@ -181,8 +182,8 @@ static void ff_celp_lp_synthesis_filterf_mips(float *out,
__asm__ volatile(
"lwc1 %[fc_val], 0(%[p_filter_coeffs]) \n\t"
"lwc1 %[out_val_i], -4(%[p_out]) \n\t"
"addiu %[p_filter_coeffs], 4 \n\t"
"addiu %[p_out], -4 \n\t"
PTR_ADDIU "%[p_filter_coeffs], 4 \n\t"
PTR_ADDIU "%[p_out], -4 \n\t"
"nmsub.s %[out_val], %[out_val], %[fc_val], %[out_val_i] \n\t"
: [fc_val]"=&f"(fc_val), [out_val]"+f"(out_val),
@ -245,8 +246,8 @@ static void ff_celp_lp_zero_synthesis_filterf_mips(float *out,
"madd.s %[sum_out1], %[sum_out1], %[fc_val], $f0 \n\t"
"lwc1 %[fc_val], 4(%[p_filter_coeffs]) \n\t"
"lwc1 $f7, -8(%[p_in]) \n\t"
"addiu %[p_filter_coeffs], 8 \n\t"
"addiu %[p_in], -8 \n\t"
PTR_ADDIU "%[p_filter_coeffs], 8 \n\t"
PTR_ADDIU "%[p_in], -8 \n\t"
"madd.s %[sum_out8], %[sum_out8], %[fc_val], $f6 \n\t"
"madd.s %[sum_out7], %[sum_out7], %[fc_val], $f5 \n\t"
"madd.s %[sum_out6], %[sum_out6], %[fc_val], $f4 \n\t"

View File

@ -53,6 +53,7 @@
*/
#include "config.h"
#include "libavcodec/celp_math.h"
#include "libavutil/mips/asmdefs.h"
#if HAVE_INLINE_ASM
static float ff_dot_productf_mips(const float* a, const float* b,
@ -67,8 +68,8 @@ static float ff_dot_productf_mips(const float* a, const float* b,
"ff_dot_productf_madd%=: \n\t"
"lwc1 $f2, 0(%[a]) \n\t"
"lwc1 $f1, 0(%[b]) \n\t"
"addiu %[a], %[a], 4 \n\t"
"addiu %[b], %[b], 4 \n\t"
PTR_ADDIU "%[a], %[a], 4 \n\t"
PTR_ADDIU "%[b], %[b], 4 \n\t"
"madd.s %[sum], %[sum], $f1, $f2 \n\t"
"bne %[a], %[a_end], ff_dot_productf_madd%= \n\t"
"ff_dot_productf_end%=: \n\t"

View File

@ -55,6 +55,8 @@
#ifndef AVCODEC_MIPS_COMPUTE_ANTIALIAS_FLOAT_H
#define AVCODEC_MIPS_COMPUTE_ANTIALIAS_FLOAT_H
#include "libavutil/mips/asmdefs.h"
#if HAVE_INLINE_ASM
static void compute_antialias_mips_float(MPADecodeContext *s,
GranuleDef *g)
@ -158,7 +160,7 @@ static void compute_antialias_mips_float(MPADecodeContext *s,
"mul.s %[out4], %[in5], %[in7] \t\n"
"swc1 %[out1], -7*4(%[ptr]) \t\n"
"swc1 %[out2], 6*4(%[ptr]) \t\n"
"addiu %[ptr], %[ptr], 72 \t\n"
PTR_ADDIU "%[ptr],%[ptr], 72 \t\n"
"nmsub.s %[out3], %[out3], %[in7], %[in8] \t\n"
"madd.s %[out4], %[out4], %[in6], %[in8] \t\n"
"swc1 %[out3], -26*4(%[ptr]) \t\n"

View File

@ -50,6 +50,7 @@
#include "config.h"
#include "libavcodec/fft.h"
#include "libavcodec/fft_table.h"
#include "libavutil/mips/asmdefs.h"
/**
* FFT transform
@ -368,14 +369,14 @@ static void ff_imdct_half_mips(FFTContext *s, FFTSample *output, const FFTSample
"mul.s %[temp11], %[temp5], %[temp6] \t\n"
"mul.s %[temp12], %[temp5], %[temp7] \t\n"
"lwc1 %[temp8], 0(%[in3]) \t\n"
"addiu %[tcos1], %[tcos1], 8 \t\n"
"addiu %[tsin1], %[tsin1], 8 \t\n"
"addiu %[in1], %[in1], 16 \t\n"
PTR_ADDIU " %[tcos1], %[tcos1], 8 \t\n"
PTR_ADDIU " %[tsin1], %[tsin1], 8 \t\n"
PTR_ADDIU " %[in1], %[in1], 16 \t\n"
"nmsub.s %[temp11], %[temp11], %[temp8], %[temp7] \t\n"
"madd.s %[temp12], %[temp12], %[temp8], %[temp6] \t\n"
"addiu %[in2], %[in2], -16 \t\n"
"addiu %[in3], %[in3], 16 \t\n"
"addiu %[in4], %[in4], -16 \t\n"
PTR_ADDIU " %[in2], %[in2], -16 \t\n"
PTR_ADDIU " %[in3], %[in3], 16 \t\n"
PTR_ADDIU " %[in4], %[in4], -16 \t\n"
: [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
[temp3]"=&f"(temp3), [temp4]"=&f"(temp4),

View File

@ -50,9 +50,9 @@
#include "config.h"
#include "libavcodec/avcodec.h"
#include "libavcodec/fmtconvert.h"
#include "libavutil/mips/asmdefs.h"
#if HAVE_INLINE_ASM
static void int32_to_float_fmul_scalar_mips(float *dst, const int *src,
float mul, int len)
{
@ -86,7 +86,7 @@ static void int32_to_float_fmul_scalar_mips(float *dst, const int *src,
"mtc1 %[rpom12], %[temp13] \n\t"
"mtc1 %[rpom22], %[temp15] \n\t"
"addiu %[src], 32 \n\t"
PTR_ADDIU "%[src], 32 \n\t"
"cvt.s.w %[temp1], %[temp1] \n\t"
"cvt.s.w %[temp3], %[temp3] \n\t"
"cvt.s.w %[temp5], %[temp5] \n\t"
@ -116,7 +116,7 @@ static void int32_to_float_fmul_scalar_mips(float *dst, const int *src,
"swc1 %[temp11], 20(%[dst]) \n\t" /*dst[i+5] = src[i+5] * mul;*/
"swc1 %[temp13], 24(%[dst]) \n\t" /*dst[i+6] = src[i+6] * mul;*/
"swc1 %[temp15], 28(%[dst]) \n\t" /*dst[i+7] = src[i+7] * mul;*/
"addiu %[dst], 32 \n\t"
PTR_ADDIU "%[dst], 32 \n\t"
"bne %[src], %[src_end], i32tf_lp%= \n\t"
: [temp1]"=&f"(temp1), [temp11]"=&f"(temp11),
[temp13]"=&f"(temp13), [temp15]"=&f"(temp15),

View File

@ -55,6 +55,8 @@
#define AVCODEC_LSP_MIPS_H
#if HAVE_MIPSFPU && HAVE_INLINE_ASM
#include "libavutil/mips/asmdefs.h"
static av_always_inline void ff_lsp2polyf_mips(const double *lsp, double *f, int lp_half_order)
{
int i, j = 0;
@ -73,7 +75,7 @@ static av_always_inline void ff_lsp2polyf_mips(const double *lsp, double *f, int
__asm__ volatile(
"move %[p_f], %[p_fi] \n\t"
"add.d %[val], %[val], %[val] \n\t"
"addiu %[p_fi], 8 \n\t"
PTR_ADDIU "%[p_fi], 8 \n\t"
"ldc1 %[f_j_1], 0(%[p_f]) \n\t"
"ldc1 %[f_j], 8(%[p_f]) \n\t"
"neg.d %[val], %[val] \n\t"
@ -91,7 +93,7 @@ static av_always_inline void ff_lsp2polyf_mips(const double *lsp, double *f, int
"mov.d %[f_j_1], %[f_j_2] \n\t"
"ldc1 %[f_j_2], -16(%[p_f]) \n\t"
"sdc1 %[tmp], 8(%[p_f]) \n\t"
"addiu %[p_f], -8 \n\t"
PTR_ADDIU "%[p_f], -8 \n\t"
"bgtz %[j], ff_lsp2polyf_lp_j%= \n\t"
"ff_lsp2polyf_lp_j_end%=: \n\t"

View File

@ -54,6 +54,7 @@
#include <string.h>
#include "libavutil/mips/asmdefs.h"
#include "libavcodec/mpegaudiodsp.h"
static void ff_mpadsp_apply_window_mips_fixed(int32_t *synth_buf, int32_t *window,
@ -152,7 +153,7 @@ static void ff_mpadsp_apply_window_mips_fixed(int32_t *synth_buf, int32_t *windo
"extr.w %[sum1], $ac0, 24 \n\t"
"mflo %[temp3] \n\t"
"addi %[w], %[w], 4 \n\t"
PTR_ADDIU "%[w], %[w], 4 \n\t"
"and %[temp1], %[temp3], 0x00ffffff \n\t"
"slt %[temp2], %[sum1], %[min_asm] \n\t"
"movn %[sum1], %[min_asm], %[temp2] \n\t"
@ -180,7 +181,7 @@ static void ff_mpadsp_apply_window_mips_fixed(int32_t *synth_buf, int32_t *windo
"mtlo $0, $ac1 \n\t"
"mthi $0 \n\t"
"mtlo %[temp1] \n\t"
"addi %[p_temp1], %[p_temp1], 4 \n\t"
PTR_ADDIU "%[p_temp1], %[p_temp1], 4 \n\t"
"lw %[w_asm], 0(%[w]) \n\t"
"lw %[p_asm], 0(%[p_temp1]) \n\t"
"lw %[w2_asm], 0(%[w2]) \n\t"
@ -221,7 +222,7 @@ static void ff_mpadsp_apply_window_mips_fixed(int32_t *synth_buf, int32_t *windo
"msub $ac1, %[w2_asm], %[p_asm] \n\t"
"madd %[w_asm1], %[p_asm1] \n\t"
"msub $ac1, %[w2_asm1], %[p_asm1] \n\t"
"addi %[p_temp2], %[p_temp2], -4 \n\t"
PTR_ADDIU "%[p_temp2], %[p_temp2], -4 \n\t"
"lw %[w_asm], 32*4(%[w]) \n\t"
"lw %[p_asm], 0(%[p_temp2]) \n\t"
"lw %[w2_asm], 32*4(%[w2]) \n\t"
@ -262,8 +263,8 @@ static void ff_mpadsp_apply_window_mips_fixed(int32_t *synth_buf, int32_t *windo
"msub %[w_asm1], %[p_asm1] \n\t"
"msub $ac1, %[w2_asm], %[p_asm] \n\t"
"msub $ac1, %[w2_asm1], %[p_asm1] \n\t"
"addi %[w], %[w], 4 \n\t"
"addi %[w2], %[w2], -4 \n\t"
PTR_ADDIU "%[w], %[w], 4 \n\t"
PTR_ADDIU "%[w2], %[w2], -4 \n\t"
"mflo %[temp2] \n\t"
"extr.w %[sum1], $ac0, 24 \n\t"
"li %[temp3], 1 \n\t"

View File

@ -55,6 +55,7 @@
#include <string.h>
#include "libavutil/mips/asmdefs.h"
#include "libavcodec/mpegaudiodsp.h"
static void ff_mpadsp_apply_window_mips_float(float *synth_buf, float *window,
@ -89,7 +90,7 @@ static void ff_mpadsp_apply_window_mips_float(float *synth_buf, float *window,
"sw $zero, 0(%[dither_state]) \t\n"
"lwc1 %[in3], 64*4(%[window]) \t\n"
"lwc1 %[in4], 80*4(%[synth_buf]) \t\n"
"addu %[samples2], %[samples], %[t_sample] \t\n"
PTR_ADDU "%[samples2],%[samples], %[t_sample] \t\n"
"madd.s %[sum], %[sum], %[in1], %[in2] \t\n"
"lwc1 %[in5], 128*4(%[window]) \t\n"
"lwc1 %[in6], 144*4(%[synth_buf]) \t\n"
@ -131,15 +132,15 @@ static void ff_mpadsp_apply_window_mips_float(float *synth_buf, float *window,
"lwc1 %[in7], 480*4(%[window]) \t\n"
"lwc1 %[in8], 496*4(%[synth_buf]) \t\n"
"nmsub.s %[sum], %[sum], %[in1], %[in2] \t\n"
"addu %[w], %[window], 4 \t\n"
PTR_ADDU "%[w], %[window], 4 \t\n"
"nmsub.s %[sum], %[sum], %[in3], %[in4] \t\n"
"addu %[w2], %[window], 124 \t\n"
"addiu %[p], %[synth_buf], 68 \t\n"
"addiu %[p2], %[synth_buf], 188 \t\n"
PTR_ADDU "%[w2], %[window], 124 \t\n"
PTR_ADDIU "%[p], %[synth_buf], 68 \t\n"
PTR_ADDIU "%[p2], %[synth_buf], 188 \t\n"
"nmsub.s %[sum], %[sum], %[in5], %[in6] \t\n"
"nmsub.s %[sum], %[sum], %[in7], %[in8] \t\n"
"swc1 %[sum], 0(%[samples]) \t\n"
"addu %[samples], %[samples], %[incr1] \t\n"
PTR_ADDU "%[samples], %[samples], %[incr1] \t\n"
/* calculate two samples at the same time to avoid one memory
access per two sample */
@ -223,17 +224,17 @@ static void ff_mpadsp_apply_window_mips_float(float *synth_buf, float *window,
"nmsub.s %[sum], %[sum], %[in1], %[in2] \t\n"
"lwc1 %[in6], 480*4(%[w2]) \t\n"
"nmsub.s %[sum2], %[sum2], %[in2], %[in3] \t\n"
"addiu %[w], %[w], 4 \t\n"
PTR_ADDIU "%[w], %[w], 4 \t\n"
"nmsub.s %[sum], %[sum], %[in4], %[in5] \t\n"
"addiu %[w2], %[w2], -4 \t\n"
PTR_ADDIU "%[w2], %[w2], -4 \t\n"
"nmsub.s %[sum2], %[sum2], %[in5], %[in6] \t\n"
"addu %[j], %[j], 4 \t\n"
"addiu %[p], 4 \t\n"
PTR_ADDIU "%[p], 4 \t\n"
"swc1 %[sum], 0(%[samples]) \t\n"
"addiu %[p2], -4 \t\n"
PTR_ADDIU "%[p2], -4 \t\n"
"swc1 %[sum2], 0(%[samples2]) \t\n"
"addu %[samples], %[samples], %[incr1] \t\n"
"subu %[samples2], %[samples2], %[incr1] \t\n"
PTR_ADDU "%[samples], %[samples], %[incr1] \t\n"
PTR_SUBU "%[samples2],%[samples2], %[incr1] \t\n"
"bne %[j], 64, ff_mpadsp_apply_window_loop%= \t\n"
"lwc1 %[in1], 48*4(%[window]) \t\n"

View File

@ -56,6 +56,7 @@
#include "config.h"
#include "libavcodec/sbrdsp.h"
#include "libavutil/mips/asmdefs.h"
#if HAVE_INLINE_ASM
static void sbr_qmf_pre_shuffle_mips(float *z)
@ -80,7 +81,7 @@ static void sbr_qmf_pre_shuffle_mips(float *z)
"xor %[Temp3], %[Temp3], %[Temp6] \n\t"
"xor %[Temp4], %[Temp4], %[Temp6] \n\t"
"xor %[Temp5], %[Temp5], %[Temp6] \n\t"
"addiu %[z2], %[z2], -20 \n\t"
PTR_ADDIU "%[z2], %[z2], -20 \n\t"
"sw %[Temp1], 32(%[z1]) \n\t"
"sw %[Temp2], 24(%[z1]) \n\t"
"sw %[Temp3], 16(%[z1]) \n\t"
@ -96,8 +97,8 @@ static void sbr_qmf_pre_shuffle_mips(float *z)
"sw %[Temp3], 20(%[z1]) \n\t"
"sw %[Temp4], 28(%[z1]) \n\t"
"sw %[Temp5], 36(%[z1]) \n\t"
"addiu %[z3], %[z3], 20 \n\t"
"addiu %[z1], %[z1], 40 \n\t"
PTR_ADDIU "%[z3], %[z3], 20 \n\t"
PTR_ADDIU "%[z1], %[z1], 40 \n\t"
"bne %[z1], %[z4], 1b \n\t"
"lw %[Temp1], 132(%[z]) \n\t"
"lw %[Temp2], 128(%[z]) \n\t"
@ -138,7 +139,7 @@ static void sbr_qmf_post_shuffle_mips(float W[32][2], const float *z)
"xor %[Temp2], %[Temp2], %[Temp5] \n\t"
"xor %[Temp3], %[Temp3], %[Temp5] \n\t"
"xor %[Temp4], %[Temp4], %[Temp5] \n\t"
"addiu %[z2], %[z2], -16 \n\t"
PTR_ADDIU "%[z2], %[z2], -16 \n\t"
"sw %[Temp1], 24(%[W_ptr]) \n\t"
"sw %[Temp2], 16(%[W_ptr]) \n\t"
"sw %[Temp3], 8(%[W_ptr]) \n\t"
@ -151,8 +152,8 @@ static void sbr_qmf_post_shuffle_mips(float W[32][2], const float *z)
"sw %[Temp2], 12(%[W_ptr]) \n\t"
"sw %[Temp3], 20(%[W_ptr]) \n\t"
"sw %[Temp4], 28(%[W_ptr]) \n\t"
"addiu %[z1], %[z1], 16 \n\t"
"addiu %[W_ptr], %[W_ptr], 32 \n\t"
PTR_ADDIU "%[z1], %[z1], 16 \n\t"
PTR_ADDIU "%[W_ptr],%[W_ptr], 32 \n\t"
"bne %[z1], %[z_end], 1b \n\t"
: [Temp1]"=&r"(Temp1), [Temp2]"=&r"(Temp2),
@ -286,7 +287,7 @@ static float sbr_sum_square_mips(float (*x)[2], int n)
"lwc1 %[temp2], 8(%[p_x]) \n\t"
"lwc1 %[temp3], 12(%[p_x]) \n\t"
"1: \n\t"
"addiu %[p_x], %[p_x], 16 \n\t"
PTR_ADDIU "%[p_x], %[p_x], 16 \n\t"
"madd.s %[sum0], %[sum0], %[temp0], %[temp0] \n\t"
"lwc1 %[temp0], 0(%[p_x]) \n\t"
"madd.s %[sum1], %[sum1], %[temp1], %[temp1] \n\t"
@ -421,10 +422,10 @@ static void sbr_qmf_deint_bfly_mips(float *v, const float *src0, const float *sr
"swc1 %[temp6], 56(%[v0]) \n\t"
"swc1 %[temp11], -60(%[v1]) \n\t"
"swc1 %[temp9], 60(%[v0]) \n\t"
"addiu %[src0], %[src0], 64 \n\t"
"addiu %[src1], %[src1], -64 \n\t"
"addiu %[v0], %[v0], 64 \n\t"
"addiu %[v1], %[v1], -64 \n\t"
PTR_ADDIU " %[src0], %[src0], 64 \n\t"
PTR_ADDIU " %[src1], %[src1], -64 \n\t"
PTR_ADDIU " %[v0], %[v0], 64 \n\t"
PTR_ADDIU " %[v1], %[v1], -64 \n\t"
: [v0]"+r"(v0), [v1]"+r"(v1), [src0]"+r"(psrc0), [src1]"+r"(psrc1),
[temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
@ -473,7 +474,7 @@ static void sbr_autocorrelate_mips(const float x[40][2], float phi[3][2][2])
"add.s %[imag_sum_1], %[imag_sum_1], %[temp_r2] \n\t"
"add.s %[real_sum_2], %[real_sum_2], %[temp_r3] \n\t"
"add.s %[imag_sum_2], %[imag_sum_2], %[temp_r4] \n\t"
"addiu %[p_x], %[p_x], 8 \n\t"
PTR_ADDIU "%[p_x], %[p_x], 8 \n\t"
: [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
[temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
@ -543,7 +544,7 @@ static void sbr_autocorrelate_mips(const float x[40][2], float phi[3][2][2])
"add.s %[imag_sum_1], %[imag_sum_1], %[temp_r2] \n\t"
"add.s %[real_sum_2], %[real_sum_2], %[temp_r3] \n\t"
"add.s %[imag_sum_2], %[imag_sum_2], %[temp_r4] \n\t"
"addiu %[p_x], %[p_x], 24 \n\t"
PTR_ADDIU "%[p_x], %[p_x], 24 \n\t"
: [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
[temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
@ -634,8 +635,8 @@ static void sbr_hf_gen_mips(float (*X_high)[2], const float (*X_low)[2],
"lwc1 %[temp8], 4(%[alpha]) \n\t"
"lwc1 %[temp9], 8(%[alpha]) \n\t"
"lwc1 %[temp10], 12(%[alpha]) \n\t"
"addiu %[p_x_high], %[p_x_high], 8 \n\t"
"addiu %[p_x_low], %[p_x_low], 8 \n\t"
PTR_ADDIU "%[p_x_high], %[p_x_high], 8 \n\t"
PTR_ADDIU "%[p_x_low], %[p_x_low], 8 \n\t"
"mul.s %[temp11], %[temp1], %[temp8] \n\t"
"msub.s %[temp11], %[temp11], %[temp0], %[temp7] \n\t"
"madd.s %[temp11], %[temp11], %[temp2], %[temp9] \n\t"
@ -682,12 +683,12 @@ static void sbr_hf_g_filt_mips(float (*Y)[2], const float (*X_high)[40][2],
"lwc1 %[temp2], 4(%[p_x]) \n\t"
"mul.s %[temp1], %[temp1], %[temp0] \n\t"
"mul.s %[temp2], %[temp2], %[temp0] \n\t"
"addiu %[p_g], %[p_g], 4 \n\t"
"addiu %[p_x], %[p_x], 320 \n\t"
PTR_ADDIU "%[p_g], %[p_g], 4 \n\t"
PTR_ADDIU "%[p_x], %[p_x], 320 \n\t"
"swc1 %[temp1], 0(%[p_y]) \n\t"
"swc1 %[temp2], 4(%[p_y]) \n\t"
"bne %[p_g], %[loop_end], 1b \n\t"
" addiu %[p_y], %[p_y], 8 \n\t"
PTR_ADDIU "%[p_y], %[p_y], 8 \n\t"
".set pop \n\t"
: [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
@ -719,7 +720,7 @@ static void sbr_hf_apply_noise_0_mips(float (*Y)[2], const float *s_m,
"addiu %[noise], %[noise], 1 \n\t"
"andi %[noise], %[noise], 0x1ff \n\t"
"sll %[temp0], %[noise], 3 \n\t"
"addu %[ff_table], %[ff_sbr_noise_table], %[temp0] \n\t"
PTR_ADDU "%[ff_table],%[ff_sbr_noise_table], %[temp0] \n\t"
"add.s %[y0], %[y0], %[temp1] \n\t"
"mfc1 %[temp3], %[temp1] \n\t"
"bne %[temp3], $0, 1f \n\t"
@ -765,7 +766,7 @@ static void sbr_hf_apply_noise_1_mips(float (*Y)[2], const float *s_m,
"addiu %[noise], %[noise], 1 \n\t"
"andi %[noise], %[noise], 0x1ff \n\t"
"sll %[temp0], %[noise], 3 \n\t"
"addu %[ff_table], %[ff_sbr_noise_table], %[temp0] \n\t"
PTR_ADDU "%[ff_table],%[ff_sbr_noise_table],%[temp0] \n\t"
"madd.s %[y1], %[y1], %[temp1], %[phi_sign] \n\t"
"bne %[temp3], $0, 1f \n\t"
"lwc1 %[y0], 0(%[Y1]) \n\t"
@ -810,7 +811,7 @@ static void sbr_hf_apply_noise_2_mips(float (*Y)[2], const float *s_m,
"addiu %[noise], %[noise], 1 \n\t"
"andi %[noise], %[noise], 0x1ff \n\t"
"sll %[temp0], %[noise], 3 \n\t"
"addu %[ff_table], %[ff_sbr_noise_table], %[temp0] \n\t"
PTR_ADDU "%[ff_table],%[ff_sbr_noise_table],%[temp0] \n\t"
"sub.s %[y0], %[y0], %[temp1] \n\t"
"mfc1 %[temp3], %[temp1] \n\t"
"bne %[temp3], $0, 1f \n\t"
@ -856,7 +857,7 @@ static void sbr_hf_apply_noise_3_mips(float (*Y)[2], const float *s_m,
"addiu %[noise], %[noise], 1 \n\t"
"andi %[noise], %[noise], 0x1ff \n\t"
"sll %[temp0], %[noise], 3 \n\t"
"addu %[ff_table], %[ff_sbr_noise_table], %[temp0] \n\t"
PTR_ADDU "%[ff_table],%[ff_sbr_noise_table], %[temp0] \n\t"
"nmsub.s %[y1], %[y1], %[temp1], %[phi_sign] \n\t"
"mfc1 %[temp3], %[temp1] \n\t"
"bne %[temp3], $0, 1f \n\t"

48
libavutil/mips/asmdefs.h Normal file
View File

@ -0,0 +1,48 @@
/*
* Copyright (c) 2015 Imagination Technologies Ltd
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* MIPS assembly defines from sys/asm.h but rewritten for use with C inline
* assembly (rather than from within .s files).
*/
#ifndef AVUTIL_MIPS_ASMDEFS_H
#define AVUTIL_MIPS_ASMDEFS_H

/*
 * Pointer-width helpers for MIPS inline assembly.
 *
 * Every macro below expands to a string literal, so it can be placed directly
 * in front of an operand string inside an asm template and is joined to it by
 * ordinary string-literal concatenation, e.g.
 *
 *     PTR_ADDIU "%[src], %[src], 32 \n\t"
 *
 * The mnemonic strings end in a space so that the mnemonic stays separated
 * from the operands that follow it.
 *
 *   PTRSIZE   - size of a pointer in bytes, as assembly text
 *   PTRLOG    - log2 of PTRSIZE, as assembly text
 *   PTR_ADDU  - add two registers holding pointer-sized values
 *   PTR_ADDIU - add an immediate to a register holding a pointer
 *   PTR_SUBU  - subtract two registers holding pointer-sized values
 *   PTR_L     - load a pointer-sized value from memory
 *
 * The 64-bit forms are selected only when the compiler reports the 64-bit
 * ABI through _MIPS_SIM; every other configuration gets the 32-bit forms.
 *
 * _MIPS_SIM and _ABI64 are expected to be predefined by the compiler on MIPS
 * targets (GCC and Clang do so), which is why <sgidefs.h> -- a header that is
 * not available with every C library -- is not included here.
 * NOTE(review): confirm the predefines for any other toolchain in use.
 */
#if defined(_MIPS_SIM) && defined(_ABI64) && _MIPS_SIM == _ABI64
# define PTRSIZE   " 8 "
# define PTRLOG    " 3 "
# define PTR_ADDU  "daddu "
# define PTR_ADDIU "daddiu "
# define PTR_SUBU  "dsubu "
# define PTR_L     "ld "
#else
# define PTRSIZE   " 4 "
# define PTRLOG    " 2 "
# define PTR_ADDU  "addu "
# define PTR_ADDIU "addiu "
# define PTR_SUBU  "subu "
# define PTR_L     "lw "
#endif

#endif /* AVUTIL_MIPS_ASMDEFS_H */

View File

@ -53,6 +53,7 @@
#include "config.h"
#include "libavutil/float_dsp.h"
#include "libavutil/mips/asmdefs.h"
#if HAVE_INLINE_ASM && HAVE_MIPSFPU
static void vector_fmul_mips(float *dst, const float *src0, const float *src1,
@ -90,9 +91,9 @@ static void vector_fmul_mips(float *dst, const float *src0, const float *src1,
"swc1 %[src0_1], 4(%[d]) \n\t"
"swc1 %[src0_2], 8(%[d]) \n\t"
"swc1 %[src0_3], 12(%[d]) \n\t"
"addiu %[s0], %[s0], 16 \n\t"
"addiu %[s1], %[s1], 16 \n\t"
"addiu %[d], %[d], 16 \n\t"
PTR_ADDIU "%[s0], %[s0], 16 \n\t"
PTR_ADDIU "%[s1], %[s1], 16 \n\t"
PTR_ADDIU "%[d], %[d], 16 \n\t"
"bne %[d], %[d_end], 1b \n\t"
: [src0_0]"=&f"(src0_0), [src0_1]"=&f"(src0_1),
@ -122,12 +123,12 @@ static void vector_fmul_scalar_mips(float *dst, const float *src, float mul,
"lwc1 %[temp1], 4(%[src]) \n\t"
"lwc1 %[temp2], 8(%[src]) \n\t"
"lwc1 %[temp3], 12(%[src]) \n\t"
"addiu %[dst], %[dst], 16 \n\t"
PTR_ADDIU "%[dst], %[dst], 16 \n\t"
"mul.s %[temp0], %[temp0], %[mul] \n\t"
"mul.s %[temp1], %[temp1], %[mul] \n\t"
"mul.s %[temp2], %[temp2], %[mul] \n\t"
"mul.s %[temp3], %[temp3], %[mul] \n\t"
"addiu %[src], %[src], 16 \n\t"
PTR_ADDIU "%[src], %[src], 16 \n\t"
"swc1 %[temp0], -16(%[dst]) \n\t"
"swc1 %[temp1], -12(%[dst]) \n\t"
"swc1 %[temp2], -8(%[dst]) \n\t"
@ -251,8 +252,8 @@ static void butterflies_float_mips(float *av_restrict v1, float *av_restrict v2,
"add.s %[temp13], %[temp2], %[temp6] \n\t"
"sub.s %[temp14], %[temp3], %[temp7] \n\t"
"add.s %[temp15], %[temp3], %[temp7] \n\t"
"addiu %[v1], %[v1], 16 \n\t"
"addiu %[v2], %[v2], 16 \n\t"
PTR_ADDIU "%[v1], %[v1], 16 \n\t"
PTR_ADDIU "%[v2], %[v2], 16 \n\t"
"addiu %[pom], %[pom], -1 \n\t"
"lwc1 %[temp0], 0(%[v1]) \n\t"
"lwc1 %[temp1], 4(%[v1]) \n\t"
@ -321,9 +322,9 @@ static void vector_fmul_reverse_mips(float *dst, const float *src0, const float
"mul.s %[temp2], %[temp3], %[temp2] \n\t"
"mul.s %[temp4], %[temp5], %[temp4] \n\t"
"mul.s %[temp6], %[temp7], %[temp6] \n\t"
"addiu %[src0], %[src0], 16 \n\t"
"addiu %[src1], %[src1], -16 \n\t"
"addiu %[dst], %[dst], 16 \n\t"
PTR_ADDIU "%[src0], %[src0], 16 \n\t"
PTR_ADDIU "%[src1], %[src1], -16 \n\t"
PTR_ADDIU "%[dst], %[dst], 16 \n\t"
"swc1 %[temp0], -16(%[dst]) \n\t"
"swc1 %[temp2], -12(%[dst]) \n\t"
"swc1 %[temp4], -8(%[dst]) \n\t"