mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-11-21 10:55:51 +02:00
avcodec/mips: [loongson] reoptimize simple idct with mmi.
Performance of MPEG-4 decoding improved by about 23% (from 128 fps to 158 fps, tested on a Loongson 3A3000). Reoptimized the following functions with MMI: 1. ff_simple_idct_put_8_mmi 2. ff_simple_idct_add_8_mmi 3. ff_simple_idct_8_mmi. Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
This commit is contained in:
parent
1124df0397
commit
df13b75aa1
@ -20,6 +20,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "idctdsp_mips.h"
|
#include "idctdsp_mips.h"
|
||||||
|
#include "xvididct_mips.h"
|
||||||
|
|
||||||
#if HAVE_MSA
|
#if HAVE_MSA
|
||||||
static av_cold void idctdsp_init_msa(IDCTDSPContext *c, AVCodecContext *avctx,
|
static av_cold void idctdsp_init_msa(IDCTDSPContext *c, AVCodecContext *avctx,
|
||||||
@ -48,8 +49,10 @@ static av_cold void idctdsp_init_mmi(IDCTDSPContext *c, AVCodecContext *avctx,
|
|||||||
if ((avctx->lowres != 1) && (avctx->lowres != 2) && (avctx->lowres != 3) &&
|
if ((avctx->lowres != 1) && (avctx->lowres != 2) && (avctx->lowres != 3) &&
|
||||||
(avctx->bits_per_raw_sample != 10) &&
|
(avctx->bits_per_raw_sample != 10) &&
|
||||||
(avctx->bits_per_raw_sample != 12) &&
|
(avctx->bits_per_raw_sample != 12) &&
|
||||||
(avctx->idct_algo == FF_IDCT_AUTO)) {
|
((avctx->idct_algo == FF_IDCT_AUTO) || (avctx->idct_algo == FF_IDCT_SIMPLE))) {
|
||||||
c->idct = ff_simple_idct_mmi;
|
c->idct_put = ff_simple_idct_put_8_mmi;
|
||||||
|
c->idct_add = ff_simple_idct_add_8_mmi;
|
||||||
|
c->idct = ff_simple_idct_8_mmi;
|
||||||
c->perm_type = FF_IDCT_PERM_NONE;
|
c->perm_type = FF_IDCT_PERM_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -46,8 +46,8 @@ void ff_put_signed_pixels_clamped_mmi(const int16_t *block,
|
|||||||
uint8_t *av_restrict pixels, ptrdiff_t line_size);
|
uint8_t *av_restrict pixels, ptrdiff_t line_size);
|
||||||
void ff_add_pixels_clamped_mmi(const int16_t *block,
|
void ff_add_pixels_clamped_mmi(const int16_t *block,
|
||||||
uint8_t *av_restrict pixels, ptrdiff_t line_size);
|
uint8_t *av_restrict pixels, ptrdiff_t line_size);
|
||||||
void ff_simple_idct_mmi(int16_t *block);
|
void ff_simple_idct_8_mmi(int16_t *block);
|
||||||
void ff_simple_idct_put_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
|
void ff_simple_idct_put_8_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
|
||||||
void ff_simple_idct_add_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
|
void ff_simple_idct_add_8_mmi(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
|
||||||
|
|
||||||
#endif // #ifndef AVCODEC_MIPS_IDCTDSP_MIPS_H
|
#endif // #ifndef AVCODEC_MIPS_IDCTDSP_MIPS_H
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -201,6 +201,55 @@
|
|||||||
|
|
||||||
#endif /* HAVE_LOONGSON2 */
|
#endif /* HAVE_LOONGSON2 */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* backup register
|
||||||
|
*/
|
||||||
|
#define BACKUP_REG \
|
||||||
|
double temp_backup_reg[8]; \
|
||||||
|
if (_MIPS_SIM == _ABI64) \
|
||||||
|
__asm__ volatile ( \
|
||||||
|
"gssqc1 $f25, $f24, 0x00(%[temp]) \n\t" \
|
||||||
|
"gssqc1 $f27, $f26, 0x10(%[temp]) \n\t" \
|
||||||
|
"gssqc1 $f29, $f28, 0x20(%[temp]) \n\t" \
|
||||||
|
"gssqc1 $f31, $f30, 0x30(%[temp]) \n\t" \
|
||||||
|
: \
|
||||||
|
: [temp]"r"(temp_backup_reg) \
|
||||||
|
: "memory" \
|
||||||
|
); \
|
||||||
|
else \
|
||||||
|
__asm__ volatile ( \
|
||||||
|
"gssqc1 $f22, $f20, 0x00(%[temp]) \n\t" \
|
||||||
|
"gssqc1 $f26, $f24, 0x10(%[temp]) \n\t" \
|
||||||
|
"gssqc1 $f30, $f28, 0x20(%[temp]) \n\t" \
|
||||||
|
: \
|
||||||
|
: [temp]"r"(temp_backup_reg) \
|
||||||
|
: "memory" \
|
||||||
|
);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* recover register
|
||||||
|
*/
|
||||||
|
#define RECOVER_REG \
|
||||||
|
if (_MIPS_SIM == _ABI64) \
|
||||||
|
__asm__ volatile ( \
|
||||||
|
"gslqc1 $f25, $f24, 0x00(%[temp]) \n\t" \
|
||||||
|
"gslqc1 $f27, $f26, 0x10(%[temp]) \n\t" \
|
||||||
|
"gslqc1 $f29, $f28, 0x20(%[temp]) \n\t" \
|
||||||
|
"gslqc1 $f31, $f30, 0x30(%[temp]) \n\t" \
|
||||||
|
: \
|
||||||
|
: [temp]"r"(temp_backup_reg) \
|
||||||
|
: "memory" \
|
||||||
|
); \
|
||||||
|
else \
|
||||||
|
__asm__ volatile ( \
|
||||||
|
"gslqc1 $f22, $f20, 0x00(%[temp]) \n\t" \
|
||||||
|
"gslqc1 $f26, $f24, 0x10(%[temp]) \n\t" \
|
||||||
|
"gslqc1 $f30, $f28, 0x20(%[temp]) \n\t" \
|
||||||
|
: \
|
||||||
|
: [temp]"r"(temp_backup_reg) \
|
||||||
|
: "memory" \
|
||||||
|
);
|
||||||
|
|
||||||
#define TRANSPOSE_4H(m1, m2, m3, m4, t1, t2, t3, t4, t5, r1, zero, shift) \
|
#define TRANSPOSE_4H(m1, m2, m3, m4, t1, t2, t3, t4, t5, r1, zero, shift) \
|
||||||
"li "#r1", 0x93 \n\t" \
|
"li "#r1", 0x93 \n\t" \
|
||||||
"xor "#zero","#zero","#zero" \n\t" \
|
"xor "#zero","#zero","#zero" \n\t" \
|
||||||
|
Loading…
Reference in New Issue
Block a user