mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
dsputil: Split off *_8x8basis to a separate context
This commit is contained in:
parent
a7985cfd4c
commit
8d686ca59d
@ -67,7 +67,8 @@ OBJS-$(CONFIG_MPEGAUDIODSP) += mpegaudiodsp.o \
|
|||||||
OBJS-$(CONFIG_MPEGVIDEO) += mpegvideo.o mpegvideodsp.o \
|
OBJS-$(CONFIG_MPEGVIDEO) += mpegvideo.o mpegvideodsp.o \
|
||||||
mpegvideo_motion.o mpegutils.o
|
mpegvideo_motion.o mpegutils.o
|
||||||
OBJS-$(CONFIG_MPEGVIDEOENC) += mpegvideo_enc.o mpeg12data.o \
|
OBJS-$(CONFIG_MPEGVIDEOENC) += mpegvideo_enc.o mpeg12data.o \
|
||||||
motion_est.o ratecontrol.o
|
motion_est.o ratecontrol.o \
|
||||||
|
mpegvideoencdsp.o
|
||||||
OBJS-$(CONFIG_QPELDSP) += qpeldsp.o
|
OBJS-$(CONFIG_QPELDSP) += qpeldsp.o
|
||||||
OBJS-$(CONFIG_RANGECODER) += rangecoder.o
|
OBJS-$(CONFIG_RANGECODER) += rangecoder.o
|
||||||
RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o
|
RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o
|
||||||
|
@ -464,35 +464,6 @@ static int nsse8_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, int stride, int
|
|||||||
return score1 + FFABS(score2) * 8;
|
return score1 + FFABS(score2) * 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64],
|
|
||||||
int16_t basis[64], int scale)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
unsigned int sum = 0;
|
|
||||||
|
|
||||||
for (i = 0; i < 8 * 8; i++) {
|
|
||||||
int b = rem[i] + ((basis[i] * scale +
|
|
||||||
(1 << (BASIS_SHIFT - RECON_SHIFT - 1))) >>
|
|
||||||
(BASIS_SHIFT - RECON_SHIFT));
|
|
||||||
int w = weight[i];
|
|
||||||
b >>= RECON_SHIFT;
|
|
||||||
assert(-512 < b && b < 512);
|
|
||||||
|
|
||||||
sum += (w * b) * (w * b) >> 4;
|
|
||||||
}
|
|
||||||
return sum >> 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = 0; i < 8 * 8; i++)
|
|
||||||
rem[i] += (basis[i] * scale +
|
|
||||||
(1 << (BASIS_SHIFT - RECON_SHIFT - 1))) >>
|
|
||||||
(BASIS_SHIFT - RECON_SHIFT);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int zero_cmp(MpegEncContext *s, uint8_t *a, uint8_t *b,
|
static int zero_cmp(MpegEncContext *s, uint8_t *a, uint8_t *b,
|
||||||
int stride, int h)
|
int stride, int h)
|
||||||
{
|
{
|
||||||
@ -1126,9 +1097,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
|
|||||||
c->nsse[0] = nsse16_c;
|
c->nsse[0] = nsse16_c;
|
||||||
c->nsse[1] = nsse8_c;
|
c->nsse[1] = nsse8_c;
|
||||||
|
|
||||||
c->try_8x8basis = try_8x8basis_c;
|
|
||||||
c->add_8x8basis = add_8x8basis_c;
|
|
||||||
|
|
||||||
c->shrink[0] = av_image_copy_plane;
|
c->shrink[0] = av_image_copy_plane;
|
||||||
c->shrink[1] = ff_shrink22;
|
c->shrink[1] = ff_shrink22;
|
||||||
c->shrink[2] = ff_shrink44;
|
c->shrink[2] = ff_shrink44;
|
||||||
|
@ -87,12 +87,6 @@ typedef struct DSPContext {
|
|||||||
void (*fdct)(int16_t *block /* align 16 */);
|
void (*fdct)(int16_t *block /* align 16 */);
|
||||||
void (*fdct248)(int16_t *block /* align 16 */);
|
void (*fdct248)(int16_t *block /* align 16 */);
|
||||||
|
|
||||||
int (*try_8x8basis)(int16_t rem[64], int16_t weight[64],
|
|
||||||
int16_t basis[64], int scale);
|
|
||||||
void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale);
|
|
||||||
#define BASIS_SHIFT 16
|
|
||||||
#define RECON_SHIFT 6
|
|
||||||
|
|
||||||
void (*draw_edges)(uint8_t *buf, int wrap, int width, int height,
|
void (*draw_edges)(uint8_t *buf, int wrap, int width, int height,
|
||||||
int w, int h, int sides);
|
int w, int h, int sides);
|
||||||
#define EDGE_WIDTH 16
|
#define EDGE_WIDTH 16
|
||||||
|
@ -37,6 +37,7 @@
|
|||||||
#include "hpeldsp.h"
|
#include "hpeldsp.h"
|
||||||
#include "idctdsp.h"
|
#include "idctdsp.h"
|
||||||
#include "mpegvideodsp.h"
|
#include "mpegvideodsp.h"
|
||||||
|
#include "mpegvideoencdsp.h"
|
||||||
#include "put_bits.h"
|
#include "put_bits.h"
|
||||||
#include "ratecontrol.h"
|
#include "ratecontrol.h"
|
||||||
#include "parser.h"
|
#include "parser.h"
|
||||||
@ -355,6 +356,7 @@ typedef struct MpegEncContext {
|
|||||||
HpelDSPContext hdsp;
|
HpelDSPContext hdsp;
|
||||||
IDCTDSPContext idsp;
|
IDCTDSPContext idsp;
|
||||||
MpegVideoDSPContext mdsp;
|
MpegVideoDSPContext mdsp;
|
||||||
|
MpegvideoEncDSPContext mpvencdsp;
|
||||||
QpelDSPContext qdsp;
|
QpelDSPContext qdsp;
|
||||||
VideoDSPContext vdsp;
|
VideoDSPContext vdsp;
|
||||||
H263DSPContext h263dsp;
|
H263DSPContext h263dsp;
|
||||||
|
@ -701,6 +701,7 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
|
|||||||
if (ARCH_X86)
|
if (ARCH_X86)
|
||||||
ff_MPV_encode_init_x86(s);
|
ff_MPV_encode_init_x86(s);
|
||||||
|
|
||||||
|
ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
|
||||||
ff_qpeldsp_init(&s->qdsp);
|
ff_qpeldsp_init(&s->qdsp);
|
||||||
|
|
||||||
s->avctx->coded_frame = s->current_picture.f;
|
s->avctx->coded_frame = s->current_picture.f;
|
||||||
@ -3871,7 +3872,7 @@ STOP_TIMER("memset rem[]")}
|
|||||||
run_tab[rle_index++]=run;
|
run_tab[rle_index++]=run;
|
||||||
run=0;
|
run=0;
|
||||||
|
|
||||||
s->dsp.add_8x8basis(rem, basis[j], coeff);
|
s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
|
||||||
}else{
|
}else{
|
||||||
run++;
|
run++;
|
||||||
}
|
}
|
||||||
@ -3885,7 +3886,7 @@ STOP_TIMER("init rem[]")
|
|||||||
{START_TIMER
|
{START_TIMER
|
||||||
#endif
|
#endif
|
||||||
for(;;){
|
for(;;){
|
||||||
int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
|
int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
|
||||||
int best_coeff=0;
|
int best_coeff=0;
|
||||||
int best_change=0;
|
int best_change=0;
|
||||||
int run2, best_unquant_change=0, analyze_gradient;
|
int run2, best_unquant_change=0, analyze_gradient;
|
||||||
@ -3929,7 +3930,8 @@ STOP_TIMER("dct")}
|
|||||||
if(new_coeff >= 2048 || new_coeff < 0)
|
if(new_coeff >= 2048 || new_coeff < 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
|
score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
|
||||||
|
new_coeff - old_coeff);
|
||||||
if(score<best_score){
|
if(score<best_score){
|
||||||
best_score= score;
|
best_score= score;
|
||||||
best_coeff= 0;
|
best_coeff= 0;
|
||||||
@ -4052,7 +4054,8 @@ STOP_TIMER("dct")}
|
|||||||
unquant_change= new_coeff - old_coeff;
|
unquant_change= new_coeff - old_coeff;
|
||||||
assert((score < 100*lambda && score > -100*lambda) || lambda==0);
|
assert((score < 100*lambda && score > -100*lambda) || lambda==0);
|
||||||
|
|
||||||
score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
|
score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
|
||||||
|
unquant_change);
|
||||||
if(score<best_score){
|
if(score<best_score){
|
||||||
best_score= score;
|
best_score= score;
|
||||||
best_coeff= i;
|
best_coeff= i;
|
||||||
@ -4126,7 +4129,7 @@ if(256*256*256*64 % count == 0){
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
|
s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
|
||||||
}else{
|
}else{
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
64
libavcodec/mpegvideoencdsp.c
Normal file
64
libavcodec/mpegvideoencdsp.c
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
/*
|
||||||
|
* This file is part of Libav.
|
||||||
|
*
|
||||||
|
* Libav is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Libav is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with Libav; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
#include "libavutil/attributes.h"
|
||||||
|
#include "avcodec.h"
|
||||||
|
#include "mpegvideoencdsp.h"
|
||||||
|
|
||||||
|
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64],
|
||||||
|
int16_t basis[64], int scale)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
unsigned int sum = 0;
|
||||||
|
|
||||||
|
for (i = 0; i < 8 * 8; i++) {
|
||||||
|
int b = rem[i] + ((basis[i] * scale +
|
||||||
|
(1 << (BASIS_SHIFT - RECON_SHIFT - 1))) >>
|
||||||
|
(BASIS_SHIFT - RECON_SHIFT));
|
||||||
|
int w = weight[i];
|
||||||
|
b >>= RECON_SHIFT;
|
||||||
|
assert(-512 < b && b < 512);
|
||||||
|
|
||||||
|
sum += (w * b) * (w * b) >> 4;
|
||||||
|
}
|
||||||
|
return sum >> 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < 8 * 8; i++)
|
||||||
|
rem[i] += (basis[i] * scale +
|
||||||
|
(1 << (BASIS_SHIFT - RECON_SHIFT - 1))) >>
|
||||||
|
(BASIS_SHIFT - RECON_SHIFT);
|
||||||
|
}
|
||||||
|
|
||||||
|
av_cold void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c,
|
||||||
|
AVCodecContext *avctx)
|
||||||
|
{
|
||||||
|
c->try_8x8basis = try_8x8basis_c;
|
||||||
|
c->add_8x8basis = add_8x8basis_c;
|
||||||
|
|
||||||
|
if (ARCH_X86)
|
||||||
|
ff_mpegvideoencdsp_init_x86(c, avctx);
|
||||||
|
}
|
41
libavcodec/mpegvideoencdsp.h
Normal file
41
libavcodec/mpegvideoencdsp.h
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
/*
|
||||||
|
* This file is part of Libav.
|
||||||
|
*
|
||||||
|
* Libav is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Libav is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with Libav; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef AVCODEC_MPEGVIDEOENCDSP_H
|
||||||
|
#define AVCODEC_MPEGVIDEOENCDSP_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "avcodec.h"
|
||||||
|
|
||||||
|
#define BASIS_SHIFT 16
|
||||||
|
#define RECON_SHIFT 6
|
||||||
|
|
||||||
|
typedef struct MpegvideoEncDSPContext {
|
||||||
|
int (*try_8x8basis)(int16_t rem[64], int16_t weight[64],
|
||||||
|
int16_t basis[64], int scale);
|
||||||
|
void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale);
|
||||||
|
|
||||||
|
} MpegvideoEncDSPContext;
|
||||||
|
|
||||||
|
void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c,
|
||||||
|
AVCodecContext *avctx);
|
||||||
|
void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
|
||||||
|
AVCodecContext *avctx);
|
||||||
|
|
||||||
|
#endif /* AVCODEC_MPEGVIDEOENCDSP_H */
|
@ -23,7 +23,8 @@ OBJS-$(CONFIG_LPC) += x86/lpc.o
|
|||||||
OBJS-$(CONFIG_MPEGAUDIODSP) += x86/mpegaudiodsp.o
|
OBJS-$(CONFIG_MPEGAUDIODSP) += x86/mpegaudiodsp.o
|
||||||
OBJS-$(CONFIG_MPEGVIDEO) += x86/mpegvideo.o \
|
OBJS-$(CONFIG_MPEGVIDEO) += x86/mpegvideo.o \
|
||||||
x86/mpegvideodsp.o
|
x86/mpegvideodsp.o
|
||||||
OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoenc.o
|
OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoenc.o \
|
||||||
|
x86/mpegvideoencdsp_init.o
|
||||||
OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp_init.o
|
OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp_init.o
|
||||||
OBJS-$(CONFIG_VIDEODSP) += x86/videodsp_init.o
|
OBJS-$(CONFIG_VIDEODSP) += x86/videodsp_init.o
|
||||||
OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp_init.o
|
OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp_init.o
|
||||||
|
@ -805,72 +805,6 @@ DCT_SAD_FUNC(ssse3)
|
|||||||
#undef HSUM
|
#undef HSUM
|
||||||
#undef DCT_SAD
|
#undef DCT_SAD
|
||||||
|
|
||||||
#define PHADDD(a, t) \
|
|
||||||
"movq " #a ", " #t " \n\t" \
|
|
||||||
"psrlq $32, " #a " \n\t" \
|
|
||||||
"paddd " #t ", " #a " \n\t"
|
|
||||||
|
|
||||||
/*
|
|
||||||
* pmulhw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15])[16 - 31]
|
|
||||||
* pmulhrw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x8000)[16 - 31]
|
|
||||||
* pmulhrsw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x4000)[15 - 30]
|
|
||||||
*/
|
|
||||||
#define PMULHRW(x, y, s, o) \
|
|
||||||
"pmulhw " #s ", " #x " \n\t" \
|
|
||||||
"pmulhw " #s ", " #y " \n\t" \
|
|
||||||
"paddw " #o ", " #x " \n\t" \
|
|
||||||
"paddw " #o ", " #y " \n\t" \
|
|
||||||
"psraw $1, " #x " \n\t" \
|
|
||||||
"psraw $1, " #y " \n\t"
|
|
||||||
#define DEF(x) x ## _mmx
|
|
||||||
#define SET_RND MOVQ_WONE
|
|
||||||
#define SCALE_OFFSET 1
|
|
||||||
|
|
||||||
#include "dsputil_qns_template.c"
|
|
||||||
|
|
||||||
#undef DEF
|
|
||||||
#undef SET_RND
|
|
||||||
#undef SCALE_OFFSET
|
|
||||||
#undef PMULHRW
|
|
||||||
|
|
||||||
#define DEF(x) x ## _3dnow
|
|
||||||
#define SET_RND(x)
|
|
||||||
#define SCALE_OFFSET 0
|
|
||||||
#define PMULHRW(x, y, s, o) \
|
|
||||||
"pmulhrw " #s ", " #x " \n\t" \
|
|
||||||
"pmulhrw " #s ", " #y " \n\t"
|
|
||||||
|
|
||||||
#include "dsputil_qns_template.c"
|
|
||||||
|
|
||||||
#undef DEF
|
|
||||||
#undef SET_RND
|
|
||||||
#undef SCALE_OFFSET
|
|
||||||
#undef PMULHRW
|
|
||||||
|
|
||||||
#if HAVE_SSSE3_INLINE
|
|
||||||
#undef PHADDD
|
|
||||||
#define DEF(x) x ## _ssse3
|
|
||||||
#define SET_RND(x)
|
|
||||||
#define SCALE_OFFSET -1
|
|
||||||
|
|
||||||
#define PHADDD(a, t) \
|
|
||||||
"pshufw $0x0E, " #a ", " #t " \n\t" \
|
|
||||||
/* faster than phaddd on core2 */ \
|
|
||||||
"paddd " #t ", " #a " \n\t"
|
|
||||||
|
|
||||||
#define PMULHRW(x, y, s, o) \
|
|
||||||
"pmulhrsw " #s ", " #x " \n\t" \
|
|
||||||
"pmulhrsw " #s ", " #y " \n\t"
|
|
||||||
|
|
||||||
#include "dsputil_qns_template.c"
|
|
||||||
|
|
||||||
#undef DEF
|
|
||||||
#undef SET_RND
|
|
||||||
#undef SCALE_OFFSET
|
|
||||||
#undef PMULHRW
|
|
||||||
#undef PHADDD
|
|
||||||
#endif /* HAVE_SSSE3_INLINE */
|
|
||||||
|
|
||||||
#endif /* HAVE_INLINE_ASM */
|
#endif /* HAVE_INLINE_ASM */
|
||||||
|
|
||||||
int ff_sse16_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
|
int ff_sse16_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
|
||||||
@ -921,16 +855,7 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
|
|||||||
c->nsse[1] = nsse8_mmx;
|
c->nsse[1] = nsse8_mmx;
|
||||||
if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
|
if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
|
||||||
c->vsad[0] = vsad16_mmx;
|
c->vsad[0] = vsad16_mmx;
|
||||||
c->try_8x8basis = try_8x8basis_mmx;
|
|
||||||
}
|
}
|
||||||
c->add_8x8basis = add_8x8basis_mmx;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (INLINE_AMD3DNOW(cpu_flags)) {
|
|
||||||
if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
|
|
||||||
c->try_8x8basis = try_8x8basis_3dnow;
|
|
||||||
}
|
|
||||||
c->add_8x8basis = add_8x8basis_3dnow;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (INLINE_MMXEXT(cpu_flags)) {
|
if (INLINE_MMXEXT(cpu_flags)) {
|
||||||
@ -956,10 +881,6 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
|
|||||||
|
|
||||||
#if HAVE_SSSE3_INLINE
|
#if HAVE_SSSE3_INLINE
|
||||||
if (INLINE_SSSE3(cpu_flags)) {
|
if (INLINE_SSSE3(cpu_flags)) {
|
||||||
if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
|
|
||||||
c->try_8x8basis = try_8x8basis_ssse3;
|
|
||||||
}
|
|
||||||
c->add_8x8basis = add_8x8basis_ssse3;
|
|
||||||
c->sum_abs_dctelem = sum_abs_dctelem_ssse3;
|
c->sum_abs_dctelem = sum_abs_dctelem_ssse3;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* DSP utils : QNS functions are compiled 3 times for mmx/3dnow/ssse3
|
* QNS functions are compiled 3 times for MMX/3DNOW/SSSE3
|
||||||
* Copyright (c) 2004 Michael Niedermayer
|
* Copyright (c) 2004 Michael Niedermayer
|
||||||
*
|
*
|
||||||
* MMX optimization by Michael Niedermayer <michaelni@gmx.at>
|
* MMX optimization by Michael Niedermayer <michaelni@gmx.at>
|
125
libavcodec/x86/mpegvideoencdsp_init.c
Normal file
125
libavcodec/x86/mpegvideoencdsp_init.c
Normal file
@ -0,0 +1,125 @@
|
|||||||
|
/*
|
||||||
|
* This file is part of Libav.
|
||||||
|
*
|
||||||
|
* Libav is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Libav is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with Libav; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "libavutil/attributes.h"
|
||||||
|
#include "libavutil/cpu.h"
|
||||||
|
#include "libavutil/x86/cpu.h"
|
||||||
|
#include "libavcodec/avcodec.h"
|
||||||
|
#include "libavcodec/mpegvideoencdsp.h"
|
||||||
|
|
||||||
|
#if HAVE_INLINE_ASM
|
||||||
|
|
||||||
|
#define PHADDD(a, t) \
|
||||||
|
"movq " #a ", " #t " \n\t" \
|
||||||
|
"psrlq $32, " #a " \n\t" \
|
||||||
|
"paddd " #t ", " #a " \n\t"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* pmulhw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15])[16 - 31]
|
||||||
|
* pmulhrw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x8000)[16 - 31]
|
||||||
|
* pmulhrsw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x4000)[15 - 30]
|
||||||
|
*/
|
||||||
|
#define PMULHRW(x, y, s, o) \
|
||||||
|
"pmulhw " #s ", " #x " \n\t" \
|
||||||
|
"pmulhw " #s ", " #y " \n\t" \
|
||||||
|
"paddw " #o ", " #x " \n\t" \
|
||||||
|
"paddw " #o ", " #y " \n\t" \
|
||||||
|
"psraw $1, " #x " \n\t" \
|
||||||
|
"psraw $1, " #y " \n\t"
|
||||||
|
#define DEF(x) x ## _mmx
|
||||||
|
#define SET_RND MOVQ_WONE
|
||||||
|
#define SCALE_OFFSET 1
|
||||||
|
|
||||||
|
#include "mpegvideoenc_qns_template.c"
|
||||||
|
|
||||||
|
#undef DEF
|
||||||
|
#undef SET_RND
|
||||||
|
#undef SCALE_OFFSET
|
||||||
|
#undef PMULHRW
|
||||||
|
|
||||||
|
#define DEF(x) x ## _3dnow
|
||||||
|
#define SET_RND(x)
|
||||||
|
#define SCALE_OFFSET 0
|
||||||
|
#define PMULHRW(x, y, s, o) \
|
||||||
|
"pmulhrw " #s ", " #x " \n\t" \
|
||||||
|
"pmulhrw " #s ", " #y " \n\t"
|
||||||
|
|
||||||
|
#include "mpegvideoenc_qns_template.c"
|
||||||
|
|
||||||
|
#undef DEF
|
||||||
|
#undef SET_RND
|
||||||
|
#undef SCALE_OFFSET
|
||||||
|
#undef PMULHRW
|
||||||
|
|
||||||
|
#if HAVE_SSSE3_INLINE
|
||||||
|
#undef PHADDD
|
||||||
|
#define DEF(x) x ## _ssse3
|
||||||
|
#define SET_RND(x)
|
||||||
|
#define SCALE_OFFSET -1
|
||||||
|
|
||||||
|
#define PHADDD(a, t) \
|
||||||
|
"pshufw $0x0E, " #a ", " #t " \n\t" \
|
||||||
|
/* faster than phaddd on core2 */ \
|
||||||
|
"paddd " #t ", " #a " \n\t"
|
||||||
|
|
||||||
|
#define PMULHRW(x, y, s, o) \
|
||||||
|
"pmulhrsw " #s ", " #x " \n\t" \
|
||||||
|
"pmulhrsw " #s ", " #y " \n\t"
|
||||||
|
|
||||||
|
#include "mpegvideoenc_qns_template.c"
|
||||||
|
|
||||||
|
#undef DEF
|
||||||
|
#undef SET_RND
|
||||||
|
#undef SCALE_OFFSET
|
||||||
|
#undef PMULHRW
|
||||||
|
#undef PHADDD
|
||||||
|
#endif /* HAVE_SSSE3_INLINE */
|
||||||
|
|
||||||
|
#endif /* HAVE_INLINE_ASM */
|
||||||
|
|
||||||
|
av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
|
||||||
|
AVCodecContext *avctx)
|
||||||
|
{
|
||||||
|
#if HAVE_INLINE_ASM
|
||||||
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
|
if (INLINE_MMX(cpu_flags)) {
|
||||||
|
if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
|
||||||
|
c->try_8x8basis = try_8x8basis_mmx;
|
||||||
|
}
|
||||||
|
c->add_8x8basis = add_8x8basis_mmx;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (INLINE_AMD3DNOW(cpu_flags)) {
|
||||||
|
if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
|
||||||
|
c->try_8x8basis = try_8x8basis_3dnow;
|
||||||
|
}
|
||||||
|
c->add_8x8basis = add_8x8basis_3dnow;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if HAVE_SSSE3_INLINE
|
||||||
|
if (INLINE_SSSE3(cpu_flags)) {
|
||||||
|
if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
|
||||||
|
c->try_8x8basis = try_8x8basis_ssse3;
|
||||||
|
}
|
||||||
|
c->add_8x8basis = add_8x8basis_ssse3;
|
||||||
|
}
|
||||||
|
#endif /* HAVE_SSSE3_INLINE */
|
||||||
|
|
||||||
|
#endif /* HAVE_INLINE_ASM */
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user