mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
Merge commit '65d5d5865845f057cc6530a8d0f34db952d9009c'
* commit '65d5d5865845f057cc6530a8d0f34db952d9009c':
  dsputil: Move SVQ1 encoding specific bits into svq1enc

Conflicts:
  libavcodec/x86/Makefile

Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
commit
ea0931fb96
@ -2216,16 +2216,6 @@ static int vsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
|
|||||||
return score;
|
return score;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
|
|
||||||
int size)
|
|
||||||
{
|
|
||||||
int score = 0, i;
|
|
||||||
|
|
||||||
for (i = 0; i < size; i++)
|
|
||||||
score += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);
|
|
||||||
return score;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define WRAPPER8_16_SQ(name8, name16) \
|
#define WRAPPER8_16_SQ(name8, name16) \
|
||||||
static int name16(MpegEncContext *s, uint8_t *dst, uint8_t *src, \
|
static int name16(MpegEncContext *s, uint8_t *dst, uint8_t *src, \
|
||||||
int stride, int h) \
|
int stride, int h) \
|
||||||
@ -2626,8 +2616,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
|
|||||||
ff_dsputil_init_dwt(c);
|
ff_dsputil_init_dwt(c);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
|
|
||||||
|
|
||||||
c->bswap_buf = bswap_buf;
|
c->bswap_buf = bswap_buf;
|
||||||
c->bswap16_buf = bswap16_buf;
|
c->bswap16_buf = bswap16_buf;
|
||||||
|
|
||||||
|
@ -177,9 +177,6 @@ typedef struct DSPContext {
|
|||||||
me_cmp_func ildct_cmp[6]; // only width 16 used
|
me_cmp_func ildct_cmp[6]; // only width 16 used
|
||||||
me_cmp_func frame_skip_cmp[6]; // only width 8 used
|
me_cmp_func frame_skip_cmp[6]; // only width 8 used
|
||||||
|
|
||||||
int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2,
|
|
||||||
int size);
|
|
||||||
|
|
||||||
qpel_mc_func put_qpel_pixels_tab[2][16];
|
qpel_mc_func put_qpel_pixels_tab[2][16];
|
||||||
qpel_mc_func avg_qpel_pixels_tab[2][16];
|
qpel_mc_func avg_qpel_pixels_tab[2][16];
|
||||||
qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
|
qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
|
||||||
|
@ -12,6 +12,7 @@ OBJS-$(CONFIG_MPEGVIDEO) += ppc/mpegvideo_altivec.o
|
|||||||
OBJS-$(CONFIG_VIDEODSP) += ppc/videodsp_ppc.o
|
OBJS-$(CONFIG_VIDEODSP) += ppc/videodsp_ppc.o
|
||||||
OBJS-$(CONFIG_VP3DSP) += ppc/vp3dsp_altivec.o
|
OBJS-$(CONFIG_VP3DSP) += ppc/vp3dsp_altivec.o
|
||||||
|
|
||||||
|
OBJS-$(CONFIG_SVQ1_ENCODER) += ppc/svq1enc_altivec.o
|
||||||
OBJS-$(CONFIG_VC1_DECODER) += ppc/vc1dsp_altivec.o
|
OBJS-$(CONFIG_VC1_DECODER) += ppc/vc1dsp_altivec.o
|
||||||
OBJS-$(CONFIG_VORBIS_DECODER) += ppc/vorbisdsp_altivec.o
|
OBJS-$(CONFIG_VORBIS_DECODER) += ppc/vorbisdsp_altivec.o
|
||||||
OBJS-$(CONFIG_VP7_DECODER) += ppc/vp8dsp_altivec.o
|
OBJS-$(CONFIG_VP7_DECODER) += ppc/vp8dsp_altivec.o
|
||||||
|
@ -34,48 +34,6 @@
|
|||||||
#include "libavcodec/dsputil.h"
|
#include "libavcodec/dsputil.h"
|
||||||
#include "dsputil_altivec.h"
|
#include "dsputil_altivec.h"
|
||||||
|
|
||||||
static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2,
|
|
||||||
int size)
|
|
||||||
{
|
|
||||||
int i, size16 = size >> 4;
|
|
||||||
vector signed char vpix1;
|
|
||||||
vector signed short vpix2, vdiff, vpix1l, vpix1h;
|
|
||||||
union {
|
|
||||||
vector signed int vscore;
|
|
||||||
int32_t score[4];
|
|
||||||
} u = { .vscore = vec_splat_s32(0) };
|
|
||||||
|
|
||||||
// XXX lazy way, fix it later
|
|
||||||
|
|
||||||
while (size16) {
|
|
||||||
// score += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);
|
|
||||||
// load pix1 and the first batch of pix2
|
|
||||||
|
|
||||||
vpix1 = vec_unaligned_load(pix1);
|
|
||||||
vpix2 = vec_unaligned_load(pix2);
|
|
||||||
pix2 += 8;
|
|
||||||
// unpack
|
|
||||||
vpix1h = vec_unpackh(vpix1);
|
|
||||||
vdiff = vec_sub(vpix1h, vpix2);
|
|
||||||
vpix1l = vec_unpackl(vpix1);
|
|
||||||
// load another batch from pix2
|
|
||||||
vpix2 = vec_unaligned_load(pix2);
|
|
||||||
u.vscore = vec_msum(vdiff, vdiff, u.vscore);
|
|
||||||
vdiff = vec_sub(vpix1l, vpix2);
|
|
||||||
u.vscore = vec_msum(vdiff, vdiff, u.vscore);
|
|
||||||
pix1 += 16;
|
|
||||||
pix2 += 8;
|
|
||||||
size16--;
|
|
||||||
}
|
|
||||||
u.vscore = vec_sums(u.vscore, vec_splat_s32(0));
|
|
||||||
|
|
||||||
size %= 16;
|
|
||||||
for (i = 0; i < size; i++)
|
|
||||||
u.score[3] += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);
|
|
||||||
|
|
||||||
return u.score[3];
|
|
||||||
}
|
|
||||||
|
|
||||||
static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2,
|
static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2,
|
||||||
int order)
|
int order)
|
||||||
{
|
{
|
||||||
@ -140,8 +98,6 @@ static int32_t scalarproduct_and_madd_int16_altivec(int16_t *v1,
|
|||||||
|
|
||||||
av_cold void ff_int_init_altivec(DSPContext *c, AVCodecContext *avctx)
|
av_cold void ff_int_init_altivec(DSPContext *c, AVCodecContext *avctx)
|
||||||
{
|
{
|
||||||
c->ssd_int8_vs_int16 = ssd_int8_vs_int16_altivec;
|
|
||||||
|
|
||||||
c->scalarproduct_int16 = scalarproduct_int16_altivec;
|
c->scalarproduct_int16 = scalarproduct_int16_altivec;
|
||||||
|
|
||||||
c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_altivec;
|
c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_altivec;
|
||||||
|
80
libavcodec/ppc/svq1enc_altivec.c
Normal file
80
libavcodec/ppc/svq1enc_altivec.c
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2007 Luca Barbato <lu_zero@gentoo.org>
|
||||||
|
*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdint.h>

#include "config.h"
#if HAVE_ALTIVEC_H
#include <altivec.h>
#endif

#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/ppc/types_altivec.h"
#include "libavutil/ppc/util_altivec.h"
#include "libavcodec/svq1enc.h"
|
||||||
|
|
||||||
|
#if HAVE_ALTIVEC
|
||||||
|
static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2,
|
||||||
|
int size)
|
||||||
|
{
|
||||||
|
int i, size16 = size >> 4;
|
||||||
|
vector signed char vpix1;
|
||||||
|
vector signed short vpix2, vdiff, vpix1l, vpix1h;
|
||||||
|
union {
|
||||||
|
vector signed int vscore;
|
||||||
|
int32_t score[4];
|
||||||
|
} u = { .vscore = vec_splat_s32(0) };
|
||||||
|
|
||||||
|
while (size16) {
|
||||||
|
// score += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);
|
||||||
|
// load pix1 and the first batch of pix2
|
||||||
|
|
||||||
|
vpix1 = vec_unaligned_load(pix1);
|
||||||
|
vpix2 = vec_unaligned_load(pix2);
|
||||||
|
pix2 += 8;
|
||||||
|
// unpack
|
||||||
|
vpix1h = vec_unpackh(vpix1);
|
||||||
|
vdiff = vec_sub(vpix1h, vpix2);
|
||||||
|
vpix1l = vec_unpackl(vpix1);
|
||||||
|
// load another batch from pix2
|
||||||
|
vpix2 = vec_unaligned_load(pix2);
|
||||||
|
u.vscore = vec_msum(vdiff, vdiff, u.vscore);
|
||||||
|
vdiff = vec_sub(vpix1l, vpix2);
|
||||||
|
u.vscore = vec_msum(vdiff, vdiff, u.vscore);
|
||||||
|
pix1 += 16;
|
||||||
|
pix2 += 8;
|
||||||
|
size16--;
|
||||||
|
}
|
||||||
|
u.vscore = vec_sums(u.vscore, vec_splat_s32(0));
|
||||||
|
|
||||||
|
size %= 16;
|
||||||
|
for (i = 0; i < size; i++)
|
||||||
|
u.score[3] += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);
|
||||||
|
|
||||||
|
return u.score[3];
|
||||||
|
}
|
||||||
|
#endif /* HAVE_ALTIVEC */
|
||||||
|
|
||||||
|
/**
 * Install the PowerPC-specific routines into an SVQ1 encoder context.
 *
 * Does nothing when the build lacks AltiVec support or when the CPU the
 * code is running on does not report AltiVec.
 *
 * @param c encoder context whose ssd_int8_vs_int16 pointer may be replaced
 */
av_cold void ff_svq1enc_init_ppc(SVQ1EncContext *c)
{
#if HAVE_ALTIVEC
    /* HAVE_ALTIVEC only tells us the routine was compiled in; the CPU
     * actually executing the code must be checked at run time as well. */
    if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
        return;

    c->ssd_int8_vs_int16 = ssd_int8_vs_int16_altivec;
#endif /* HAVE_ALTIVEC */
}
|
@ -34,48 +34,11 @@
|
|||||||
#include "internal.h"
|
#include "internal.h"
|
||||||
#include "mpegutils.h"
|
#include "mpegutils.h"
|
||||||
#include "svq1.h"
|
#include "svq1.h"
|
||||||
|
#include "svq1enc.h"
|
||||||
#include "svq1enc_cb.h"
|
#include "svq1enc_cb.h"
|
||||||
#include "libavutil/avassert.h"
|
#include "libavutil/avassert.h"
|
||||||
|
|
||||||
|
|
||||||
typedef struct SVQ1EncContext {
|
|
||||||
/* FIXME: Needed for motion estimation, should not be used for anything
|
|
||||||
* else, the idea is to make the motion estimation eventually independent
|
|
||||||
* of MpegEncContext, so this will be removed then. */
|
|
||||||
MpegEncContext m;
|
|
||||||
AVCodecContext *avctx;
|
|
||||||
DSPContext dsp;
|
|
||||||
HpelDSPContext hdsp;
|
|
||||||
AVFrame *current_picture;
|
|
||||||
AVFrame *last_picture;
|
|
||||||
PutBitContext pb;
|
|
||||||
GetBitContext gb;
|
|
||||||
|
|
||||||
/* why ooh why this sick breadth first order,
|
|
||||||
* everything is slower and more complex */
|
|
||||||
PutBitContext reorder_pb[6];
|
|
||||||
|
|
||||||
int frame_width;
|
|
||||||
int frame_height;
|
|
||||||
|
|
||||||
/* Y plane block dimensions */
|
|
||||||
int y_block_width;
|
|
||||||
int y_block_height;
|
|
||||||
|
|
||||||
/* U & V plane (C planes) block dimensions */
|
|
||||||
int c_block_width;
|
|
||||||
int c_block_height;
|
|
||||||
|
|
||||||
uint16_t *mb_type;
|
|
||||||
uint32_t *dummy;
|
|
||||||
int16_t (*motion_val8[3])[2];
|
|
||||||
int16_t (*motion_val16[3])[2];
|
|
||||||
|
|
||||||
int64_t rd_total;
|
|
||||||
|
|
||||||
uint8_t *scratchbuf;
|
|
||||||
} SVQ1EncContext;
|
|
||||||
|
|
||||||
static void svq1_write_header(SVQ1EncContext *s, int frame_type)
|
static void svq1_write_header(SVQ1EncContext *s, int frame_type)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
@ -113,6 +76,16 @@ static void svq1_write_header(SVQ1EncContext *s, int frame_type)
|
|||||||
#define QUALITY_THRESHOLD 100
|
#define QUALITY_THRESHOLD 100
|
||||||
#define THRESHOLD_MULTIPLIER 0.6
|
#define THRESHOLD_MULTIPLIER 0.6
|
||||||
|
|
||||||
|
/**
 * Sum of squared differences between an int8 vector and an int16 vector.
 *
 * @param pix1 signed 8-bit input, at least size entries
 * @param pix2 signed 16-bit input, at least size entries
 * @param size number of entries to compare; values <= 0 give 0
 * @return sum over i in [0, size) of (pix1[i] - pix2[i])^2
 */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size)
{
    /* A single squared difference always fits in an int, but the running
     * total of many large ones may not. Accumulating in unsigned keeps the
     * arithmetic well defined instead of overflowing a signed int. */
    unsigned score = 0;
    int i;

    for (i = 0; i < size; i++) {
        const int diff = pix1[i] - pix2[i];

        score += (unsigned)(diff * diff);
    }
    return (int)score;
}
|
||||||
|
|
||||||
static int encode_block(SVQ1EncContext *s, uint8_t *src, uint8_t *ref,
|
static int encode_block(SVQ1EncContext *s, uint8_t *src, uint8_t *ref,
|
||||||
uint8_t *decoded, int stride, int level,
|
uint8_t *decoded, int stride, int level,
|
||||||
int threshold, int lambda, int intra)
|
int threshold, int lambda, int intra)
|
||||||
@ -174,7 +147,7 @@ static int encode_block(SVQ1EncContext *s, uint8_t *src, uint8_t *ref,
|
|||||||
int sqr, diff, score;
|
int sqr, diff, score;
|
||||||
|
|
||||||
vector = codebook + stage * size * 16 + i * size;
|
vector = codebook + stage * size * 16 + i * size;
|
||||||
sqr = s->dsp.ssd_int8_vs_int16(vector, block[stage], size);
|
sqr = s->ssd_int8_vs_int16(vector, block[stage], size);
|
||||||
diff = block_sum[stage] - sum;
|
diff = block_sum[stage] - sum;
|
||||||
score = sqr - (diff * (int64_t)diff >> (level + 3)); // FIXME: 64bit slooow
|
score = sqr - (diff * (int64_t)diff >> (level + 3)); // FIXME: 64bit slooow
|
||||||
if (score < best_vector_score) {
|
if (score < best_vector_score) {
|
||||||
@ -580,6 +553,13 @@ static av_cold int svq1_encode_init(AVCodecContext *avctx)
|
|||||||
s->y_block_height * sizeof(int16_t));
|
s->y_block_height * sizeof(int16_t));
|
||||||
s->dummy = av_mallocz((s->y_block_width + 1) *
|
s->dummy = av_mallocz((s->y_block_width + 1) *
|
||||||
s->y_block_height * sizeof(int32_t));
|
s->y_block_height * sizeof(int32_t));
|
||||||
|
s->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
|
||||||
|
|
||||||
|
if (ARCH_PPC)
|
||||||
|
ff_svq1enc_init_ppc(s);
|
||||||
|
if (ARCH_X86)
|
||||||
|
ff_svq1enc_init_x86(s);
|
||||||
|
|
||||||
ff_h263_encode_init(&s->m); // mv_penalty
|
ff_h263_encode_init(&s->m); // mv_penalty
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
78
libavcodec/svq1enc.h
Normal file
78
libavcodec/svq1enc.h
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
/*
|
||||||
|
* SVQ1 encoder
|
||||||
|
*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef AVCODEC_SVQ1ENC_H
|
||||||
|
#define AVCODEC_SVQ1ENC_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "libavutil/frame.h"
|
||||||
|
#include "avcodec.h"
|
||||||
|
#include "dsputil.h"
|
||||||
|
#include "get_bits.h"
|
||||||
|
#include "hpeldsp.h"
|
||||||
|
#include "mpegvideo.h"
|
||||||
|
#include "put_bits.h"
|
||||||
|
|
||||||
|
typedef struct SVQ1EncContext {
|
||||||
|
/* FIXME: Needed for motion estimation, should not be used for anything
|
||||||
|
* else, the idea is to make the motion estimation eventually independent
|
||||||
|
* of MpegEncContext, so this will be removed then. */
|
||||||
|
MpegEncContext m;
|
||||||
|
AVCodecContext *avctx;
|
||||||
|
DSPContext dsp;
|
||||||
|
HpelDSPContext hdsp;
|
||||||
|
AVFrame *current_picture;
|
||||||
|
AVFrame *last_picture;
|
||||||
|
PutBitContext pb;
|
||||||
|
GetBitContext gb;
|
||||||
|
|
||||||
|
/* why ooh why this sick breadth first order,
|
||||||
|
* everything is slower and more complex */
|
||||||
|
PutBitContext reorder_pb[6];
|
||||||
|
|
||||||
|
int frame_width;
|
||||||
|
int frame_height;
|
||||||
|
|
||||||
|
/* Y plane block dimensions */
|
||||||
|
int y_block_width;
|
||||||
|
int y_block_height;
|
||||||
|
|
||||||
|
/* U & V plane (C planes) block dimensions */
|
||||||
|
int c_block_width;
|
||||||
|
int c_block_height;
|
||||||
|
|
||||||
|
uint16_t *mb_type;
|
||||||
|
uint32_t *dummy;
|
||||||
|
int16_t (*motion_val8[3])[2];
|
||||||
|
int16_t (*motion_val16[3])[2];
|
||||||
|
|
||||||
|
int64_t rd_total;
|
||||||
|
|
||||||
|
uint8_t *scratchbuf;
|
||||||
|
|
||||||
|
int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2,
|
||||||
|
int size);
|
||||||
|
} SVQ1EncContext;
|
||||||
|
|
||||||
|
void ff_svq1enc_init_ppc(SVQ1EncContext *c);
|
||||||
|
void ff_svq1enc_init_x86(SVQ1EncContext *c);
|
||||||
|
|
||||||
|
#endif /* AVCODEC_SVQ1ENC_H */
|
@ -59,6 +59,7 @@ MMX-OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp_mmx.o
|
|||||||
|
|
||||||
MMX-OBJS-$(CONFIG_SNOW_DECODER) += x86/snowdsp.o
|
MMX-OBJS-$(CONFIG_SNOW_DECODER) += x86/snowdsp.o
|
||||||
MMX-OBJS-$(CONFIG_SNOW_ENCODER) += x86/snowdsp.o
|
MMX-OBJS-$(CONFIG_SNOW_ENCODER) += x86/snowdsp.o
|
||||||
|
MMX-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc_mmx.o
|
||||||
MMX-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_mmx.o
|
MMX-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_mmx.o
|
||||||
|
|
||||||
YASM-OBJS += x86/deinterlace.o \
|
YASM-OBJS += x86/deinterlace.o \
|
||||||
|
@ -703,40 +703,6 @@ static int vsad16_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
|
|||||||
#undef SUM
|
#undef SUM
|
||||||
|
|
||||||
|
|
||||||
static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2,
|
|
||||||
int size)
|
|
||||||
{
|
|
||||||
int sum;
|
|
||||||
x86_reg i = size;
|
|
||||||
|
|
||||||
__asm__ volatile (
|
|
||||||
"pxor %%mm4, %%mm4 \n"
|
|
||||||
"1: \n"
|
|
||||||
"sub $8, %0 \n"
|
|
||||||
"movq (%2, %0), %%mm2 \n"
|
|
||||||
"movq (%3, %0, 2), %%mm0 \n"
|
|
||||||
"movq 8(%3, %0, 2), %%mm1 \n"
|
|
||||||
"punpckhbw %%mm2, %%mm3 \n"
|
|
||||||
"punpcklbw %%mm2, %%mm2 \n"
|
|
||||||
"psraw $8, %%mm3 \n"
|
|
||||||
"psraw $8, %%mm2 \n"
|
|
||||||
"psubw %%mm3, %%mm1 \n"
|
|
||||||
"psubw %%mm2, %%mm0 \n"
|
|
||||||
"pmaddwd %%mm1, %%mm1 \n"
|
|
||||||
"pmaddwd %%mm0, %%mm0 \n"
|
|
||||||
"paddd %%mm1, %%mm4 \n"
|
|
||||||
"paddd %%mm0, %%mm4 \n"
|
|
||||||
"jg 1b \n"
|
|
||||||
"movq %%mm4, %%mm3 \n"
|
|
||||||
"psrlq $32, %%mm3 \n"
|
|
||||||
"paddd %%mm3, %%mm4 \n"
|
|
||||||
"movd %%mm4, %1 \n"
|
|
||||||
: "+r" (i), "=r" (sum)
|
|
||||||
: "r" (pix1), "r" (pix2));
|
|
||||||
|
|
||||||
return sum;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define PHADDD(a, t) \
|
#define PHADDD(a, t) \
|
||||||
"movq " #a ", " #t " \n\t" \
|
"movq " #a ", " #t " \n\t" \
|
||||||
"psrlq $32, " #a " \n\t" \
|
"psrlq $32, " #a " \n\t" \
|
||||||
@ -854,8 +820,6 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
|
|||||||
c->try_8x8basis = try_8x8basis_mmx;
|
c->try_8x8basis = try_8x8basis_mmx;
|
||||||
}
|
}
|
||||||
c->add_8x8basis = add_8x8basis_mmx;
|
c->add_8x8basis = add_8x8basis_mmx;
|
||||||
|
|
||||||
c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (INLINE_AMD3DNOW(cpu_flags)) {
|
if (INLINE_AMD3DNOW(cpu_flags)) {
|
||||||
|
73
libavcodec/x86/svq1enc_mmx.c
Normal file
73
libavcodec/x86/svq1enc_mmx.c
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
/*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
#include "libavutil/attributes.h"
|
||||||
|
#include "libavutil/cpu.h"
|
||||||
|
#include "libavutil/x86/asm.h"
|
||||||
|
#include "libavutil/x86/cpu.h"
|
||||||
|
#include "libavcodec/svq1enc.h"
|
||||||
|
|
||||||
|
#if HAVE_INLINE_ASM
|
||||||
|
|
||||||
|
static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2,
|
||||||
|
int size)
|
||||||
|
{
|
||||||
|
int sum;
|
||||||
|
x86_reg i = size;
|
||||||
|
|
||||||
|
__asm__ volatile (
|
||||||
|
"pxor %%mm4, %%mm4 \n"
|
||||||
|
"1: \n"
|
||||||
|
"sub $8, %0 \n"
|
||||||
|
"movq (%2, %0), %%mm2 \n"
|
||||||
|
"movq (%3, %0, 2), %%mm0 \n"
|
||||||
|
"movq 8(%3, %0, 2), %%mm1 \n"
|
||||||
|
"punpckhbw %%mm2, %%mm3 \n"
|
||||||
|
"punpcklbw %%mm2, %%mm2 \n"
|
||||||
|
"psraw $8, %%mm3 \n"
|
||||||
|
"psraw $8, %%mm2 \n"
|
||||||
|
"psubw %%mm3, %%mm1 \n"
|
||||||
|
"psubw %%mm2, %%mm0 \n"
|
||||||
|
"pmaddwd %%mm1, %%mm1 \n"
|
||||||
|
"pmaddwd %%mm0, %%mm0 \n"
|
||||||
|
"paddd %%mm1, %%mm4 \n"
|
||||||
|
"paddd %%mm0, %%mm4 \n"
|
||||||
|
"jg 1b \n"
|
||||||
|
"movq %%mm4, %%mm3 \n"
|
||||||
|
"psrlq $32, %%mm3 \n"
|
||||||
|
"paddd %%mm3, %%mm4 \n"
|
||||||
|
"movd %%mm4, %1 \n"
|
||||||
|
: "+r" (i), "=r" (sum)
|
||||||
|
: "r" (pix1), "r" (pix2));
|
||||||
|
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* HAVE_INLINE_ASM */
|
||||||
|
|
||||||
|
/**
 * Install the x86-specific routines into an SVQ1 encoder context.
 *
 * The context is left untouched when inline assembly is not available or
 * when INLINE_MMX() rejects the detected CPU flags.
 *
 * @param c encoder context whose ssd_int8_vs_int16 pointer may be replaced
 */
av_cold void ff_svq1enc_init_x86(SVQ1EncContext *c)
{
#if HAVE_INLINE_ASM
    if (!INLINE_MMX(av_get_cpu_flags()))
        return;

    c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx;
#endif /* HAVE_INLINE_ASM */
}
|
Loading…
Reference in New Issue
Block a user