mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
dsputil: Move SVQ1 encoding specific bits into svq1enc
This commit is contained in:
parent
2ea2612df5
commit
65d5d58658
@ -2099,16 +2099,6 @@ static int vsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
|
||||
return score;
|
||||
}
|
||||
|
||||
/**
 * Sum of squared differences between an int8_t and an int16_t vector.
 *
 * @param pix1 first operand, read for size elements
 * @param pix2 second operand, read for size elements
 * @param size number of elements to compare; a value <= 0 yields 0
 * @return the sum over all i of (pix1[i] - pix2[i]) squared
 */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size)
{
    /* A single squared difference always fits in an int, but the running
     * total of many of them may not. Accumulate in unsigned arithmetic so
     * that a huge total wraps instead of being undefined behaviour. */
    unsigned score = 0;
    int i;

    for (i = 0; i < size; i++) {
        const int diff = pix1[i] - pix2[i];

        score += (unsigned)(diff * diff);
    }
    return (int)score;
}
|
||||
|
||||
#define WRAPPER8_16_SQ(name8, name16) \
|
||||
static int name16(MpegEncContext *s, uint8_t *dst, uint8_t *src, \
|
||||
int stride, int h) \
|
||||
@ -2430,8 +2420,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
|
||||
c->nsse[0] = nsse16_c;
|
||||
c->nsse[1] = nsse8_c;
|
||||
|
||||
c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
|
||||
|
||||
c->bswap_buf = bswap_buf;
|
||||
c->bswap16_buf = bswap16_buf;
|
||||
|
||||
|
@ -175,9 +175,6 @@ typedef struct DSPContext {
|
||||
me_cmp_func ildct_cmp[6]; // only width 16 used
|
||||
me_cmp_func frame_skip_cmp[6]; // only width 8 used
|
||||
|
||||
int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2,
|
||||
int size);
|
||||
|
||||
qpel_mc_func put_qpel_pixels_tab[2][16];
|
||||
qpel_mc_func avg_qpel_pixels_tab[2][16];
|
||||
qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
|
||||
|
@ -12,6 +12,7 @@ OBJS-$(CONFIG_MPEGVIDEO) += ppc/mpegvideo_altivec.o
|
||||
OBJS-$(CONFIG_VIDEODSP) += ppc/videodsp_ppc.o
|
||||
OBJS-$(CONFIG_VP3DSP) += ppc/vp3dsp_altivec.o
|
||||
|
||||
OBJS-$(CONFIG_SVQ1_ENCODER) += ppc/svq1enc_altivec.o
|
||||
OBJS-$(CONFIG_VC1_DECODER) += ppc/vc1dsp_altivec.o
|
||||
OBJS-$(CONFIG_VORBIS_DECODER) += ppc/vorbisdsp_altivec.o
|
||||
OBJS-$(CONFIG_VP7_DECODER) += ppc/vp8dsp_altivec.o
|
||||
|
@ -34,48 +34,6 @@
|
||||
#include "libavcodec/dsputil.h"
|
||||
#include "dsputil_altivec.h"
|
||||
|
||||
static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2,
|
||||
int size)
|
||||
{
|
||||
int i, size16 = size >> 4;
|
||||
vector signed char vpix1;
|
||||
vector signed short vpix2, vdiff, vpix1l, vpix1h;
|
||||
union {
|
||||
vector signed int vscore;
|
||||
int32_t score[4];
|
||||
} u = { .vscore = vec_splat_s32(0) };
|
||||
|
||||
// XXX lazy way, fix it later
|
||||
|
||||
while (size16) {
|
||||
// score += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);
|
||||
// load pix1 and the first batch of pix2
|
||||
|
||||
vpix1 = vec_unaligned_load(pix1);
|
||||
vpix2 = vec_unaligned_load(pix2);
|
||||
pix2 += 8;
|
||||
// unpack
|
||||
vpix1h = vec_unpackh(vpix1);
|
||||
vdiff = vec_sub(vpix1h, vpix2);
|
||||
vpix1l = vec_unpackl(vpix1);
|
||||
// load another batch from pix2
|
||||
vpix2 = vec_unaligned_load(pix2);
|
||||
u.vscore = vec_msum(vdiff, vdiff, u.vscore);
|
||||
vdiff = vec_sub(vpix1l, vpix2);
|
||||
u.vscore = vec_msum(vdiff, vdiff, u.vscore);
|
||||
pix1 += 16;
|
||||
pix2 += 8;
|
||||
size16--;
|
||||
}
|
||||
u.vscore = vec_sums(u.vscore, vec_splat_s32(0));
|
||||
|
||||
size %= 16;
|
||||
for (i = 0; i < size; i++)
|
||||
u.score[3] += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);
|
||||
|
||||
return u.score[3];
|
||||
}
|
||||
|
||||
static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2,
|
||||
int order)
|
||||
{
|
||||
@ -140,8 +98,6 @@ static int32_t scalarproduct_and_madd_int16_altivec(int16_t *v1,
|
||||
|
||||
av_cold void ff_int_init_altivec(DSPContext *c, AVCodecContext *avctx)
|
||||
{
|
||||
c->ssd_int8_vs_int16 = ssd_int8_vs_int16_altivec;
|
||||
|
||||
c->scalarproduct_int16 = scalarproduct_int16_altivec;
|
||||
|
||||
c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_altivec;
|
||||
|
80
libavcodec/ppc/svq1enc_altivec.c
Normal file
80
libavcodec/ppc/svq1enc_altivec.c
Normal file
@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Copyright (c) 2007 Luca Barbato <lu_zero@gentoo.org>
|
||||
*
|
||||
* This file is part of Libav.
|
||||
*
|
||||
* Libav is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* Libav is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with Libav; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <stdint.h>

#include "config.h"
#if HAVE_ALTIVEC_H
#include <altivec.h>
#endif

#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/ppc/types_altivec.h"
#include "libavutil/ppc/util_altivec.h"
#include "libavcodec/svq1enc.h"
|
||||
|
||||
#if HAVE_ALTIVEC
|
||||
static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2,
|
||||
int size)
|
||||
{
|
||||
int i, size16 = size >> 4;
|
||||
vector signed char vpix1;
|
||||
vector signed short vpix2, vdiff, vpix1l, vpix1h;
|
||||
union {
|
||||
vector signed int vscore;
|
||||
int32_t score[4];
|
||||
} u = { .vscore = vec_splat_s32(0) };
|
||||
|
||||
while (size16) {
|
||||
// score += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);
|
||||
// load pix1 and the first batch of pix2
|
||||
|
||||
vpix1 = vec_unaligned_load(pix1);
|
||||
vpix2 = vec_unaligned_load(pix2);
|
||||
pix2 += 8;
|
||||
// unpack
|
||||
vpix1h = vec_unpackh(vpix1);
|
||||
vdiff = vec_sub(vpix1h, vpix2);
|
||||
vpix1l = vec_unpackl(vpix1);
|
||||
// load another batch from pix2
|
||||
vpix2 = vec_unaligned_load(pix2);
|
||||
u.vscore = vec_msum(vdiff, vdiff, u.vscore);
|
||||
vdiff = vec_sub(vpix1l, vpix2);
|
||||
u.vscore = vec_msum(vdiff, vdiff, u.vscore);
|
||||
pix1 += 16;
|
||||
pix2 += 8;
|
||||
size16--;
|
||||
}
|
||||
u.vscore = vec_sums(u.vscore, vec_splat_s32(0));
|
||||
|
||||
size %= 16;
|
||||
for (i = 0; i < size; i++)
|
||||
u.score[3] += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);
|
||||
|
||||
return u.score[3];
|
||||
}
|
||||
#endif /* HAVE_ALTIVEC */
|
||||
|
||||
/**
 * Install the PowerPC-optimised SVQ1 encoder routines.
 *
 * The AltiVec routine is selected only when it was compiled in
 * (HAVE_ALTIVEC) and the CPU reports AltiVec support at run time; otherwise
 * c is left untouched, so whatever the caller stored there stays in place.
 *
 * @param c encoder context whose ssd_int8_vs_int16 pointer may be replaced
 */
av_cold void ff_svq1enc_init_ppc(SVQ1EncContext *c)
{
#if HAVE_ALTIVEC
    /* Being built with AltiVec enabled does not guarantee that the CPU we
     * run on has it, so consult the detected CPU flags first, as
     * ff_svq1enc_init_x86() does for MMX. */
    if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
        return;

    c->ssd_int8_vs_int16 = ssd_int8_vs_int16_altivec;
#endif /* HAVE_ALTIVEC */
}
|
@ -34,49 +34,12 @@
|
||||
#include "internal.h"
|
||||
#include "mpegutils.h"
|
||||
#include "svq1.h"
|
||||
#include "svq1enc.h"
|
||||
#include "svq1enc_cb.h"
|
||||
|
||||
#undef NDEBUG
|
||||
#include <assert.h>
|
||||
|
||||
typedef struct SVQ1EncContext {
|
||||
/* FIXME: Needed for motion estimation, should not be used for anything
|
||||
* else, the idea is to make the motion estimation eventually independent
|
||||
* of MpegEncContext, so this will be removed then. */
|
||||
MpegEncContext m;
|
||||
AVCodecContext *avctx;
|
||||
DSPContext dsp;
|
||||
HpelDSPContext hdsp;
|
||||
AVFrame *current_picture;
|
||||
AVFrame *last_picture;
|
||||
PutBitContext pb;
|
||||
GetBitContext gb;
|
||||
|
||||
/* why ooh why this sick breadth first order,
|
||||
* everything is slower and more complex */
|
||||
PutBitContext reorder_pb[6];
|
||||
|
||||
int frame_width;
|
||||
int frame_height;
|
||||
|
||||
/* Y plane block dimensions */
|
||||
int y_block_width;
|
||||
int y_block_height;
|
||||
|
||||
/* U & V plane (C planes) block dimensions */
|
||||
int c_block_width;
|
||||
int c_block_height;
|
||||
|
||||
uint16_t *mb_type;
|
||||
uint32_t *dummy;
|
||||
int16_t (*motion_val8[3])[2];
|
||||
int16_t (*motion_val16[3])[2];
|
||||
|
||||
int64_t rd_total;
|
||||
|
||||
uint8_t *scratchbuf;
|
||||
} SVQ1EncContext;
|
||||
|
||||
static void svq1_write_header(SVQ1EncContext *s, int frame_type)
|
||||
{
|
||||
int i;
|
||||
@ -114,6 +77,16 @@ static void svq1_write_header(SVQ1EncContext *s, int frame_type)
|
||||
#define QUALITY_THRESHOLD 100
|
||||
#define THRESHOLD_MULTIPLIER 0.6
|
||||
|
||||
/**
 * Sum of squared differences between an int8_t and an int16_t vector.
 *
 * @param pix1 first operand, read for size elements
 * @param pix2 second operand, read for size elements
 * @param size number of elements to compare; a value <= 0 yields 0
 * @return the sum over all i of (pix1[i] - pix2[i]) squared
 */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size)
{
    /* A single squared difference always fits in an int, but the running
     * total of many of them may not. Accumulate in unsigned arithmetic so
     * that a huge total wraps instead of being undefined behaviour. */
    unsigned score = 0;
    int i;

    for (i = 0; i < size; i++) {
        const int diff = pix1[i] - pix2[i];

        score += (unsigned)(diff * diff);
    }
    return (int)score;
}
|
||||
|
||||
static int encode_block(SVQ1EncContext *s, uint8_t *src, uint8_t *ref,
|
||||
uint8_t *decoded, int stride, int level,
|
||||
int threshold, int lambda, int intra)
|
||||
@ -175,7 +148,7 @@ static int encode_block(SVQ1EncContext *s, uint8_t *src, uint8_t *ref,
|
||||
int sqr, diff, score;
|
||||
|
||||
vector = codebook + stage * size * 16 + i * size;
|
||||
sqr = s->dsp.ssd_int8_vs_int16(vector, block[stage], size);
|
||||
sqr = s->ssd_int8_vs_int16(vector, block[stage], size);
|
||||
diff = block_sum[stage] - sum;
|
||||
score = sqr - (diff * (int64_t)diff >> (level + 3)); // FIXME: 64bit slooow
|
||||
if (score < best_vector_score) {
|
||||
@ -574,6 +547,13 @@ static av_cold int svq1_encode_init(AVCodecContext *avctx)
|
||||
s->y_block_height * sizeof(int16_t));
|
||||
s->dummy = av_mallocz((s->y_block_width + 1) *
|
||||
s->y_block_height * sizeof(int32_t));
|
||||
s->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
|
||||
|
||||
if (ARCH_PPC)
|
||||
ff_svq1enc_init_ppc(s);
|
||||
if (ARCH_X86)
|
||||
ff_svq1enc_init_x86(s);
|
||||
|
||||
ff_h263_encode_init(&s->m); // mv_penalty
|
||||
|
||||
return 0;
|
||||
|
78
libavcodec/svq1enc.h
Normal file
78
libavcodec/svq1enc.h
Normal file
@ -0,0 +1,78 @@
|
||||
/*
|
||||
* SVQ1 encoder
|
||||
*
|
||||
* This file is part of Libav.
|
||||
*
|
||||
* Libav is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* Libav is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with Libav; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVCODEC_SVQ1ENC_H
|
||||
#define AVCODEC_SVQ1ENC_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "libavutil/frame.h"
|
||||
#include "avcodec.h"
|
||||
#include "dsputil.h"
|
||||
#include "get_bits.h"
|
||||
#include "hpeldsp.h"
|
||||
#include "mpegvideo.h"
|
||||
#include "put_bits.h"
|
||||
|
||||
typedef struct SVQ1EncContext {
|
||||
/* FIXME: Needed for motion estimation, should not be used for anything
|
||||
* else, the idea is to make the motion estimation eventually independent
|
||||
* of MpegEncContext, so this will be removed then. */
|
||||
MpegEncContext m;
|
||||
AVCodecContext *avctx;
|
||||
DSPContext dsp;
|
||||
HpelDSPContext hdsp;
|
||||
AVFrame *current_picture;
|
||||
AVFrame *last_picture;
|
||||
PutBitContext pb;
|
||||
GetBitContext gb;
|
||||
|
||||
/* why ooh why this sick breadth first order,
|
||||
* everything is slower and more complex */
|
||||
PutBitContext reorder_pb[6];
|
||||
|
||||
int frame_width;
|
||||
int frame_height;
|
||||
|
||||
/* Y plane block dimensions */
|
||||
int y_block_width;
|
||||
int y_block_height;
|
||||
|
||||
/* U & V plane (C planes) block dimensions */
|
||||
int c_block_width;
|
||||
int c_block_height;
|
||||
|
||||
uint16_t *mb_type;
|
||||
uint32_t *dummy;
|
||||
int16_t (*motion_val8[3])[2];
|
||||
int16_t (*motion_val16[3])[2];
|
||||
|
||||
int64_t rd_total;
|
||||
|
||||
uint8_t *scratchbuf;
|
||||
|
||||
int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2,
|
||||
int size);
|
||||
} SVQ1EncContext;
|
||||
|
||||
/* Architecture-specific initialisation: each function may replace the
 * ssd_int8_vs_int16 pointer in c with an optimised implementation. */
void ff_svq1enc_init_ppc(SVQ1EncContext *c);
|
||||
void ff_svq1enc_init_x86(SVQ1EncContext *c);
|
||||
|
||||
#endif /* AVCODEC_SVQ1ENC_H */
|
@ -51,6 +51,7 @@ MMX-OBJS-$(CONFIG_HPELDSP) += x86/fpel_mmx.o \
|
||||
x86/hpeldsp_mmx.o
|
||||
MMX-OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp_mmx.o
|
||||
|
||||
MMX-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc_mmx.o
|
||||
MMX-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_mmx.o
|
||||
|
||||
YASM-OBJS += x86/deinterlace.o \
|
||||
|
@ -805,40 +805,6 @@ DCT_SAD_FUNC(ssse3)
|
||||
#undef HSUM
|
||||
#undef DCT_SAD
|
||||
|
||||
static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2,
|
||||
int size)
|
||||
{
|
||||
int sum;
|
||||
x86_reg i = size;
|
||||
|
||||
__asm__ volatile (
|
||||
"pxor %%mm4, %%mm4 \n"
|
||||
"1: \n"
|
||||
"sub $8, %0 \n"
|
||||
"movq (%2, %0), %%mm2 \n"
|
||||
"movq (%3, %0, 2), %%mm0 \n"
|
||||
"movq 8(%3, %0, 2), %%mm1 \n"
|
||||
"punpckhbw %%mm2, %%mm3 \n"
|
||||
"punpcklbw %%mm2, %%mm2 \n"
|
||||
"psraw $8, %%mm3 \n"
|
||||
"psraw $8, %%mm2 \n"
|
||||
"psubw %%mm3, %%mm1 \n"
|
||||
"psubw %%mm2, %%mm0 \n"
|
||||
"pmaddwd %%mm1, %%mm1 \n"
|
||||
"pmaddwd %%mm0, %%mm0 \n"
|
||||
"paddd %%mm1, %%mm4 \n"
|
||||
"paddd %%mm0, %%mm4 \n"
|
||||
"jg 1b \n"
|
||||
"movq %%mm4, %%mm3 \n"
|
||||
"psrlq $32, %%mm3 \n"
|
||||
"paddd %%mm3, %%mm4 \n"
|
||||
"movd %%mm4, %1 \n"
|
||||
: "+r" (i), "=r" (sum)
|
||||
: "r" (pix1), "r" (pix2));
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
#define PHADDD(a, t) \
|
||||
"movq " #a ", " #t " \n\t" \
|
||||
"psrlq $32, " #a " \n\t" \
|
||||
@ -958,8 +924,6 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
|
||||
c->try_8x8basis = try_8x8basis_mmx;
|
||||
}
|
||||
c->add_8x8basis = add_8x8basis_mmx;
|
||||
|
||||
c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx;
|
||||
}
|
||||
|
||||
if (INLINE_AMD3DNOW(cpu_flags)) {
|
||||
|
73
libavcodec/x86/svq1enc_mmx.c
Normal file
73
libavcodec/x86/svq1enc_mmx.c
Normal file
@ -0,0 +1,73 @@
|
||||
/*
|
||||
* This file is part of Libav.
|
||||
*
|
||||
* Libav is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* Libav is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with Libav; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/x86/asm.h"
|
||||
#include "libavutil/x86/cpu.h"
|
||||
#include "libavcodec/svq1enc.h"
|
||||
|
||||
#if HAVE_INLINE_ASM
|
||||
|
||||
static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2,
|
||||
int size)
|
||||
{
|
||||
int sum;
|
||||
x86_reg i = size;
|
||||
|
||||
__asm__ volatile (
|
||||
"pxor %%mm4, %%mm4 \n"
|
||||
"1: \n"
|
||||
"sub $8, %0 \n"
|
||||
"movq (%2, %0), %%mm2 \n"
|
||||
"movq (%3, %0, 2), %%mm0 \n"
|
||||
"movq 8(%3, %0, 2), %%mm1 \n"
|
||||
"punpckhbw %%mm2, %%mm3 \n"
|
||||
"punpcklbw %%mm2, %%mm2 \n"
|
||||
"psraw $8, %%mm3 \n"
|
||||
"psraw $8, %%mm2 \n"
|
||||
"psubw %%mm3, %%mm1 \n"
|
||||
"psubw %%mm2, %%mm0 \n"
|
||||
"pmaddwd %%mm1, %%mm1 \n"
|
||||
"pmaddwd %%mm0, %%mm0 \n"
|
||||
"paddd %%mm1, %%mm4 \n"
|
||||
"paddd %%mm0, %%mm4 \n"
|
||||
"jg 1b \n"
|
||||
"movq %%mm4, %%mm3 \n"
|
||||
"psrlq $32, %%mm3 \n"
|
||||
"paddd %%mm3, %%mm4 \n"
|
||||
"movd %%mm4, %1 \n"
|
||||
: "+r" (i), "=r" (sum)
|
||||
: "r" (pix1), "r" (pix2));
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
#endif /* HAVE_INLINE_ASM */
|
||||
|
||||
/**
 * Install the x86-optimised SVQ1 encoder routines.
 *
 * Without inline assembly support this is a no-op. With it, the MMX routine
 * replaces c->ssd_int8_vs_int16 when INLINE_MMX() accepts the detected CPU
 * flags.
 *
 * @param c encoder context whose ssd_int8_vs_int16 pointer may be replaced
 */
av_cold void ff_svq1enc_init_x86(SVQ1EncContext *c)
{
#if HAVE_INLINE_ASM
    const int flags = av_get_cpu_flags();

    if (!INLINE_MMX(flags))
        return;

    c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx;
#endif /* HAVE_INLINE_ASM */
}
|
Loading…
Reference in New Issue
Block a user