mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2024-12-23 12:43:46 +02:00
avcodec/takdec: add x86 SIMD for rest of decorrelation modes
Signed-off-by: Paul B Mahol <onemda@gmail.com>
This commit is contained in:
parent
2f4374fae1
commit
35af7add6f
@ -491,7 +491,7 @@ OBJS-$(CONFIG_SVQ1_ENCODER) += svq1enc.o svq1.o \
|
||||
h263.o ituh263enc.o
|
||||
OBJS-$(CONFIG_SVQ3_DECODER) += svq3.o svq13.o mpegutils.o
|
||||
OBJS-$(CONFIG_TEXT_DECODER) += textdec.o ass.o
|
||||
OBJS-$(CONFIG_TAK_DECODER) += takdec.o tak.o
|
||||
OBJS-$(CONFIG_TAK_DECODER) += takdec.o tak.o takdsp.o
|
||||
OBJS-$(CONFIG_TARGA_DECODER) += targa.o
|
||||
OBJS-$(CONFIG_TARGA_ENCODER) += targaenc.o rle.o
|
||||
OBJS-$(CONFIG_TARGA_Y216_DECODER) += targa_y216dec.o
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include "libavutil/internal.h"
|
||||
#include "libavutil/samplefmt.h"
|
||||
#include "tak.h"
|
||||
#include "takdsp.h"
|
||||
#include "audiodsp.h"
|
||||
#include "thread.h"
|
||||
#include "avcodec.h"
|
||||
@ -47,6 +48,7 @@ typedef struct MCDParam {
|
||||
typedef struct TAKDecContext {
|
||||
AVCodecContext *avctx; ///< parent AVCodecContext
|
||||
AudioDSPContext adsp;
|
||||
TAKDSPContext tdsp;
|
||||
TAKStreamInfo ti;
|
||||
GetBitContext gb; ///< bitstream reader initialized to start at the current frame
|
||||
|
||||
@ -172,6 +174,7 @@ static av_cold int tak_decode_init(AVCodecContext *avctx)
|
||||
TAKDecContext *s = avctx->priv_data;
|
||||
|
||||
ff_audiodsp_init(&s->adsp);
|
||||
ff_takdsp_init(&s->tdsp);
|
||||
|
||||
s->avctx = avctx;
|
||||
avctx->bits_per_raw_sample = avctx->bits_per_coded_sample;
|
||||
@ -541,46 +544,32 @@ static int decode_channel(TAKDecContext *s, int chan)
|
||||
static int decorrelate(TAKDecContext *s, int c1, int c2, int length)
|
||||
{
|
||||
GetBitContext *gb = &s->gb;
|
||||
int32_t *p1 = s->decoded[c1] + 1;
|
||||
int32_t *p2 = s->decoded[c2] + 1;
|
||||
int32_t *p1 = s->decoded[c1] + (s->dmode > 5);
|
||||
int32_t *p2 = s->decoded[c2] + (s->dmode > 5);
|
||||
int32_t bp1 = p1[0];
|
||||
int32_t bp2 = p2[0];
|
||||
int i;
|
||||
int dshift, dfactor;
|
||||
|
||||
length += s->dmode < 6;
|
||||
|
||||
switch (s->dmode) {
|
||||
case 1: /* left/side */
|
||||
for (i = 0; i < length; i++) {
|
||||
int32_t a = p1[i];
|
||||
int32_t b = p2[i];
|
||||
p2[i] = a + b;
|
||||
}
|
||||
s->tdsp.decorrelate_ls(p1, p2, length);
|
||||
break;
|
||||
case 2: /* side/right */
|
||||
for (i = 0; i < length; i++) {
|
||||
int32_t a = p1[i];
|
||||
int32_t b = p2[i];
|
||||
p1[i] = b - a;
|
||||
}
|
||||
s->tdsp.decorrelate_sr(p1, p2, length);
|
||||
break;
|
||||
case 3: /* side/mid */
|
||||
for (i = 0; i < length; i++) {
|
||||
int32_t a = p1[i];
|
||||
int32_t b = p2[i];
|
||||
a -= b >> 1;
|
||||
p1[i] = a;
|
||||
p2[i] = a + b;
|
||||
}
|
||||
s->tdsp.decorrelate_sm(p1, p2, length);
|
||||
break;
|
||||
case 4: /* side/left with scale factor */
|
||||
FFSWAP(int32_t*, p1, p2);
|
||||
FFSWAP(int32_t, bp1, bp2);
|
||||
case 5: /* side/right with scale factor */
|
||||
dshift = get_bits_esc4(gb);
|
||||
dfactor = get_sbits(gb, 10);
|
||||
for (i = 0; i < length; i++) {
|
||||
int32_t a = p1[i];
|
||||
int32_t b = p2[i];
|
||||
b = dfactor * (b >> dshift) + 128 >> 8 << dshift;
|
||||
p1[i] = b - a;
|
||||
}
|
||||
s->tdsp.decorrelate_sf(p1, p2, length, dshift, dfactor);
|
||||
break;
|
||||
case 6:
|
||||
FFSWAP(int32_t*, p1, p2);
|
||||
@ -664,6 +653,11 @@ static int decorrelate(TAKDecContext *s, int c1, int c2, int length)
|
||||
}
|
||||
}
|
||||
|
||||
if (s->dmode > 0 && s->dmode < 6) {
|
||||
p1[0] = bp1;
|
||||
p2[0] = bp2;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
82
libavcodec/takdsp.c
Normal file
82
libavcodec/takdsp.c
Normal file
@ -0,0 +1,82 @@
|
||||
/*
|
||||
* TAK decoder
|
||||
* Copyright (c) 2015 Paul B Mahol
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/attributes.h"
|
||||
#include "takdsp.h"
|
||||
#include "config.h"
|
||||
|
||||
/**
 * Left/side decorrelation: replace every p2[i] with p1[i] + p2[i].
 *
 * p1 is only read. The sum is formed in unsigned arithmetic so that a
 * result outside the int32_t range wraps modulo 2^32 instead of being
 * undefined behaviour (signed overflow).
 *
 * @param p1     first channel, at least length samples (not modified)
 * @param p2     second channel, at least length samples, updated in place
 * @param length number of samples to process; no work is done if <= 0
 */
static void decorrelate_ls(int32_t *p1, int32_t *p2, int length)
{
    int i;

    for (i = 0; i < length; i++) {
        uint32_t a = p1[i];
        uint32_t b = p2[i];

        p2[i] = (int32_t)(a + b);
    }
}
|
||||
|
||||
/**
 * Side/right decorrelation: replace every p1[i] with p2[i] - p1[i].
 *
 * p2 is only read. The difference is formed in unsigned arithmetic so
 * that a result outside the int32_t range wraps modulo 2^32 instead of
 * being undefined behaviour (signed overflow).
 *
 * @param p1     first channel, at least length samples, updated in place
 * @param p2     second channel, at least length samples (not modified)
 * @param length number of samples to process; no work is done if <= 0
 */
static void decorrelate_sr(int32_t *p1, int32_t *p2, int length)
{
    int i;

    for (i = 0; i < length; i++) {
        uint32_t a = p1[i];
        uint32_t b = p2[i];

        p1[i] = (int32_t)(b - a);
    }
}
|
||||
|
||||
/**
 * Side/mid decorrelation, updating both channels in place.
 *
 * For every sample, with a = p1[i] and b = p2[i]:
 *   a    -= b >> 1;
 *   p1[i] = a;
 *   p2[i] = a + b;
 *
 * b stays signed so that b >> 1 is a signed shift; the running value a
 * is unsigned so that the subtraction and the addition wrap modulo 2^32
 * instead of invoking undefined behaviour on signed overflow.
 *
 * @param p1     first channel, at least length samples, updated in place
 * @param p2     second channel, at least length samples, updated in place
 * @param length number of samples to process; no work is done if <= 0
 */
static void decorrelate_sm(int32_t *p1, int32_t *p2, int length)
{
    int i;

    for (i = 0; i < length; i++) {
        uint32_t a = p1[i];
        int32_t  b = p2[i];

        a    -= (uint32_t)(b >> 1);
        p1[i] = (int32_t)a;
        p2[i] = (int32_t)(a + (uint32_t)b);
    }
}
|
||||
|
||||
/**
 * Scaled decorrelation: replace every p1[i] with pred - p1[i], where
 *
 *   pred = (((dfactor * (p2[i] >> dshift)) + 128) >> 8) << dshift
 *
 * p2 is only read. Both right shifts operate on signed values. The
 * multiply/add and the final left shift are carried out on unsigned
 * operands so that products that do not fit in 32 bits, and left shifts
 * of negative intermediates, wrap modulo 2^32 rather than being
 * undefined behaviour.
 *
 * @param p1      channel to rewrite, at least length samples
 * @param p2      reference channel, at least length samples (not modified)
 * @param length  number of samples to process; no work is done if <= 0
 * @param dshift  shift applied before and undone after scaling; must be in
 *                [0, 31] because C leaves larger shift counts undefined
 * @param dfactor signed scale factor; the result is divided by 256 after
 *                adding 128 for rounding
 */
static void decorrelate_sf(int32_t *p1, int32_t *p2, int length, int dshift, int dfactor)
{
    int i;

    for (i = 0; i < length; i++) {
        uint32_t a = p1[i];
        int32_t  b = p2[i];
        /* dfactor * (b >> dshift) + 128, computed modulo 2^32 */
        uint32_t scaled = (uint32_t)dfactor * (uint32_t)(b >> dshift) + 128u;
        /* signed >> 8 keeps the sign of the scaled value */
        int32_t  rounded = (int32_t)scaled >> 8;
        uint32_t pred    = (uint32_t)rounded << dshift;

        p1[i] = (int32_t)(pred - a);
    }
}
|
||||
|
||||
/**
 * Populate a TAKDSPContext with the portable C decorrelation routines
 * defined in this file, then give ff_takdsp_init_x86() the chance to
 * replace them when ARCH_X86 is non-zero.
 *
 * @param c context whose four function pointers are (re)initialised
 */
av_cold void ff_takdsp_init(TAKDSPContext *c)
{
    /* C fallbacks first; every pointer is set unconditionally. */
    c->decorrelate_sf = decorrelate_sf;
    c->decorrelate_sm = decorrelate_sm;
    c->decorrelate_sr = decorrelate_sr;
    c->decorrelate_ls = decorrelate_ls;

    if (ARCH_X86) {
        ff_takdsp_init_x86(c);
    }
}
|
34
libavcodec/takdsp.h
Normal file
34
libavcodec/takdsp.h
Normal file
@ -0,0 +1,34 @@
|
||||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVCODEC_TAKDSP_H
|
||||
#define AVCODEC_TAKDSP_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/**
 * Dispatch table for the TAK channel decorrelation routines.
 *
 * Every routine takes two sample buffers and the number of samples to
 * process; which of the two buffers is rewritten depends on the routine.
 */
typedef struct TAKDSPContext {
    /** p2[i] = p1[i] + p2[i]; p1 is left untouched. */
    void (*decorrelate_ls)(int32_t *p1, int32_t *p2, int length);

    /** p1[i] = p2[i] - p1[i]; p2 is left untouched. */
    void (*decorrelate_sr)(int32_t *p1, int32_t *p2, int length);

    /** p1[i] -= p2[i] >> 1, then p2[i] += p1[i]; both buffers change. */
    void (*decorrelate_sm)(int32_t *p1, int32_t *p2, int length);

    /**
     * p1[i] = scaled(p2[i]) - p1[i], where scaled() shifts right by
     * dshift, multiplies by dfactor, rounds to 1/256 and shifts left
     * by dshift again; p2 is left untouched.
     */
    void (*decorrelate_sf)(int32_t *p1, int32_t *p2, int length,
                           int dshift, int dfactor);
} TAKDSPContext;
|
||||
|
||||
/* Install the C decorrelation routines into c, then any x86 overrides. */
void ff_takdsp_init(TAKDSPContext *c);
|
||||
/* Replace entries of c with x86 SIMD versions when the CPU supports them. */
void ff_takdsp_init_x86(TAKDSPContext *c);
|
||||
|
||||
#endif /* AVCODEC_TAKDSP_H */
|
@ -56,6 +56,7 @@ OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp_init.o
|
||||
OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp_init.o
|
||||
OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp_init.o
|
||||
OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc_init.o
|
||||
OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp_init.o
|
||||
OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp_init.o
|
||||
OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp_init.o
|
||||
OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o
|
||||
@ -152,6 +153,7 @@ YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o
|
||||
YASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o
|
||||
YASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp.o
|
||||
YASM-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o
|
||||
YASM-OBJS-$(CONFIG_TAK_DECODER) += x86/takdsp.o
|
||||
YASM-OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o
|
||||
YASM-OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp.o
|
||||
YASM-OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc.o
|
||||
|
116
libavcodec/x86/takdsp.asm
Normal file
116
libavcodec/x86/takdsp.asm
Normal file
@ -0,0 +1,116 @@
|
||||
;******************************************************************************
|
||||
;* TAK DSP SIMD optimizations
|
||||
;*
|
||||
;* Copyright (C) 2015 Paul B Mahol
|
||||
;*
|
||||
;* This file is part of FFmpeg.
|
||||
;*
|
||||
;* FFmpeg is free software; you can redistribute it and/or
|
||||
;* modify it under the terms of the GNU Lesser General Public
|
||||
;* License as published by the Free Software Foundation; either
|
||||
;* version 2.1 of the License, or (at your option) any later version.
|
||||
;*
|
||||
;* FFmpeg is distributed in the hope that it will be useful,
|
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;* Lesser General Public License for more details.
|
||||
;*
|
||||
;* You should have received a copy of the GNU Lesser General Public
|
||||
;* License along with FFmpeg; if not, write to the Free Software
|
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;******************************************************************************
|
||||
|
||||
%include "libavutil/x86/x86util.asm"
|
||||
|
||||
SECTION_RODATA
|
||||
|
||||
pd_128: times 4 dd 128
|
||||
|
||||
SECTION .text
|
||||
|
||||
INIT_XMM sse2
|
||||
; void tak_decorrelate_ls(int32_t *p1, int32_t *p2, int length)
;
; p2[i] += p1[i], two vector registers' worth of samples per iteration.
; Both pointers are advanced to the end of the data and a negative byte
; offset counts up towards zero. The loop body always runs at least once
; and always handles a full 2*mmsize-byte block, so the final iteration
; touches memory past `length` samples when length is not a multiple of
; that block size.
; NOTE(review): mova is presumably the aligned move, so both buffers are
; expected to be suitably aligned and padded by the caller -- confirm.
cglobal tak_decorrelate_ls, 3, 3, 2, p1, p2, length
    shl                     lengthd, 2              ; samples -> bytes
    add                         p2q, lengthq
    add                         p1q, lengthq
    neg                     lengthq                 ; offset runs from -bytes up to 0
.loop:
    mova                         m0, [p1q+lengthq]
    paddd                        m0, [p2q+lengthq]
    mova                         m1, [p1q+lengthq+mmsize]
    paddd                        m1, [p2q+lengthq+mmsize]
    mova          [p2q+lengthq], m0
    mova   [p2q+lengthq+mmsize], m1
    add                     lengthq, mmsize*2
    jl .loop
    REP_RET
|
||||
|
||||
; void tak_decorrelate_sr(int32_t *p1, int32_t *p2, int length)
;
; p1[i] = p2[i] - p1[i], two vector registers' worth of samples per
; iteration. Both pointers are advanced to the end of the data and a
; negative byte offset counts up towards zero. The loop body always runs
; at least once and always handles a full 2*mmsize-byte block.
; NOTE(review): mova is presumably the aligned move, so both buffers are
; expected to be suitably aligned and padded by the caller -- confirm.
cglobal tak_decorrelate_sr, 3, 3, 2, p1, p2, length
    shl                     lengthd, 2              ; samples -> bytes
    add                         p2q, lengthq
    add                         p1q, lengthq
    neg                     lengthq                 ; offset runs from -bytes up to 0

.loop:
    mova                         m0, [p2q+lengthq]
    psubd                        m0, [p1q+lengthq]
    mova                         m1, [p2q+lengthq+mmsize]
    psubd                        m1, [p1q+lengthq+mmsize]
    mova          [p1q+lengthq], m0
    mova   [p1q+lengthq+mmsize], m1
    add                     lengthq, mmsize*2
    jl .loop
    REP_RET
|
||||
|
||||
; void tak_decorrelate_sm(int32_t *p1, int32_t *p2, int length)
;
; Per sample: a = p1[i] - (p2[i] >> 1); p1[i] = a; p2[i] = a + p2[i].
; Two vector registers' worth of samples from each buffer are handled per
; iteration; the loop body always runs at least once and always processes
; a full 2*mmsize-byte block.
;
; The halving of p2 must be an arithmetic shift (psrad): the samples are
; signed and the C implementation uses a signed `b >> 1`. A logical shift
; (psrld) would give wrong results for every negative p2 sample.
cglobal tak_decorrelate_sm, 3, 3, 6, p1, p2, length
    shl                     lengthd, 2              ; samples -> bytes
    add                         p1q, lengthq
    add                         p2q, lengthq
    neg                     lengthq                 ; offset runs from -bytes up to 0

.loop:
    mova                         m0, [p1q+lengthq]
    mova                         m1, [p2q+lengthq]
    mova                         m3, [p1q+lengthq+mmsize]
    mova                         m4, [p2q+lengthq+mmsize]
    mova                         m2, m1
    mova                         m5, m4
    psrad                        m2, 1              ; signed b >> 1
    psrad                        m5, 1
    psubd                        m0, m2             ; a -= b >> 1
    psubd                        m3, m5
    paddd                        m1, m0             ; b += a
    paddd                        m4, m3
    mova          [p1q+lengthq], m0
    mova          [p2q+lengthq], m1
    mova   [p1q+lengthq+mmsize], m3
    mova   [p2q+lengthq+mmsize], m4
    add                     lengthq, mmsize*2
    jl .loop
    REP_RET
|
||||
|
||||
INIT_XMM sse4
|
||||
; void tak_decorrelate_sf(int32_t *p1, int32_t *p2, int length,
;                         int dshift, int dfactor)
;
; Per sample:
;   p1[i] = ((((p2[i] >> dshift) * dfactor + 128) >> 8) << dshift) - p1[i]
; One vector register's worth of samples is handled per iteration; the
; loop body always runs at least once.
;
; Both right shifts must be arithmetic (psrad): p2 samples and the scaled
; product are signed, and the C implementation shifts them as signed
; values. Logical shifts (psrld) would corrupt the result whenever p2[i]
; or the product is negative. The left shift is sign-agnostic.
cglobal tak_decorrelate_sf, 3, 3, 5, p1, p2, length, dshift, dfactor
    shl             lengthd, 2                      ; samples -> bytes
    add             p1q, lengthq
    add             p2q, lengthq
    neg             lengthq                         ; offset runs from -bytes up to 0

    movd            m2, dshiftm                     ; shift count in the low element
    movd            m3, dfactorm
    pshufd          m3, m3, 0                       ; broadcast dfactor to all lanes
    mova            m4, [pd_128]                    ; rounding term

.loop:
    mova            m0, [p1q+lengthq]
    mova            m1, [p2q+lengthq]
    psrad           m1, m2                          ; signed b >> dshift
    pmulld          m1, m3
    paddd           m1, m4
    psrad           m1, 8                           ; signed (x + 128) >> 8
    pslld           m1, m2
    psubd           m1, m0
    mova            [p1q+lengthq], m1
    add             lengthq, mmsize
    jl .loop
    REP_RET
|
45
libavcodec/x86/takdsp_init.c
Normal file
45
libavcodec/x86/takdsp_init.c
Normal file
@ -0,0 +1,45 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Paul B Mahol
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavcodec/takdsp.h"
|
||||
#include "libavutil/x86/cpu.h"
|
||||
#include "config.h"
|
||||
|
||||
/*
 * Prototypes of the SIMD routines installed by ff_takdsp_init_x86().
 * NOTE(review): presumably implemented in x86/takdsp.asm -- confirm.
 */
void ff_tak_decorrelate_ls_sse2(int32_t *p1, int32_t *p2, int length);
|
||||
void ff_tak_decorrelate_sr_sse2(int32_t *p1, int32_t *p2, int length);
|
||||
void ff_tak_decorrelate_sm_sse2(int32_t *p1, int32_t *p2, int length);
|
||||
void ff_tak_decorrelate_sf_sse4(int32_t *p1, int32_t *p2, int length, int dshift, int dfactor);
|
||||
|
||||
/**
 * Override entries of a TAKDSPContext with x86 SIMD implementations.
 *
 * Does nothing unless HAVE_YASM is set. Otherwise the ls/sr/sm routines
 * are replaced when the SSE2 check passes and the sf routine when the
 * SSE4 check passes; pointers whose check fails are left as they were.
 *
 * @param c context previously filled with fallback implementations
 */
av_cold void ff_takdsp_init_x86(TAKDSPContext *c)
{
#if HAVE_YASM
    const int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_SSE2(cpu_flags)) {
        c->decorrelate_sm = ff_tak_decorrelate_sm_sse2;
        c->decorrelate_sr = ff_tak_decorrelate_sr_sse2;
        c->decorrelate_ls = ff_tak_decorrelate_ls_sse2;
    }

    /* The scaled variant is only provided as an SSE4 routine. */
    if (EXTERNAL_SSE4(cpu_flags)) {
        c->decorrelate_sf = ff_tak_decorrelate_sf_sse4;
    }
#endif
}
|
Loading…
Reference in New Issue
Block a user