mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-04-24 17:12:34 +02:00
dsputil: Split off pixel block routines into their own context
This commit is contained in:
parent
0569a7e0bd
commit
f46bb608d9
13
configure
vendored
13
configure
vendored
@ -1558,6 +1558,7 @@ CONFIG_EXTRA="
|
|||||||
mpegvideo
|
mpegvideo
|
||||||
mpegvideoenc
|
mpegvideoenc
|
||||||
nettle
|
nettle
|
||||||
|
pixblockdsp
|
||||||
qpeldsp
|
qpeldsp
|
||||||
rangecoder
|
rangecoder
|
||||||
riffdec
|
riffdec
|
||||||
@ -1706,7 +1707,7 @@ threads_if_any="$THREADS_LIST"
|
|||||||
|
|
||||||
# subsystems
|
# subsystems
|
||||||
dct_select="rdft"
|
dct_select="rdft"
|
||||||
dsputil_select="fdctdsp idctdsp"
|
dsputil_select="fdctdsp idctdsp pixblockdsp"
|
||||||
error_resilience_select="dsputil"
|
error_resilience_select="dsputil"
|
||||||
intrax8_select="error_resilience"
|
intrax8_select="error_resilience"
|
||||||
mdct_select="fft"
|
mdct_select="fft"
|
||||||
@ -1715,7 +1716,7 @@ mpeg_er_select="error_resilience"
|
|||||||
mpegaudio_select="mpegaudiodsp"
|
mpegaudio_select="mpegaudiodsp"
|
||||||
mpegaudiodsp_select="dct"
|
mpegaudiodsp_select="dct"
|
||||||
mpegvideo_select="blockdsp dsputil hpeldsp idctdsp videodsp"
|
mpegvideo_select="blockdsp dsputil hpeldsp idctdsp videodsp"
|
||||||
mpegvideoenc_select="dsputil mpegvideo qpeldsp"
|
mpegvideoenc_select="dsputil mpegvideo pixblockdsp qpeldsp"
|
||||||
|
|
||||||
# decoders / encoders
|
# decoders / encoders
|
||||||
aac_decoder_select="mdct sinewin"
|
aac_decoder_select="mdct sinewin"
|
||||||
@ -1732,9 +1733,9 @@ amrwb_decoder_select="lsp"
|
|||||||
amv_decoder_select="sp5x_decoder"
|
amv_decoder_select="sp5x_decoder"
|
||||||
ape_decoder_select="bswapdsp"
|
ape_decoder_select="bswapdsp"
|
||||||
asv1_decoder_select="blockdsp bswapdsp idctdsp"
|
asv1_decoder_select="blockdsp bswapdsp idctdsp"
|
||||||
asv1_encoder_select="bswapdsp dsputil fdctdsp"
|
asv1_encoder_select="bswapdsp fdctdsp pixblockdsp"
|
||||||
asv2_decoder_select="blockdsp bswapdsp idctdsp"
|
asv2_decoder_select="blockdsp bswapdsp idctdsp"
|
||||||
asv2_encoder_select="bswapdsp dsputil fdctdsp"
|
asv2_encoder_select="bswapdsp fdctdsp pixblockdsp"
|
||||||
atrac1_decoder_select="mdct sinewin"
|
atrac1_decoder_select="mdct sinewin"
|
||||||
atrac3_decoder_select="mdct"
|
atrac3_decoder_select="mdct"
|
||||||
atrac3p_decoder_select="mdct sinewin"
|
atrac3p_decoder_select="mdct sinewin"
|
||||||
@ -1749,9 +1750,9 @@ cscd_decoder_select="lzo"
|
|||||||
cscd_decoder_suggest="zlib"
|
cscd_decoder_suggest="zlib"
|
||||||
dca_decoder_select="mdct"
|
dca_decoder_select="mdct"
|
||||||
dnxhd_decoder_select="blockdsp idctdsp"
|
dnxhd_decoder_select="blockdsp idctdsp"
|
||||||
dnxhd_encoder_select="aandcttables blockdsp dsputil fdctdsp idctdsp mpegvideoenc"
|
dnxhd_encoder_select="aandcttables blockdsp fdctdsp idctdsp mpegvideoenc pixblockdsp"
|
||||||
dvvideo_decoder_select="dvprofile idctdsp"
|
dvvideo_decoder_select="dvprofile idctdsp"
|
||||||
dvvideo_encoder_select="dsputil dvprofile fdctdsp"
|
dvvideo_encoder_select="dsputil dvprofile fdctdsp pixblockdsp"
|
||||||
dxa_decoder_deps="zlib"
|
dxa_decoder_deps="zlib"
|
||||||
eac3_decoder_select="ac3_decoder"
|
eac3_decoder_select="ac3_decoder"
|
||||||
eac3_encoder_select="ac3_encoder"
|
eac3_encoder_select="ac3_encoder"
|
||||||
|
@ -72,6 +72,7 @@ OBJS-$(CONFIG_MPEGVIDEO) += mpegvideo.o mpegvideodsp.o \
|
|||||||
OBJS-$(CONFIG_MPEGVIDEOENC) += mpegvideo_enc.o mpeg12data.o \
|
OBJS-$(CONFIG_MPEGVIDEOENC) += mpegvideo_enc.o mpeg12data.o \
|
||||||
motion_est.o ratecontrol.o \
|
motion_est.o ratecontrol.o \
|
||||||
mpegvideoencdsp.o
|
mpegvideoencdsp.o
|
||||||
|
OBJS-$(CONFIG_PIXBLOCKDSP) += pixblockdsp.o
|
||||||
OBJS-$(CONFIG_QPELDSP) += qpeldsp.o
|
OBJS-$(CONFIG_QPELDSP) += qpeldsp.o
|
||||||
OBJS-$(CONFIG_RANGECODER) += rangecoder.o
|
OBJS-$(CONFIG_RANGECODER) += rangecoder.o
|
||||||
RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o
|
RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o
|
||||||
|
@ -23,6 +23,7 @@ OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_init_arm.o
|
|||||||
OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_arm.o
|
OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_arm.o
|
||||||
OBJS-$(CONFIG_MPEGVIDEOENC) += arm/mpegvideoencdsp_init_arm.o
|
OBJS-$(CONFIG_MPEGVIDEOENC) += arm/mpegvideoencdsp_init_arm.o
|
||||||
OBJS-$(CONFIG_NEON_CLOBBER_TEST) += arm/neontest.o
|
OBJS-$(CONFIG_NEON_CLOBBER_TEST) += arm/neontest.o
|
||||||
|
OBJS-$(CONFIG_PIXBLOCKDSP) += arm/pixblockdsp_init_arm.o
|
||||||
OBJS-$(CONFIG_VIDEODSP) += arm/videodsp_init_arm.o
|
OBJS-$(CONFIG_VIDEODSP) += arm/videodsp_init_arm.o
|
||||||
OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_init_arm.o
|
OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_init_arm.o
|
||||||
|
|
||||||
@ -62,6 +63,7 @@ ARMV6-OBJS-$(CONFIG_IDCTDSP) += arm/idctdsp_init_armv6.o \
|
|||||||
arm/simple_idct_armv6.o
|
arm/simple_idct_armv6.o
|
||||||
ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o
|
ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o
|
||||||
ARMV6-OBJS-$(CONFIG_MPEGVIDEOENC) += arm/mpegvideoencdsp_armv6.o
|
ARMV6-OBJS-$(CONFIG_MPEGVIDEOENC) += arm/mpegvideoencdsp_armv6.o
|
||||||
|
ARMV6-OBJS-$(CONFIG_PIXBLOCKDSP) += arm/pixblockdsp_armv6.o
|
||||||
|
|
||||||
ARMV6-OBJS-$(CONFIG_MLP_DECODER) += arm/mlpdsp_armv6.o
|
ARMV6-OBJS-$(CONFIG_MLP_DECODER) += arm/mlpdsp_armv6.o
|
||||||
ARMV6-OBJS-$(CONFIG_VP7_DECODER) += arm/vp8_armv6.o \
|
ARMV6-OBJS-$(CONFIG_VP7_DECODER) += arm/vp8_armv6.o \
|
||||||
|
@ -20,61 +20,6 @@
|
|||||||
|
|
||||||
#include "libavutil/arm/asm.S"
|
#include "libavutil/arm/asm.S"
|
||||||
|
|
||||||
function ff_get_pixels_armv6, export=1
|
|
||||||
pld [r1, r2]
|
|
||||||
push {r4-r8, lr}
|
|
||||||
mov lr, #8
|
|
||||||
1:
|
|
||||||
ldrd_post r4, r5, r1, r2
|
|
||||||
subs lr, lr, #1
|
|
||||||
uxtb16 r6, r4
|
|
||||||
uxtb16 r4, r4, ror #8
|
|
||||||
uxtb16 r12, r5
|
|
||||||
uxtb16 r8, r5, ror #8
|
|
||||||
pld [r1, r2]
|
|
||||||
pkhbt r5, r6, r4, lsl #16
|
|
||||||
pkhtb r6, r4, r6, asr #16
|
|
||||||
pkhbt r7, r12, r8, lsl #16
|
|
||||||
pkhtb r12, r8, r12, asr #16
|
|
||||||
stm r0!, {r5,r6,r7,r12}
|
|
||||||
bgt 1b
|
|
||||||
|
|
||||||
pop {r4-r8, pc}
|
|
||||||
endfunc
|
|
||||||
|
|
||||||
function ff_diff_pixels_armv6, export=1
|
|
||||||
pld [r1, r3]
|
|
||||||
pld [r2, r3]
|
|
||||||
push {r4-r9, lr}
|
|
||||||
mov lr, #8
|
|
||||||
1:
|
|
||||||
ldrd_post r4, r5, r1, r3
|
|
||||||
ldrd_post r6, r7, r2, r3
|
|
||||||
uxtb16 r8, r4
|
|
||||||
uxtb16 r4, r4, ror #8
|
|
||||||
uxtb16 r9, r6
|
|
||||||
uxtb16 r6, r6, ror #8
|
|
||||||
pld [r1, r3]
|
|
||||||
ssub16 r9, r8, r9
|
|
||||||
ssub16 r6, r4, r6
|
|
||||||
uxtb16 r8, r5
|
|
||||||
uxtb16 r5, r5, ror #8
|
|
||||||
pld [r2, r3]
|
|
||||||
pkhbt r4, r9, r6, lsl #16
|
|
||||||
pkhtb r6, r6, r9, asr #16
|
|
||||||
uxtb16 r9, r7
|
|
||||||
uxtb16 r7, r7, ror #8
|
|
||||||
ssub16 r9, r8, r9
|
|
||||||
ssub16 r5, r5, r7
|
|
||||||
subs lr, lr, #1
|
|
||||||
pkhbt r8, r9, r5, lsl #16
|
|
||||||
pkhtb r9, r5, r9, asr #16
|
|
||||||
stm r0!, {r4,r6,r8,r9}
|
|
||||||
bgt 1b
|
|
||||||
|
|
||||||
pop {r4-r9, pc}
|
|
||||||
endfunc
|
|
||||||
|
|
||||||
function ff_pix_abs16_armv6, export=1
|
function ff_pix_abs16_armv6, export=1
|
||||||
ldr r0, [sp]
|
ldr r0, [sp]
|
||||||
push {r4-r9, lr}
|
push {r4-r9, lr}
|
||||||
|
@ -26,10 +26,6 @@
|
|||||||
#include "libavcodec/mpegvideo.h"
|
#include "libavcodec/mpegvideo.h"
|
||||||
#include "dsputil_arm.h"
|
#include "dsputil_arm.h"
|
||||||
|
|
||||||
void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels, int stride);
|
|
||||||
void ff_diff_pixels_armv6(int16_t *block, const uint8_t *s1,
|
|
||||||
const uint8_t *s2, int stride);
|
|
||||||
|
|
||||||
int ff_pix_abs16_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2,
|
int ff_pix_abs16_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2,
|
||||||
int line_size, int h);
|
int line_size, int h);
|
||||||
int ff_pix_abs16_x2_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2,
|
int ff_pix_abs16_x2_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2,
|
||||||
@ -46,10 +42,6 @@ int ff_sse16_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2,
|
|||||||
av_cold void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx,
|
av_cold void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx,
|
||||||
unsigned high_bit_depth)
|
unsigned high_bit_depth)
|
||||||
{
|
{
|
||||||
if (!high_bit_depth)
|
|
||||||
c->get_pixels = ff_get_pixels_armv6;
|
|
||||||
c->diff_pixels = ff_diff_pixels_armv6;
|
|
||||||
|
|
||||||
c->pix_abs[0][0] = ff_pix_abs16_armv6;
|
c->pix_abs[0][0] = ff_pix_abs16_armv6;
|
||||||
c->pix_abs[0][1] = ff_pix_abs16_x2_armv6;
|
c->pix_abs[0][1] = ff_pix_abs16_x2_armv6;
|
||||||
c->pix_abs[0][2] = ff_pix_abs16_y2_armv6;
|
c->pix_abs[0][2] = ff_pix_abs16_y2_armv6;
|
||||||
|
76
libavcodec/arm/pixblockdsp_armv6.S
Normal file
76
libavcodec/arm/pixblockdsp_armv6.S
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
|
||||||
|
*
|
||||||
|
* This file is part of Libav.
|
||||||
|
*
|
||||||
|
* Libav is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Libav is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with Libav; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "libavutil/arm/asm.S"
|
||||||
|
|
||||||
|
function ff_get_pixels_armv6, export=1
|
||||||
|
pld [r1, r2]
|
||||||
|
push {r4-r8, lr}
|
||||||
|
mov lr, #8
|
||||||
|
1:
|
||||||
|
ldrd_post r4, r5, r1, r2
|
||||||
|
subs lr, lr, #1
|
||||||
|
uxtb16 r6, r4
|
||||||
|
uxtb16 r4, r4, ror #8
|
||||||
|
uxtb16 r12, r5
|
||||||
|
uxtb16 r8, r5, ror #8
|
||||||
|
pld [r1, r2]
|
||||||
|
pkhbt r5, r6, r4, lsl #16
|
||||||
|
pkhtb r6, r4, r6, asr #16
|
||||||
|
pkhbt r7, r12, r8, lsl #16
|
||||||
|
pkhtb r12, r8, r12, asr #16
|
||||||
|
stm r0!, {r5,r6,r7,r12}
|
||||||
|
bgt 1b
|
||||||
|
|
||||||
|
pop {r4-r8, pc}
|
||||||
|
endfunc
|
||||||
|
|
||||||
|
function ff_diff_pixels_armv6, export=1
|
||||||
|
pld [r1, r3]
|
||||||
|
pld [r2, r3]
|
||||||
|
push {r4-r9, lr}
|
||||||
|
mov lr, #8
|
||||||
|
1:
|
||||||
|
ldrd_post r4, r5, r1, r3
|
||||||
|
ldrd_post r6, r7, r2, r3
|
||||||
|
uxtb16 r8, r4
|
||||||
|
uxtb16 r4, r4, ror #8
|
||||||
|
uxtb16 r9, r6
|
||||||
|
uxtb16 r6, r6, ror #8
|
||||||
|
pld [r1, r3]
|
||||||
|
ssub16 r9, r8, r9
|
||||||
|
ssub16 r6, r4, r6
|
||||||
|
uxtb16 r8, r5
|
||||||
|
uxtb16 r5, r5, ror #8
|
||||||
|
pld [r2, r3]
|
||||||
|
pkhbt r4, r9, r6, lsl #16
|
||||||
|
pkhtb r6, r6, r9, asr #16
|
||||||
|
uxtb16 r9, r7
|
||||||
|
uxtb16 r7, r7, ror #8
|
||||||
|
ssub16 r9, r8, r9
|
||||||
|
ssub16 r5, r5, r7
|
||||||
|
subs lr, lr, #1
|
||||||
|
pkhbt r8, r9, r5, lsl #16
|
||||||
|
pkhtb r9, r5, r9, asr #16
|
||||||
|
stm r0!, {r4,r6,r8,r9}
|
||||||
|
bgt 1b
|
||||||
|
|
||||||
|
pop {r4-r9, pc}
|
||||||
|
endfunc
|
42
libavcodec/arm/pixblockdsp_init_arm.c
Normal file
42
libavcodec/arm/pixblockdsp_init_arm.c
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
/*
|
||||||
|
* This file is part of Libav.
|
||||||
|
*
|
||||||
|
* Libav is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Libav is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with Libav; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "libavutil/attributes.h"
|
||||||
|
#include "libavutil/cpu.h"
|
||||||
|
#include "libavutil/arm/cpu.h"
|
||||||
|
#include "libavcodec/avcodec.h"
|
||||||
|
#include "libavcodec/pixblockdsp.h"
|
||||||
|
|
||||||
|
void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels, int stride);
|
||||||
|
void ff_diff_pixels_armv6(int16_t *block, const uint8_t *s1,
|
||||||
|
const uint8_t *s2, int stride);
|
||||||
|
|
||||||
|
av_cold void ff_pixblockdsp_init_arm(PixblockDSPContext *c,
|
||||||
|
AVCodecContext *avctx,
|
||||||
|
unsigned high_bit_depth)
|
||||||
|
{
|
||||||
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
|
if (have_armv6(cpu_flags)) {
|
||||||
|
if (!high_bit_depth)
|
||||||
|
c->get_pixels = ff_get_pixels_armv6;
|
||||||
|
c->diff_pixels = ff_diff_pixels_armv6;
|
||||||
|
}
|
||||||
|
}
|
@ -33,19 +33,19 @@
|
|||||||
#include "avcodec.h"
|
#include "avcodec.h"
|
||||||
#include "blockdsp.h"
|
#include "blockdsp.h"
|
||||||
#include "bswapdsp.h"
|
#include "bswapdsp.h"
|
||||||
#include "dsputil.h"
|
|
||||||
#include "fdctdsp.h"
|
#include "fdctdsp.h"
|
||||||
#include "idctdsp.h"
|
#include "idctdsp.h"
|
||||||
#include "get_bits.h"
|
#include "get_bits.h"
|
||||||
|
#include "pixblockdsp.h"
|
||||||
#include "put_bits.h"
|
#include "put_bits.h"
|
||||||
|
|
||||||
typedef struct ASV1Context{
|
typedef struct ASV1Context{
|
||||||
AVCodecContext *avctx;
|
AVCodecContext *avctx;
|
||||||
BlockDSPContext bdsp;
|
BlockDSPContext bdsp;
|
||||||
BswapDSPContext bbdsp;
|
BswapDSPContext bbdsp;
|
||||||
DSPContext dsp;
|
|
||||||
FDCTDSPContext fdsp;
|
FDCTDSPContext fdsp;
|
||||||
IDCTDSPContext idsp;
|
IDCTDSPContext idsp;
|
||||||
|
PixblockDSPContext pdsp;
|
||||||
PutBitContext pb;
|
PutBitContext pb;
|
||||||
GetBitContext gb;
|
GetBitContext gb;
|
||||||
ScanTable scantable;
|
ScanTable scantable;
|
||||||
|
@ -159,16 +159,16 @@ static inline void dct_get(ASV1Context *a, const AVFrame *frame,
|
|||||||
uint8_t *ptr_cb = frame->data[1] + (mb_y * 8 * frame->linesize[1]) + mb_x * 8;
|
uint8_t *ptr_cb = frame->data[1] + (mb_y * 8 * frame->linesize[1]) + mb_x * 8;
|
||||||
uint8_t *ptr_cr = frame->data[2] + (mb_y * 8 * frame->linesize[2]) + mb_x * 8;
|
uint8_t *ptr_cr = frame->data[2] + (mb_y * 8 * frame->linesize[2]) + mb_x * 8;
|
||||||
|
|
||||||
a->dsp.get_pixels(block[0], ptr_y , linesize);
|
a->pdsp.get_pixels(block[0], ptr_y, linesize);
|
||||||
a->dsp.get_pixels(block[1], ptr_y + 8, linesize);
|
a->pdsp.get_pixels(block[1], ptr_y + 8, linesize);
|
||||||
a->dsp.get_pixels(block[2], ptr_y + 8*linesize , linesize);
|
a->pdsp.get_pixels(block[2], ptr_y + 8 * linesize, linesize);
|
||||||
a->dsp.get_pixels(block[3], ptr_y + 8*linesize + 8, linesize);
|
a->pdsp.get_pixels(block[3], ptr_y + 8 * linesize + 8, linesize);
|
||||||
for(i=0; i<4; i++)
|
for(i=0; i<4; i++)
|
||||||
a->fdsp.fdct(block[i]);
|
a->fdsp.fdct(block[i]);
|
||||||
|
|
||||||
if(!(a->avctx->flags&CODEC_FLAG_GRAY)){
|
if(!(a->avctx->flags&CODEC_FLAG_GRAY)){
|
||||||
a->dsp.get_pixels(block[4], ptr_cb, frame->linesize[1]);
|
a->pdsp.get_pixels(block[4], ptr_cb, frame->linesize[1]);
|
||||||
a->dsp.get_pixels(block[5], ptr_cr, frame->linesize[2]);
|
a->pdsp.get_pixels(block[5], ptr_cr, frame->linesize[2]);
|
||||||
for(i=4; i<6; i++)
|
for(i=4; i<6; i++)
|
||||||
a->fdsp.fdct(block[i]);
|
a->fdsp.fdct(block[i]);
|
||||||
}
|
}
|
||||||
@ -248,8 +248,8 @@ static av_cold int encode_init(AVCodecContext *avctx){
|
|||||||
avctx->coded_frame->key_frame = 1;
|
avctx->coded_frame->key_frame = 1;
|
||||||
|
|
||||||
ff_asv_common_init(avctx);
|
ff_asv_common_init(avctx);
|
||||||
ff_dsputil_init(&a->dsp, avctx);
|
|
||||||
ff_fdctdsp_init(&a->fdsp, avctx);
|
ff_fdctdsp_init(&a->fdsp, avctx);
|
||||||
|
ff_pixblockdsp_init(&a->pdsp, avctx);
|
||||||
|
|
||||||
if(avctx->global_quality == 0) avctx->global_quality= 4*FF_QUALITY_SCALE;
|
if(avctx->global_quality == 0) avctx->global_quality= 4*FF_QUALITY_SCALE;
|
||||||
|
|
||||||
|
@ -30,10 +30,10 @@
|
|||||||
|
|
||||||
#include "avcodec.h"
|
#include "avcodec.h"
|
||||||
#include "blockdsp.h"
|
#include "blockdsp.h"
|
||||||
#include "dsputil.h"
|
|
||||||
#include "fdctdsp.h"
|
#include "fdctdsp.h"
|
||||||
#include "internal.h"
|
#include "internal.h"
|
||||||
#include "mpegvideo.h"
|
#include "mpegvideo.h"
|
||||||
|
#include "pixblockdsp.h"
|
||||||
#include "dnxhdenc.h"
|
#include "dnxhdenc.h"
|
||||||
|
|
||||||
// The largest value that will not lead to overflow for 10bit samples.
|
// The largest value that will not lead to overflow for 10bit samples.
|
||||||
@ -308,10 +308,10 @@ static av_cold int dnxhd_encode_init(AVCodecContext *avctx)
|
|||||||
avctx->bits_per_raw_sample = ctx->cid_table->bit_depth;
|
avctx->bits_per_raw_sample = ctx->cid_table->bit_depth;
|
||||||
|
|
||||||
ff_blockdsp_init(&ctx->bdsp, avctx);
|
ff_blockdsp_init(&ctx->bdsp, avctx);
|
||||||
ff_dsputil_init(&ctx->m.dsp, avctx);
|
|
||||||
ff_fdctdsp_init(&ctx->m.fdsp, avctx);
|
ff_fdctdsp_init(&ctx->m.fdsp, avctx);
|
||||||
ff_idctdsp_init(&ctx->m.idsp, avctx);
|
ff_idctdsp_init(&ctx->m.idsp, avctx);
|
||||||
ff_mpegvideoencdsp_init(&ctx->m.mpvencdsp, avctx);
|
ff_mpegvideoencdsp_init(&ctx->m.mpvencdsp, avctx);
|
||||||
|
ff_pixblockdsp_init(&ctx->m.pdsp, avctx);
|
||||||
ff_dct_common_init(&ctx->m);
|
ff_dct_common_init(&ctx->m);
|
||||||
if (!ctx->m.dct_quantize)
|
if (!ctx->m.dct_quantize)
|
||||||
ctx->m.dct_quantize = ff_dct_quantize_c;
|
ctx->m.dct_quantize = ff_dct_quantize_c;
|
||||||
@ -540,12 +540,12 @@ void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y)
|
|||||||
((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs);
|
((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs);
|
||||||
const uint8_t *ptr_v = ctx->thread[0]->src[2] +
|
const uint8_t *ptr_v = ctx->thread[0]->src[2] +
|
||||||
((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs);
|
((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs);
|
||||||
DSPContext *dsp = &ctx->m.dsp;
|
PixblockDSPContext *pdsp = &ctx->m.pdsp;
|
||||||
|
|
||||||
dsp->get_pixels(ctx->blocks[0], ptr_y, ctx->m.linesize);
|
pdsp->get_pixels(ctx->blocks[0], ptr_y, ctx->m.linesize);
|
||||||
dsp->get_pixels(ctx->blocks[1], ptr_y + bw, ctx->m.linesize);
|
pdsp->get_pixels(ctx->blocks[1], ptr_y + bw, ctx->m.linesize);
|
||||||
dsp->get_pixels(ctx->blocks[2], ptr_u, ctx->m.uvlinesize);
|
pdsp->get_pixels(ctx->blocks[2], ptr_u, ctx->m.uvlinesize);
|
||||||
dsp->get_pixels(ctx->blocks[3], ptr_v, ctx->m.uvlinesize);
|
pdsp->get_pixels(ctx->blocks[3], ptr_v, ctx->m.uvlinesize);
|
||||||
|
|
||||||
if (mb_y + 1 == ctx->m.mb_height && ctx->m.avctx->height == 1080) {
|
if (mb_y + 1 == ctx->m.mb_height && ctx->m.avctx->height == 1080) {
|
||||||
if (ctx->interlaced) {
|
if (ctx->interlaced) {
|
||||||
@ -568,13 +568,13 @@ void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y)
|
|||||||
ctx->bdsp.clear_block(ctx->blocks[7]);
|
ctx->bdsp.clear_block(ctx->blocks[7]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
dsp->get_pixels(ctx->blocks[4],
|
pdsp->get_pixels(ctx->blocks[4],
|
||||||
ptr_y + ctx->dct_y_offset, ctx->m.linesize);
|
ptr_y + ctx->dct_y_offset, ctx->m.linesize);
|
||||||
dsp->get_pixels(ctx->blocks[5],
|
pdsp->get_pixels(ctx->blocks[5],
|
||||||
ptr_y + ctx->dct_y_offset + bw, ctx->m.linesize);
|
ptr_y + ctx->dct_y_offset + bw, ctx->m.linesize);
|
||||||
dsp->get_pixels(ctx->blocks[6],
|
pdsp->get_pixels(ctx->blocks[6],
|
||||||
ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize);
|
ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize);
|
||||||
dsp->get_pixels(ctx->blocks[7],
|
pdsp->get_pixels(ctx->blocks[7],
|
||||||
ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize);
|
ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -35,13 +35,6 @@
|
|||||||
|
|
||||||
uint32_t ff_square_tab[512] = { 0, };
|
uint32_t ff_square_tab[512] = { 0, };
|
||||||
|
|
||||||
#define BIT_DEPTH 16
|
|
||||||
#include "dsputilenc_template.c"
|
|
||||||
#undef BIT_DEPTH
|
|
||||||
|
|
||||||
#define BIT_DEPTH 8
|
|
||||||
#include "dsputilenc_template.c"
|
|
||||||
|
|
||||||
static int sse4_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
|
static int sse4_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
|
||||||
int line_size, int h)
|
int line_size, int h)
|
||||||
{
|
{
|
||||||
@ -110,27 +103,6 @@ static int sse16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void diff_pixels_c(int16_t *restrict block, const uint8_t *s1,
|
|
||||||
const uint8_t *s2, int stride)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
|
|
||||||
/* read the pixels */
|
|
||||||
for (i = 0; i < 8; i++) {
|
|
||||||
block[0] = s1[0] - s2[0];
|
|
||||||
block[1] = s1[1] - s2[1];
|
|
||||||
block[2] = s1[2] - s2[2];
|
|
||||||
block[3] = s1[3] - s2[3];
|
|
||||||
block[4] = s1[4] - s2[4];
|
|
||||||
block[5] = s1[5] - s2[5];
|
|
||||||
block[6] = s1[6] - s2[6];
|
|
||||||
block[7] = s1[7] - s2[7];
|
|
||||||
s1 += stride;
|
|
||||||
s2 += stride;
|
|
||||||
block += 8;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static int sum_abs_dctelem_c(int16_t *block)
|
static int sum_abs_dctelem_c(int16_t *block)
|
||||||
{
|
{
|
||||||
int sum = 0, i;
|
int sum = 0, i;
|
||||||
@ -577,7 +549,7 @@ static int dct_sad8x8_c(MpegEncContext *s, uint8_t *src1,
|
|||||||
|
|
||||||
assert(h == 8);
|
assert(h == 8);
|
||||||
|
|
||||||
s->dsp.diff_pixels(temp, src1, src2, stride);
|
s->pdsp.diff_pixels(temp, src1, src2, stride);
|
||||||
s->fdsp.fdct(temp);
|
s->fdsp.fdct(temp);
|
||||||
return s->dsp.sum_abs_dctelem(temp);
|
return s->dsp.sum_abs_dctelem(temp);
|
||||||
}
|
}
|
||||||
@ -617,7 +589,7 @@ static int dct264_sad8x8_c(MpegEncContext *s, uint8_t *src1,
|
|||||||
int16_t dct[8][8];
|
int16_t dct[8][8];
|
||||||
int i, sum = 0;
|
int i, sum = 0;
|
||||||
|
|
||||||
s->dsp.diff_pixels(dct[0], src1, src2, stride);
|
s->pdsp.diff_pixels(dct[0], src1, src2, stride);
|
||||||
|
|
||||||
#define SRC(x) dct[i][x]
|
#define SRC(x) dct[i][x]
|
||||||
#define DST(x, v) dct[i][x] = v
|
#define DST(x, v) dct[i][x] = v
|
||||||
@ -644,7 +616,7 @@ static int dct_max8x8_c(MpegEncContext *s, uint8_t *src1,
|
|||||||
|
|
||||||
assert(h == 8);
|
assert(h == 8);
|
||||||
|
|
||||||
s->dsp.diff_pixels(temp, src1, src2, stride);
|
s->pdsp.diff_pixels(temp, src1, src2, stride);
|
||||||
s->fdsp.fdct(temp);
|
s->fdsp.fdct(temp);
|
||||||
|
|
||||||
for (i = 0; i < 64; i++)
|
for (i = 0; i < 64; i++)
|
||||||
@ -663,7 +635,7 @@ static int quant_psnr8x8_c(MpegEncContext *s, uint8_t *src1,
|
|||||||
assert(h == 8);
|
assert(h == 8);
|
||||||
s->mb_intra = 0;
|
s->mb_intra = 0;
|
||||||
|
|
||||||
s->dsp.diff_pixels(temp, src1, src2, stride);
|
s->pdsp.diff_pixels(temp, src1, src2, stride);
|
||||||
|
|
||||||
memcpy(bak, temp, 64 * sizeof(int16_t));
|
memcpy(bak, temp, 64 * sizeof(int16_t));
|
||||||
|
|
||||||
@ -694,7 +666,7 @@ static int rd8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2,
|
|||||||
copy_block8(lsrc1, src1, 8, stride, 8);
|
copy_block8(lsrc1, src1, 8, stride, 8);
|
||||||
copy_block8(lsrc2, src2, 8, stride, 8);
|
copy_block8(lsrc2, src2, 8, stride, 8);
|
||||||
|
|
||||||
s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);
|
s->pdsp.diff_pixels(temp, lsrc1, lsrc2, 8);
|
||||||
|
|
||||||
s->block_last_index[0 /* FIXME */] =
|
s->block_last_index[0 /* FIXME */] =
|
||||||
last =
|
last =
|
||||||
@ -766,7 +738,7 @@ static int bit8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2,
|
|||||||
|
|
||||||
assert(h == 8);
|
assert(h == 8);
|
||||||
|
|
||||||
s->dsp.diff_pixels(temp, src1, src2, stride);
|
s->pdsp.diff_pixels(temp, src1, src2, stride);
|
||||||
|
|
||||||
s->block_last_index[0 /* FIXME */] =
|
s->block_last_index[0 /* FIXME */] =
|
||||||
last =
|
last =
|
||||||
@ -932,8 +904,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
|
|||||||
{
|
{
|
||||||
const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8;
|
const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8;
|
||||||
|
|
||||||
c->diff_pixels = diff_pixels_c;
|
|
||||||
|
|
||||||
c->sum_abs_dctelem = sum_abs_dctelem_c;
|
c->sum_abs_dctelem = sum_abs_dctelem_c;
|
||||||
|
|
||||||
/* TODO [0] 16 [1] 8 */
|
/* TODO [0] 16 [1] 8 */
|
||||||
@ -975,16 +945,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
|
|||||||
c->nsse[0] = nsse16_c;
|
c->nsse[0] = nsse16_c;
|
||||||
c->nsse[1] = nsse8_c;
|
c->nsse[1] = nsse8_c;
|
||||||
|
|
||||||
switch (avctx->bits_per_raw_sample) {
|
|
||||||
case 9:
|
|
||||||
case 10:
|
|
||||||
c->get_pixels = get_pixels_16_c;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
c->get_pixels = get_pixels_8_c;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ARCH_ARM)
|
if (ARCH_ARM)
|
||||||
ff_dsputil_init_arm(c, avctx, high_bit_depth);
|
ff_dsputil_init_arm(c, avctx, high_bit_depth);
|
||||||
if (ARCH_PPC)
|
if (ARCH_PPC)
|
||||||
|
@ -48,14 +48,6 @@ typedef int (*me_cmp_func)(struct MpegEncContext *c,
|
|||||||
* DSPContext.
|
* DSPContext.
|
||||||
*/
|
*/
|
||||||
typedef struct DSPContext {
|
typedef struct DSPContext {
|
||||||
/* pixel ops : interface with DCT */
|
|
||||||
void (*get_pixels)(int16_t *block /* align 16 */,
|
|
||||||
const uint8_t *pixels /* align 8 */,
|
|
||||||
int line_size);
|
|
||||||
void (*diff_pixels)(int16_t *block /* align 16 */,
|
|
||||||
const uint8_t *s1 /* align 8 */,
|
|
||||||
const uint8_t *s2 /* align 8 */,
|
|
||||||
int stride);
|
|
||||||
int (*sum_abs_dctelem)(int16_t *block /* align 16 */);
|
int (*sum_abs_dctelem)(int16_t *block /* align 16 */);
|
||||||
|
|
||||||
me_cmp_func sad[6]; /* identical to pix_absAxA except additional void * */
|
me_cmp_func sad[6]; /* identical to pix_absAxA except additional void * */
|
||||||
|
@ -31,6 +31,7 @@
|
|||||||
#include "dsputil.h"
|
#include "dsputil.h"
|
||||||
#include "fdctdsp.h"
|
#include "fdctdsp.h"
|
||||||
#include "internal.h"
|
#include "internal.h"
|
||||||
|
#include "pixblockdsp.h"
|
||||||
#include "put_bits.h"
|
#include "put_bits.h"
|
||||||
#include "dv.h"
|
#include "dv.h"
|
||||||
#include "dv_tablegen.h"
|
#include "dv_tablegen.h"
|
||||||
@ -41,6 +42,7 @@ static av_cold int dvvideo_encode_init(AVCodecContext *avctx)
|
|||||||
DVVideoContext *s = avctx->priv_data;
|
DVVideoContext *s = avctx->priv_data;
|
||||||
DSPContext dsp;
|
DSPContext dsp;
|
||||||
FDCTDSPContext fdsp;
|
FDCTDSPContext fdsp;
|
||||||
|
PixblockDSPContext pdsp;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
s->sys = avpriv_dv_codec_profile(avctx);
|
s->sys = avpriv_dv_codec_profile(avctx);
|
||||||
@ -65,9 +67,10 @@ static av_cold int dvvideo_encode_init(AVCodecContext *avctx)
|
|||||||
|
|
||||||
ff_dsputil_init(&dsp, avctx);
|
ff_dsputil_init(&dsp, avctx);
|
||||||
ff_fdctdsp_init(&fdsp, avctx);
|
ff_fdctdsp_init(&fdsp, avctx);
|
||||||
|
ff_pixblockdsp_init(&pdsp, avctx);
|
||||||
ff_set_cmp(&dsp, dsp.ildct_cmp, avctx->ildct_cmp);
|
ff_set_cmp(&dsp, dsp.ildct_cmp, avctx->ildct_cmp);
|
||||||
|
|
||||||
s->get_pixels = dsp.get_pixels;
|
s->get_pixels = pdsp.get_pixels;
|
||||||
s->ildct_cmp = dsp.ildct_cmp[5];
|
s->ildct_cmp = dsp.ildct_cmp[5];
|
||||||
|
|
||||||
s->fdct[0] = fdsp.fdct;
|
s->fdct[0] = fdsp.fdct;
|
||||||
|
@ -39,6 +39,7 @@
|
|||||||
#include "idctdsp.h"
|
#include "idctdsp.h"
|
||||||
#include "mpegvideodsp.h"
|
#include "mpegvideodsp.h"
|
||||||
#include "mpegvideoencdsp.h"
|
#include "mpegvideoencdsp.h"
|
||||||
|
#include "pixblockdsp.h"
|
||||||
#include "put_bits.h"
|
#include "put_bits.h"
|
||||||
#include "ratecontrol.h"
|
#include "ratecontrol.h"
|
||||||
#include "parser.h"
|
#include "parser.h"
|
||||||
@ -361,6 +362,7 @@ typedef struct MpegEncContext {
|
|||||||
IDCTDSPContext idsp;
|
IDCTDSPContext idsp;
|
||||||
MpegVideoDSPContext mdsp;
|
MpegVideoDSPContext mdsp;
|
||||||
MpegvideoEncDSPContext mpvencdsp;
|
MpegvideoEncDSPContext mpvencdsp;
|
||||||
|
PixblockDSPContext pdsp;
|
||||||
QpelDSPContext qdsp;
|
QpelDSPContext qdsp;
|
||||||
VideoDSPContext vdsp;
|
VideoDSPContext vdsp;
|
||||||
H263DSPContext h263dsp;
|
H263DSPContext h263dsp;
|
||||||
|
@ -37,7 +37,6 @@
|
|||||||
#include "libavutil/timer.h"
|
#include "libavutil/timer.h"
|
||||||
#include "avcodec.h"
|
#include "avcodec.h"
|
||||||
#include "dct.h"
|
#include "dct.h"
|
||||||
#include "dsputil.h"
|
|
||||||
#include "idctdsp.h"
|
#include "idctdsp.h"
|
||||||
#include "mpeg12.h"
|
#include "mpeg12.h"
|
||||||
#include "mpegvideo.h"
|
#include "mpegvideo.h"
|
||||||
@ -48,6 +47,7 @@
|
|||||||
#include "mpegutils.h"
|
#include "mpegutils.h"
|
||||||
#include "mjpegenc.h"
|
#include "mjpegenc.h"
|
||||||
#include "msmpeg4.h"
|
#include "msmpeg4.h"
|
||||||
|
#include "pixblockdsp.h"
|
||||||
#include "qpeldsp.h"
|
#include "qpeldsp.h"
|
||||||
#include "faandct.h"
|
#include "faandct.h"
|
||||||
#include "thread.h"
|
#include "thread.h"
|
||||||
@ -703,6 +703,7 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
|
|||||||
|
|
||||||
ff_fdctdsp_init(&s->fdsp, avctx);
|
ff_fdctdsp_init(&s->fdsp, avctx);
|
||||||
ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
|
ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
|
||||||
|
ff_pixblockdsp_init(&s->pdsp, avctx);
|
||||||
ff_qpeldsp_init(&s->qdsp);
|
ff_qpeldsp_init(&s->qdsp);
|
||||||
|
|
||||||
s->avctx->coded_frame = s->current_picture.f;
|
s->avctx->coded_frame = s->current_picture.f;
|
||||||
@ -1943,21 +1944,21 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
s->dsp.get_pixels(s->block[0], ptr_y , wrap_y);
|
s->pdsp.get_pixels(s->block[0], ptr_y, wrap_y);
|
||||||
s->dsp.get_pixels(s->block[1], ptr_y + 8 , wrap_y);
|
s->pdsp.get_pixels(s->block[1], ptr_y + 8, wrap_y);
|
||||||
s->dsp.get_pixels(s->block[2], ptr_y + dct_offset , wrap_y);
|
s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset, wrap_y);
|
||||||
s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
|
s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
|
||||||
|
|
||||||
if (s->flags & CODEC_FLAG_GRAY) {
|
if (s->flags & CODEC_FLAG_GRAY) {
|
||||||
skip_dct[4] = 1;
|
skip_dct[4] = 1;
|
||||||
skip_dct[5] = 1;
|
skip_dct[5] = 1;
|
||||||
} else {
|
} else {
|
||||||
s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
|
s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
|
||||||
s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
|
s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
|
||||||
if (!s->chroma_y_shift) { /* 422 */
|
if (!s->chroma_y_shift) { /* 422 */
|
||||||
s->dsp.get_pixels(s->block[6],
|
s->pdsp.get_pixels(s->block[6],
|
||||||
ptr_cb + (dct_offset >> 1), wrap_c);
|
ptr_cb + (dct_offset >> 1), wrap_c);
|
||||||
s->dsp.get_pixels(s->block[7],
|
s->pdsp.get_pixels(s->block[7],
|
||||||
ptr_cr + (dct_offset >> 1), wrap_c);
|
ptr_cr + (dct_offset >> 1), wrap_c);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2024,23 +2025,23 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
|
s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
|
||||||
s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
|
s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
|
||||||
s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
|
s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
|
||||||
dest_y + dct_offset, wrap_y);
|
dest_y + dct_offset, wrap_y);
|
||||||
s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
|
s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
|
||||||
dest_y + dct_offset + 8, wrap_y);
|
dest_y + dct_offset + 8, wrap_y);
|
||||||
|
|
||||||
if (s->flags & CODEC_FLAG_GRAY) {
|
if (s->flags & CODEC_FLAG_GRAY) {
|
||||||
skip_dct[4] = 1;
|
skip_dct[4] = 1;
|
||||||
skip_dct[5] = 1;
|
skip_dct[5] = 1;
|
||||||
} else {
|
} else {
|
||||||
s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
|
s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
|
||||||
s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
|
s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
|
||||||
if (!s->chroma_y_shift) { /* 422 */
|
if (!s->chroma_y_shift) { /* 422 */
|
||||||
s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
|
s->pdsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
|
||||||
dest_cb + (dct_offset >> 1), wrap_c);
|
dest_cb + (dct_offset >> 1), wrap_c);
|
||||||
s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
|
s->pdsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
|
||||||
dest_cr + (dct_offset >> 1), wrap_c);
|
dest_cr + (dct_offset >> 1), wrap_c);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
76
libavcodec/pixblockdsp.c
Normal file
76
libavcodec/pixblockdsp.c
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
/*
|
||||||
|
* This file is part of Libav.
|
||||||
|
*
|
||||||
|
* Libav is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Libav is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with Libav; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
#include "libavutil/attributes.h"
|
||||||
|
#include "avcodec.h"
|
||||||
|
#include "pixblockdsp.h"
|
||||||
|
|
||||||
|
#define BIT_DEPTH 16
|
||||||
|
#include "pixblockdsp_template.c"
|
||||||
|
#undef BIT_DEPTH
|
||||||
|
|
||||||
|
#define BIT_DEPTH 8
|
||||||
|
#include "pixblockdsp_template.c"
|
||||||
|
|
||||||
|
/**
 * Compute the element-wise difference of two 8x8 blocks of 8-bit samples.
 *
 * @param block  receives the 64 differences (s1 - s2), stored row after row
 *               with 8 values per row
 * @param s1     first sample of the minuend block
 * @param s2     first sample of the subtrahend block
 * @param stride step in bytes from one row to the next; the same step is
 *               applied to both s1 and s2
 */
static void diff_pixels_c(int16_t *restrict block, const uint8_t *s1,
                          const uint8_t *s2, int stride)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        /* The operands are promoted to int before the subtraction, so the
         * stored value lies in [-255, 255]. */
        for (col = 0; col < 8; col++)
            block[8 * row + col] = s1[col] - s2[col];

        s1 += stride;
        s2 += stride;
    }
}
|
||||||
|
|
||||||
|
/**
 * Populate a PixblockDSPContext with the C implementations, then give the
 * architecture-specific initializers a chance to override them.
 *
 * @param c     context whose function pointers are filled in
 * @param avctx codec context; only bits_per_raw_sample is read here, the
 *              pointer itself is forwarded to the per-architecture init
 */
av_cold void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx)
{
    /* Flag handed to the per-architecture initializers. */
    const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8;

    c->diff_pixels = diff_pixels_c;

    /* Only a raw sample depth of 9 or 10 selects the 16-bit reader; every
     * other value (including depths above 10) gets the 8-bit reader. */
    if (avctx->bits_per_raw_sample == 9 || avctx->bits_per_raw_sample == 10)
        c->get_pixels = get_pixels_16_c;
    else
        c->get_pixels = get_pixels_8_c;

    /* ARCH_* are build-time constants, so only the matching call survives. */
    if (ARCH_ARM)
        ff_pixblockdsp_init_arm(c, avctx, high_bit_depth);
    if (ARCH_PPC)
        ff_pixblockdsp_init_ppc(c, avctx, high_bit_depth);
    if (ARCH_X86)
        ff_pixblockdsp_init_x86(c, avctx, high_bit_depth);
}
|
44
libavcodec/pixblockdsp.h
Normal file
44
libavcodec/pixblockdsp.h
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
/*
|
||||||
|
* This file is part of Libav.
|
||||||
|
*
|
||||||
|
* Libav is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Libav is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with Libav; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef AVCODEC_PIXBLOCKDSP_H
|
||||||
|
#define AVCODEC_PIXBLOCKDSP_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "avcodec.h"
|
||||||
|
|
||||||
|
/**
 * Function table for the pixel block helpers.
 * The pointers are filled in by ff_pixblockdsp_init().
 */
typedef struct PixblockDSPContext {
    /**
     * Read a block of samples from pixels into block as 16-bit values.
     *
     * Alignment expected by the implementations: block 16, pixels 8.
     *
     * @param block     destination coefficients
     * @param pixels    first source sample
     * @param line_size step between source rows
     */
    void (*get_pixels)(int16_t *block,
                       const uint8_t *pixels,
                       int line_size);

    /**
     * Store the difference s1 - s2 of two sample blocks into block.
     *
     * Alignment expected by the implementations: block 16, s1 8, s2 8.
     *
     * @param block  destination coefficients
     * @param s1     first sample of the minuend block
     * @param s2     first sample of the subtrahend block
     * @param stride step between rows, shared by s1 and s2
     */
    void (*diff_pixels)(int16_t *block,
                        const uint8_t *s1,
                        const uint8_t *s2,
                        int stride);
} PixblockDSPContext;
|
||||||
|
|
||||||
|
void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx);
|
||||||
|
void ff_pixblockdsp_init_arm(PixblockDSPContext *c, AVCodecContext *avctx,
|
||||||
|
unsigned high_bit_depth);
|
||||||
|
void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, AVCodecContext *avctx,
|
||||||
|
unsigned high_bit_depth);
|
||||||
|
void ff_pixblockdsp_init_x86(PixblockDSPContext *c, AVCodecContext *avctx,
|
||||||
|
unsigned high_bit_depth);
|
||||||
|
|
||||||
|
#endif /* AVCODEC_PIXBLOCKDSP_H */
|
@ -1,10 +1,4 @@
|
|||||||
/*
|
/*
|
||||||
* DSP utils
|
|
||||||
* Copyright (c) 2000, 2001 Fabrice Bellard
|
|
||||||
* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
|
|
||||||
*
|
|
||||||
* gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
|
|
||||||
*
|
|
||||||
* This file is part of Libav.
|
* This file is part of Libav.
|
||||||
*
|
*
|
||||||
* Libav is free software; you can redistribute it and/or
|
* Libav is free software; you can redistribute it and/or
|
||||||
@ -22,11 +16,6 @@
|
|||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
|
||||||
* @file
|
|
||||||
* DSP utils
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "bit_depth_template.c"
|
#include "bit_depth_template.c"
|
||||||
|
|
||||||
static void FUNCC(get_pixels)(int16_t *restrict block, const uint8_t *_pixels,
|
static void FUNCC(get_pixels)(int16_t *restrict block, const uint8_t *_pixels,
|
@ -15,6 +15,7 @@ OBJS-$(CONFIG_MPEGAUDIODSP) += ppc/mpegaudiodsp_altivec.o
|
|||||||
OBJS-$(CONFIG_MPEGVIDEO) += ppc/mpegvideo_altivec.o \
|
OBJS-$(CONFIG_MPEGVIDEO) += ppc/mpegvideo_altivec.o \
|
||||||
ppc/mpegvideodsp.o
|
ppc/mpegvideodsp.o
|
||||||
OBJS-$(CONFIG_MPEGVIDEOENC) += ppc/mpegvideoencdsp.o
|
OBJS-$(CONFIG_MPEGVIDEOENC) += ppc/mpegvideoencdsp.o
|
||||||
|
OBJS-$(CONFIG_PIXBLOCKDSP) += ppc/pixblockdsp.o
|
||||||
OBJS-$(CONFIG_VIDEODSP) += ppc/videodsp_ppc.o
|
OBJS-$(CONFIG_VIDEODSP) += ppc/videodsp_ppc.o
|
||||||
OBJS-$(CONFIG_VP3DSP) += ppc/vp3dsp_altivec.o
|
OBJS-$(CONFIG_VP3DSP) += ppc/vp3dsp_altivec.o
|
||||||
|
|
||||||
|
@ -402,105 +402,6 @@ static int sse16_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
|
|
||||||
int line_size)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
vector unsigned char perm = vec_lvsl(0, pixels);
|
|
||||||
const vector unsigned char zero =
|
|
||||||
(const vector unsigned char) vec_splat_u8(0);
|
|
||||||
|
|
||||||
for (i = 0; i < 8; i++) {
|
|
||||||
/* Read potentially unaligned pixels.
|
|
||||||
* We're reading 16 pixels, and actually only want 8,
|
|
||||||
* but we simply ignore the extras. */
|
|
||||||
vector unsigned char pixl = vec_ld(0, pixels);
|
|
||||||
vector unsigned char pixr = vec_ld(7, pixels);
|
|
||||||
vector unsigned char bytes = vec_perm(pixl, pixr, perm);
|
|
||||||
|
|
||||||
// Convert the bytes into shorts.
|
|
||||||
vector signed short shorts = (vector signed short) vec_mergeh(zero,
|
|
||||||
bytes);
|
|
||||||
|
|
||||||
// Save the data to the block, we assume the block is 16-byte aligned.
|
|
||||||
vec_st(shorts, i * 16, (vector signed short *) block);
|
|
||||||
|
|
||||||
pixels += line_size;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
|
|
||||||
const uint8_t *s2, int stride)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
vector unsigned char perm1 = vec_lvsl(0, s1);
|
|
||||||
vector unsigned char perm2 = vec_lvsl(0, s2);
|
|
||||||
const vector unsigned char zero =
|
|
||||||
(const vector unsigned char) vec_splat_u8(0);
|
|
||||||
vector signed short shorts1, shorts2;
|
|
||||||
|
|
||||||
for (i = 0; i < 4; i++) {
|
|
||||||
/* Read potentially unaligned pixels.
|
|
||||||
* We're reading 16 pixels, and actually only want 8,
|
|
||||||
* but we simply ignore the extras. */
|
|
||||||
vector unsigned char pixl = vec_ld(0, s1);
|
|
||||||
vector unsigned char pixr = vec_ld(15, s1);
|
|
||||||
vector unsigned char bytes = vec_perm(pixl, pixr, perm1);
|
|
||||||
|
|
||||||
// Convert the bytes into shorts.
|
|
||||||
shorts1 = (vector signed short) vec_mergeh(zero, bytes);
|
|
||||||
|
|
||||||
// Do the same for the second block of pixels.
|
|
||||||
pixl = vec_ld(0, s2);
|
|
||||||
pixr = vec_ld(15, s2);
|
|
||||||
bytes = vec_perm(pixl, pixr, perm2);
|
|
||||||
|
|
||||||
// Convert the bytes into shorts.
|
|
||||||
shorts2 = (vector signed short) vec_mergeh(zero, bytes);
|
|
||||||
|
|
||||||
// Do the subtraction.
|
|
||||||
shorts1 = vec_sub(shorts1, shorts2);
|
|
||||||
|
|
||||||
// Save the data to the block, we assume the block is 16-byte aligned.
|
|
||||||
vec_st(shorts1, 0, (vector signed short *) block);
|
|
||||||
|
|
||||||
s1 += stride;
|
|
||||||
s2 += stride;
|
|
||||||
block += 8;
|
|
||||||
|
|
||||||
/* The code below is a copy of the code above...
|
|
||||||
* This is a manual unroll. */
|
|
||||||
|
|
||||||
/* Read potentially unaligned pixels.
|
|
||||||
* We're reading 16 pixels, and actually only want 8,
|
|
||||||
* but we simply ignore the extras. */
|
|
||||||
pixl = vec_ld(0, s1);
|
|
||||||
pixr = vec_ld(15, s1);
|
|
||||||
bytes = vec_perm(pixl, pixr, perm1);
|
|
||||||
|
|
||||||
// Convert the bytes into shorts.
|
|
||||||
shorts1 = (vector signed short) vec_mergeh(zero, bytes);
|
|
||||||
|
|
||||||
// Do the same for the second block of pixels.
|
|
||||||
pixl = vec_ld(0, s2);
|
|
||||||
pixr = vec_ld(15, s2);
|
|
||||||
bytes = vec_perm(pixl, pixr, perm2);
|
|
||||||
|
|
||||||
// Convert the bytes into shorts.
|
|
||||||
shorts2 = (vector signed short) vec_mergeh(zero, bytes);
|
|
||||||
|
|
||||||
// Do the subtraction.
|
|
||||||
shorts1 = vec_sub(shorts1, shorts2);
|
|
||||||
|
|
||||||
// Save the data to the block, we assume the block is 16-byte aligned.
|
|
||||||
vec_st(shorts1, 0, (vector signed short *) block);
|
|
||||||
|
|
||||||
s1 += stride;
|
|
||||||
s2 += stride;
|
|
||||||
block += 8;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static int hadamard8_diff8x8_altivec(MpegEncContext *s, uint8_t *dst,
|
static int hadamard8_diff8x8_altivec(MpegEncContext *s, uint8_t *dst,
|
||||||
uint8_t *src, int stride, int h)
|
uint8_t *src, int stride, int h)
|
||||||
{
|
{
|
||||||
@ -854,12 +755,6 @@ av_cold void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx,
|
|||||||
c->sse[0] = sse16_altivec;
|
c->sse[0] = sse16_altivec;
|
||||||
c->sse[1] = sse8_altivec;
|
c->sse[1] = sse8_altivec;
|
||||||
|
|
||||||
c->diff_pixels = diff_pixels_altivec;
|
|
||||||
|
|
||||||
if (!high_bit_depth) {
|
|
||||||
c->get_pixels = get_pixels_altivec;
|
|
||||||
}
|
|
||||||
|
|
||||||
c->hadamard8_diff[0] = hadamard8_diff16_altivec;
|
c->hadamard8_diff[0] = hadamard8_diff16_altivec;
|
||||||
c->hadamard8_diff[1] = hadamard8_diff8x8_altivec;
|
c->hadamard8_diff[1] = hadamard8_diff8x8_altivec;
|
||||||
}
|
}
|
||||||
|
153
libavcodec/ppc/pixblockdsp.c
Normal file
153
libavcodec/ppc/pixblockdsp.c
Normal file
@ -0,0 +1,153 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2002 Brian Foley
|
||||||
|
* Copyright (c) 2002 Dieter Shirley
|
||||||
|
* Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
|
||||||
|
*
|
||||||
|
* This file is part of Libav.
|
||||||
|
*
|
||||||
|
* Libav is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Libav is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with Libav; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
#if HAVE_ALTIVEC_H
|
||||||
|
#include <altivec.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "libavutil/attributes.h"
|
||||||
|
#include "libavutil/cpu.h"
|
||||||
|
#include "libavutil/ppc/cpu.h"
|
||||||
|
#include "libavutil/ppc/types_altivec.h"
|
||||||
|
#include "libavutil/ppc/util_altivec.h"
|
||||||
|
#include "libavcodec/avcodec.h"
|
||||||
|
#include "libavcodec/pixblockdsp.h"
|
||||||
|
|
||||||
|
#if HAVE_ALTIVEC
|
||||||
|
|
||||||
|
static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
|
||||||
|
int line_size)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
vector unsigned char perm = vec_lvsl(0, pixels);
|
||||||
|
const vector unsigned char zero =
|
||||||
|
(const vector unsigned char) vec_splat_u8(0);
|
||||||
|
|
||||||
|
for (i = 0; i < 8; i++) {
|
||||||
|
/* Read potentially unaligned pixels.
|
||||||
|
* We're reading 16 pixels, and actually only want 8,
|
||||||
|
* but we simply ignore the extras. */
|
||||||
|
vector unsigned char pixl = vec_ld(0, pixels);
|
||||||
|
vector unsigned char pixr = vec_ld(7, pixels);
|
||||||
|
vector unsigned char bytes = vec_perm(pixl, pixr, perm);
|
||||||
|
|
||||||
|
// Convert the bytes into shorts.
|
||||||
|
vector signed short shorts = (vector signed short) vec_mergeh(zero,
|
||||||
|
bytes);
|
||||||
|
|
||||||
|
// Save the data to the block, we assume the block is 16-byte aligned.
|
||||||
|
vec_st(shorts, i * 16, (vector signed short *) block);
|
||||||
|
|
||||||
|
pixels += line_size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
|
||||||
|
const uint8_t *s2, int stride)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
vector unsigned char perm1 = vec_lvsl(0, s1);
|
||||||
|
vector unsigned char perm2 = vec_lvsl(0, s2);
|
||||||
|
const vector unsigned char zero =
|
||||||
|
(const vector unsigned char) vec_splat_u8(0);
|
||||||
|
vector signed short shorts1, shorts2;
|
||||||
|
|
||||||
|
for (i = 0; i < 4; i++) {
|
||||||
|
/* Read potentially unaligned pixels.
|
||||||
|
* We're reading 16 pixels, and actually only want 8,
|
||||||
|
* but we simply ignore the extras. */
|
||||||
|
vector unsigned char pixl = vec_ld(0, s1);
|
||||||
|
vector unsigned char pixr = vec_ld(15, s1);
|
||||||
|
vector unsigned char bytes = vec_perm(pixl, pixr, perm1);
|
||||||
|
|
||||||
|
// Convert the bytes into shorts.
|
||||||
|
shorts1 = (vector signed short) vec_mergeh(zero, bytes);
|
||||||
|
|
||||||
|
// Do the same for the second block of pixels.
|
||||||
|
pixl = vec_ld(0, s2);
|
||||||
|
pixr = vec_ld(15, s2);
|
||||||
|
bytes = vec_perm(pixl, pixr, perm2);
|
||||||
|
|
||||||
|
// Convert the bytes into shorts.
|
||||||
|
shorts2 = (vector signed short) vec_mergeh(zero, bytes);
|
||||||
|
|
||||||
|
// Do the subtraction.
|
||||||
|
shorts1 = vec_sub(shorts1, shorts2);
|
||||||
|
|
||||||
|
// Save the data to the block, we assume the block is 16-byte aligned.
|
||||||
|
vec_st(shorts1, 0, (vector signed short *) block);
|
||||||
|
|
||||||
|
s1 += stride;
|
||||||
|
s2 += stride;
|
||||||
|
block += 8;
|
||||||
|
|
||||||
|
/* The code below is a copy of the code above...
|
||||||
|
* This is a manual unroll. */
|
||||||
|
|
||||||
|
/* Read potentially unaligned pixels.
|
||||||
|
* We're reading 16 pixels, and actually only want 8,
|
||||||
|
* but we simply ignore the extras. */
|
||||||
|
pixl = vec_ld(0, s1);
|
||||||
|
pixr = vec_ld(15, s1);
|
||||||
|
bytes = vec_perm(pixl, pixr, perm1);
|
||||||
|
|
||||||
|
// Convert the bytes into shorts.
|
||||||
|
shorts1 = (vector signed short) vec_mergeh(zero, bytes);
|
||||||
|
|
||||||
|
// Do the same for the second block of pixels.
|
||||||
|
pixl = vec_ld(0, s2);
|
||||||
|
pixr = vec_ld(15, s2);
|
||||||
|
bytes = vec_perm(pixl, pixr, perm2);
|
||||||
|
|
||||||
|
// Convert the bytes into shorts.
|
||||||
|
shorts2 = (vector signed short) vec_mergeh(zero, bytes);
|
||||||
|
|
||||||
|
// Do the subtraction.
|
||||||
|
shorts1 = vec_sub(shorts1, shorts2);
|
||||||
|
|
||||||
|
// Save the data to the block, we assume the block is 16-byte aligned.
|
||||||
|
vec_st(shorts1, 0, (vector signed short *) block);
|
||||||
|
|
||||||
|
s1 += stride;
|
||||||
|
s2 += stride;
|
||||||
|
block += 8;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* HAVE_ALTIVEC */
|
||||||
|
|
||||||
|
av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c,
|
||||||
|
AVCodecContext *avctx,
|
||||||
|
unsigned high_bit_depth)
|
||||||
|
{
|
||||||
|
#if HAVE_ALTIVEC
|
||||||
|
if (!PPC_ALTIVEC(av_get_cpu_flags()))
|
||||||
|
return;
|
||||||
|
|
||||||
|
c->diff_pixels = diff_pixels_altivec;
|
||||||
|
|
||||||
|
if (!high_bit_depth) {
|
||||||
|
c->get_pixels = get_pixels_altivec;
|
||||||
|
}
|
||||||
|
#endif /* HAVE_ALTIVEC */
|
||||||
|
}
|
@ -26,6 +26,7 @@ OBJS-$(CONFIG_MPEGVIDEO) += x86/mpegvideo.o \
|
|||||||
x86/mpegvideodsp.o
|
x86/mpegvideodsp.o
|
||||||
OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoenc.o \
|
OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoenc.o \
|
||||||
x86/mpegvideoencdsp_init.o
|
x86/mpegvideoencdsp_init.o
|
||||||
|
OBJS-$(CONFIG_PIXBLOCKDSP) += x86/pixblockdsp_init.o
|
||||||
OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp_init.o
|
OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp_init.o
|
||||||
OBJS-$(CONFIG_VIDEODSP) += x86/videodsp_init.o
|
OBJS-$(CONFIG_VIDEODSP) += x86/videodsp_init.o
|
||||||
OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp_init.o
|
OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp_init.o
|
||||||
@ -93,6 +94,7 @@ YASM-OBJS-$(CONFIG_HPELDSP) += x86/fpel.o \
|
|||||||
YASM-OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp.o
|
YASM-OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp.o
|
||||||
YASM-OBJS-$(CONFIG_MPEGAUDIODSP) += x86/imdct36.o
|
YASM-OBJS-$(CONFIG_MPEGAUDIODSP) += x86/imdct36.o
|
||||||
YASM-OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoencdsp.o
|
YASM-OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoencdsp.o
|
||||||
|
YASM-OBJS-$(CONFIG_PIXBLOCKDSP) += x86/pixblockdsp.o
|
||||||
YASM-OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp.o \
|
YASM-OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp.o \
|
||||||
x86/fpel.o \
|
x86/fpel.o \
|
||||||
x86/qpel.o
|
x86/qpel.o
|
||||||
|
@ -334,87 +334,3 @@ cglobal sse16, 5, 5, 8
|
|||||||
paddd m7, m1
|
paddd m7, m1
|
||||||
movd eax, m7 ; return value
|
movd eax, m7 ; return value
|
||||||
RET
|
RET
|
||||||
|
|
||||||
INIT_MMX mmx
|
|
||||||
; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size)
|
|
||||||
cglobal get_pixels, 3,4
|
|
||||||
movsxdifnidn r2, r2d
|
|
||||||
add r0, 128
|
|
||||||
mov r3, -128
|
|
||||||
pxor m7, m7
|
|
||||||
.loop:
|
|
||||||
mova m0, [r1]
|
|
||||||
mova m2, [r1+r2]
|
|
||||||
mova m1, m0
|
|
||||||
mova m3, m2
|
|
||||||
punpcklbw m0, m7
|
|
||||||
punpckhbw m1, m7
|
|
||||||
punpcklbw m2, m7
|
|
||||||
punpckhbw m3, m7
|
|
||||||
mova [r0+r3+ 0], m0
|
|
||||||
mova [r0+r3+ 8], m1
|
|
||||||
mova [r0+r3+16], m2
|
|
||||||
mova [r0+r3+24], m3
|
|
||||||
lea r1, [r1+r2*2]
|
|
||||||
add r3, 32
|
|
||||||
js .loop
|
|
||||||
REP_RET
|
|
||||||
|
|
||||||
INIT_XMM sse2
|
|
||||||
cglobal get_pixels, 3, 4
|
|
||||||
movsxdifnidn r2, r2d
|
|
||||||
lea r3, [r2*3]
|
|
||||||
pxor m4, m4
|
|
||||||
movh m0, [r1]
|
|
||||||
movh m1, [r1+r2]
|
|
||||||
movh m2, [r1+r2*2]
|
|
||||||
movh m3, [r1+r3]
|
|
||||||
lea r1, [r1+r2*4]
|
|
||||||
punpcklbw m0, m4
|
|
||||||
punpcklbw m1, m4
|
|
||||||
punpcklbw m2, m4
|
|
||||||
punpcklbw m3, m4
|
|
||||||
mova [r0], m0
|
|
||||||
mova [r0+0x10], m1
|
|
||||||
mova [r0+0x20], m2
|
|
||||||
mova [r0+0x30], m3
|
|
||||||
movh m0, [r1]
|
|
||||||
movh m1, [r1+r2*1]
|
|
||||||
movh m2, [r1+r2*2]
|
|
||||||
movh m3, [r1+r3]
|
|
||||||
punpcklbw m0, m4
|
|
||||||
punpcklbw m1, m4
|
|
||||||
punpcklbw m2, m4
|
|
||||||
punpcklbw m3, m4
|
|
||||||
mova [r0+0x40], m0
|
|
||||||
mova [r0+0x50], m1
|
|
||||||
mova [r0+0x60], m2
|
|
||||||
mova [r0+0x70], m3
|
|
||||||
RET
|
|
||||||
|
|
||||||
INIT_MMX mmx
|
|
||||||
; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
|
|
||||||
; int stride);
|
|
||||||
cglobal diff_pixels, 4,5
|
|
||||||
movsxdifnidn r3, r3d
|
|
||||||
pxor m7, m7
|
|
||||||
add r0, 128
|
|
||||||
mov r4, -128
|
|
||||||
.loop:
|
|
||||||
mova m0, [r1]
|
|
||||||
mova m2, [r2]
|
|
||||||
mova m1, m0
|
|
||||||
mova m3, m2
|
|
||||||
punpcklbw m0, m7
|
|
||||||
punpckhbw m1, m7
|
|
||||||
punpcklbw m2, m7
|
|
||||||
punpckhbw m3, m7
|
|
||||||
psubw m0, m2
|
|
||||||
psubw m1, m3
|
|
||||||
mova [r0+r4+0], m0
|
|
||||||
mova [r0+r4+8], m1
|
|
||||||
add r1, r3
|
|
||||||
add r2, r3
|
|
||||||
add r4, 16
|
|
||||||
jne .loop
|
|
||||||
REP_RET
|
|
||||||
|
@ -30,11 +30,6 @@
|
|||||||
#include "libavcodec/mpegvideo.h"
|
#include "libavcodec/mpegvideo.h"
|
||||||
#include "dsputil_x86.h"
|
#include "dsputil_x86.h"
|
||||||
|
|
||||||
void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size);
|
|
||||||
void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, int line_size);
|
|
||||||
void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
|
|
||||||
int stride);
|
|
||||||
|
|
||||||
#if HAVE_INLINE_ASM
|
#if HAVE_INLINE_ASM
|
||||||
|
|
||||||
static int sse8_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
|
static int sse8_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
|
||||||
@ -823,16 +818,6 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
|
|||||||
{
|
{
|
||||||
int cpu_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
if (EXTERNAL_MMX(cpu_flags)) {
|
|
||||||
if (!high_bit_depth)
|
|
||||||
c->get_pixels = ff_get_pixels_mmx;
|
|
||||||
c->diff_pixels = ff_diff_pixels_mmx;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (EXTERNAL_SSE2(cpu_flags))
|
|
||||||
if (!high_bit_depth)
|
|
||||||
c->get_pixels = ff_get_pixels_sse2;
|
|
||||||
|
|
||||||
#if HAVE_INLINE_ASM
|
#if HAVE_INLINE_ASM
|
||||||
if (INLINE_MMX(cpu_flags)) {
|
if (INLINE_MMX(cpu_flags)) {
|
||||||
c->sum_abs_dctelem = sum_abs_dctelem_mmx;
|
c->sum_abs_dctelem = sum_abs_dctelem_mmx;
|
||||||
|
110
libavcodec/x86/pixblockdsp.asm
Normal file
110
libavcodec/x86/pixblockdsp.asm
Normal file
@ -0,0 +1,110 @@
|
|||||||
|
;*****************************************************************************
|
||||||
|
;* SIMD-optimized pixel operations
|
||||||
|
;*****************************************************************************
|
||||||
|
;* Copyright (c) 2000, 2001 Fabrice Bellard
|
||||||
|
;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
|
||||||
|
;*
|
||||||
|
;* This file is part of Libav.
|
||||||
|
;*
|
||||||
|
;* Libav is free software; you can redistribute it and/or
|
||||||
|
;* modify it under the terms of the GNU Lesser General Public
|
||||||
|
;* License as published by the Free Software Foundation; either
|
||||||
|
;* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
;*
|
||||||
|
;* Libav is distributed in the hope that it will be useful,
|
||||||
|
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
;* Lesser General Public License for more details.
|
||||||
|
;*
|
||||||
|
;* You should have received a copy of the GNU Lesser General Public
|
||||||
|
;* License along with Libav; if not, write to the Free Software
|
||||||
|
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
;*****************************************************************************
|
||||||
|
|
||||||
|
%include "libavutil/x86/x86util.asm"
|
||||||
|
|
||||||
|
SECTION .text
|
||||||
|
|
||||||
|
INIT_MMX mmx
; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size)
; Widen 8 rows of 8 unsigned bytes to 16-bit words and store them to block.
; Each loop iteration handles two source rows (32 output bytes).
cglobal get_pixels, 3, 4, 0, block, pixels, stride, offset
    movsxdifnidn strideq, strided           ; sign-extend the int argument
    add           blockq, 128               ; point just past the 128-byte output
    mov          offsetq, -128              ; negative index that counts up to 0
    pxor              m7, m7                ; zero, used as the high byte source
.loop:
    mova              m0, [pixelsq]         ; row n
    mova              m2, [pixelsq+strideq] ; row n+1
    mova              m1, m0
    mova              m3, m2
    punpcklbw         m0, m7                ; row n,   bytes 0-3 -> words
    punpckhbw         m1, m7                ; row n,   bytes 4-7 -> words
    punpcklbw         m2, m7                ; row n+1, bytes 0-3 -> words
    punpckhbw         m3, m7                ; row n+1, bytes 4-7 -> words
    mova [blockq+offsetq+ 0], m0
    mova [blockq+offsetq+ 8], m1
    mova [blockq+offsetq+16], m2
    mova [blockq+offsetq+24], m3
    lea          pixelsq, [pixelsq+strideq*2]
    add          offsetq, 32
    js .loop                                ; continue while the index is negative
    REP_RET
|
||||||
|
|
||||||
|
INIT_XMM sse2
; void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, int line_size)
; Widen 8 rows of 8 unsigned bytes to 16-bit words and store them to block.
; Fully unrolled: two groups of four rows, one 16-byte store per row.
cglobal get_pixels, 3, 4, 0, block, pixels, stride, stride3
    movsxdifnidn strideq, strided           ; sign-extend the int argument
    lea         stride3q, [strideq*3]       ; offset of the fourth row in a group
    pxor              m4, m4                ; zero, used as the high byte source

    ; rows 0-3
    movh              m0, [pixelsq]
    movh              m1, [pixelsq+strideq]
    movh              m2, [pixelsq+strideq*2]
    movh              m3, [pixelsq+stride3q]
    lea          pixelsq, [pixelsq+strideq*4]
    punpcklbw         m0, m4
    punpcklbw         m1, m4
    punpcklbw         m2, m4
    punpcklbw         m3, m4
    mova        [blockq], m0
    mova   [blockq+0x10], m1
    mova   [blockq+0x20], m2
    mova   [blockq+0x30], m3

    ; rows 4-7
    movh              m0, [pixelsq]
    movh              m1, [pixelsq+strideq*1]
    movh              m2, [pixelsq+strideq*2]
    movh              m3, [pixelsq+stride3q]
    punpcklbw         m0, m4
    punpcklbw         m1, m4
    punpcklbw         m2, m4
    punpcklbw         m3, m4
    mova   [blockq+0x40], m0
    mova   [blockq+0x50], m1
    mova   [blockq+0x60], m2
    mova   [blockq+0x70], m3
    RET
|
||||||
|
|
||||||
|
INIT_MMX mmx
; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
;                         int stride);
; For 8 rows of 8 unsigned bytes, store s1 - s2 as 16-bit words to block.
; Each loop iteration handles one row (16 output bytes).
cglobal diff_pixels, 4, 5, 0, block, s1, s2, stride, offset
    movsxdifnidn strideq, strided           ; sign-extend the int argument
    pxor              m7, m7                ; zero, used as the high byte source
    add           blockq, 128               ; point just past the 128-byte output
    mov          offsetq, -128              ; negative index that counts up to 0
.loop:
    mova              m0, [s1q]
    mova              m2, [s2q]
    mova              m1, m0
    mova              m3, m2
    punpcklbw         m0, m7                ; s1 bytes 0-3 -> words
    punpckhbw         m1, m7                ; s1 bytes 4-7 -> words
    punpcklbw         m2, m7                ; s2 bytes 0-3 -> words
    punpckhbw         m3, m7                ; s2 bytes 4-7 -> words
    psubw             m0, m2
    psubw             m1, m3
    mova [blockq+offsetq+0], m0
    mova [blockq+offsetq+8], m1
    add              s1q, strideq
    add              s2q, strideq
    add          offsetq, 16                ; must stay last: jne tests its flags
    jne .loop
    REP_RET
|
47
libavcodec/x86/pixblockdsp_init.c
Normal file
47
libavcodec/x86/pixblockdsp_init.c
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
/*
|
||||||
|
* SIMD-optimized pixel operations
|
||||||
|
*
|
||||||
|
* This file is part of Libav.
|
||||||
|
*
|
||||||
|
* Libav is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Libav is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with Libav; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "libavutil/attributes.h"
|
||||||
|
#include "libavutil/cpu.h"
|
||||||
|
#include "libavutil/x86/cpu.h"
|
||||||
|
#include "libavcodec/pixblockdsp.h"
|
||||||
|
|
||||||
|
void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size);
|
||||||
|
void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, int line_size);
|
||||||
|
void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
|
||||||
|
int stride);
|
||||||
|
|
||||||
|
av_cold void ff_pixblockdsp_init_x86(PixblockDSPContext *c,
|
||||||
|
AVCodecContext *avctx,
|
||||||
|
unsigned high_bit_depth)
|
||||||
|
{
|
||||||
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
|
if (EXTERNAL_MMX(cpu_flags)) {
|
||||||
|
if (!high_bit_depth)
|
||||||
|
c->get_pixels = ff_get_pixels_mmx;
|
||||||
|
c->diff_pixels = ff_diff_pixels_mmx;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
|
if (!high_bit_depth)
|
||||||
|
c->get_pixels = ff_get_pixels_sse2;
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user