You've already forked FFmpeg
mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-08-10 06:10:52 +02:00
libavcodec/blockdsp: add AVX version
Also modify the required alignment to 32 instead of 16 for several codecs.
Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
committed by
James Almer
parent
4590d073cc
commit
cbbec68847
@@ -54,7 +54,7 @@ typedef struct ASV1Context {
|
|||||||
int mb_height;
|
int mb_height;
|
||||||
int mb_width2;
|
int mb_width2;
|
||||||
int mb_height2;
|
int mb_height2;
|
||||||
DECLARE_ALIGNED(16, int16_t, block)[6][64];
|
DECLARE_ALIGNED(32, int16_t, block)[6][64];
|
||||||
uint16_t intra_matrix[64];
|
uint16_t intra_matrix[64];
|
||||||
int q_intra_matrix[64];
|
int q_intra_matrix[64];
|
||||||
uint8_t *bitstream_buffer;
|
uint8_t *bitstream_buffer;
|
||||||
|
@@ -813,7 +813,7 @@ static int binkb_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
|
|||||||
int v, col[2];
|
int v, col[2];
|
||||||
const uint8_t *scan;
|
const uint8_t *scan;
|
||||||
int xoff, yoff;
|
int xoff, yoff;
|
||||||
LOCAL_ALIGNED_16(int16_t, block, [64]);
|
LOCAL_ALIGNED_32(int16_t, block, [64]);
|
||||||
LOCAL_ALIGNED_16(int32_t, dctblock, [64]);
|
LOCAL_ALIGNED_16(int32_t, dctblock, [64]);
|
||||||
int coordmap[64];
|
int coordmap[64];
|
||||||
int ybias = is_key ? -15 : 0;
|
int ybias = is_key ? -15 : 0;
|
||||||
@@ -976,7 +976,7 @@ static int bink_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
|
|||||||
uint8_t *dst, *prev, *ref_start, *ref_end;
|
uint8_t *dst, *prev, *ref_start, *ref_end;
|
||||||
int v, col[2];
|
int v, col[2];
|
||||||
const uint8_t *scan;
|
const uint8_t *scan;
|
||||||
LOCAL_ALIGNED_16(int16_t, block, [64]);
|
LOCAL_ALIGNED_32(int16_t, block, [64]);
|
||||||
LOCAL_ALIGNED_16(uint8_t, ublock, [64]);
|
LOCAL_ALIGNED_16(uint8_t, ublock, [64]);
|
||||||
LOCAL_ALIGNED_16(int32_t, dctblock, [64]);
|
LOCAL_ALIGNED_16(int32_t, dctblock, [64]);
|
||||||
int coordmap[64];
|
int coordmap[64];
|
||||||
|
@@ -74,7 +74,7 @@ typedef struct DNXHDEncContext {
|
|||||||
unsigned min_padding;
|
unsigned min_padding;
|
||||||
int intra_quant_bias;
|
int intra_quant_bias;
|
||||||
|
|
||||||
DECLARE_ALIGNED(16, int16_t, blocks)[12][64];
|
DECLARE_ALIGNED(32, int16_t, blocks)[12][64];
|
||||||
DECLARE_ALIGNED(16, uint8_t, edge_buf_y)[512]; // has to hold 16x16 uint16 when depth=10
|
DECLARE_ALIGNED(16, uint8_t, edge_buf_y)[512]; // has to hold 16x16 uint16 when depth=10
|
||||||
DECLARE_ALIGNED(16, uint8_t, edge_buf_uv)[2][512]; // has to hold 16x16 uint16_t when depth=10
|
DECLARE_ALIGNED(16, uint8_t, edge_buf_uv)[2][512]; // has to hold 16x16 uint16_t when depth=10
|
||||||
|
|
||||||
|
@@ -54,7 +54,7 @@ typedef struct MadContext {
|
|||||||
GetBitContext gb;
|
GetBitContext gb;
|
||||||
void *bitstream_buf;
|
void *bitstream_buf;
|
||||||
unsigned int bitstream_buf_size;
|
unsigned int bitstream_buf_size;
|
||||||
DECLARE_ALIGNED(16, int16_t, block)[64];
|
DECLARE_ALIGNED(32, int16_t, block)[64];
|
||||||
ScanTable scantable;
|
ScanTable scantable;
|
||||||
uint16_t quant_matrix[64];
|
uint16_t quant_matrix[64];
|
||||||
int mb_x;
|
int mb_x;
|
||||||
|
@@ -51,7 +51,7 @@ typedef struct TqiContext {
|
|||||||
uint16_t intra_matrix[64];
|
uint16_t intra_matrix[64];
|
||||||
int last_dc[3];
|
int last_dc[3];
|
||||||
|
|
||||||
DECLARE_ALIGNED(16, int16_t, block)[6][64];
|
DECLARE_ALIGNED(32, int16_t, block)[6][64];
|
||||||
} TqiContext;
|
} TqiContext;
|
||||||
|
|
||||||
static av_cold int tqi_decode_init(AVCodecContext *avctx)
|
static av_cold int tqi_decode_init(AVCodecContext *avctx)
|
||||||
|
@@ -122,7 +122,7 @@ typedef struct JPGContext {
|
|||||||
|
|
||||||
VLC dc_vlc[2], ac_vlc[2];
|
VLC dc_vlc[2], ac_vlc[2];
|
||||||
int prev_dc[3];
|
int prev_dc[3];
|
||||||
DECLARE_ALIGNED(16, int16_t, block)[6][64];
|
DECLARE_ALIGNED(32, int16_t, block)[6][64];
|
||||||
|
|
||||||
uint8_t *buf;
|
uint8_t *buf;
|
||||||
} JPGContext;
|
} JPGContext;
|
||||||
|
@@ -574,7 +574,7 @@ not_coded:
|
|||||||
|
|
||||||
static int h263_skip_b_part(MpegEncContext *s, int cbp)
|
static int h263_skip_b_part(MpegEncContext *s, int cbp)
|
||||||
{
|
{
|
||||||
LOCAL_ALIGNED_16(int16_t, dblock, [64]);
|
LOCAL_ALIGNED_32(int16_t, dblock, [64]);
|
||||||
int i, mbi;
|
int i, mbi;
|
||||||
int bli[6];
|
int bli[6];
|
||||||
|
|
||||||
|
@@ -48,7 +48,7 @@ typedef struct MDECContext {
|
|||||||
int mb_width;
|
int mb_width;
|
||||||
int mb_height;
|
int mb_height;
|
||||||
int mb_x, mb_y;
|
int mb_x, mb_y;
|
||||||
DECLARE_ALIGNED(16, int16_t, block)[6][64];
|
DECLARE_ALIGNED(32, int16_t, block)[6][64];
|
||||||
DECLARE_ALIGNED(16, uint16_t, quant_matrix)[64];
|
DECLARE_ALIGNED(16, uint16_t, quant_matrix)[64];
|
||||||
uint8_t *bitstream_buffer;
|
uint8_t *bitstream_buffer;
|
||||||
unsigned int bitstream_buffer_size;
|
unsigned int bitstream_buffer_size;
|
||||||
|
@@ -49,7 +49,7 @@ typedef struct MimicContext {
|
|||||||
|
|
||||||
ThreadFrame frames [16];
|
ThreadFrame frames [16];
|
||||||
|
|
||||||
DECLARE_ALIGNED(16, int16_t, dct_block)[64];
|
DECLARE_ALIGNED(32, int16_t, dct_block)[64];
|
||||||
|
|
||||||
GetBitContext gb;
|
GetBitContext gb;
|
||||||
ScanTable scantable;
|
ScanTable scantable;
|
||||||
|
@@ -98,7 +98,7 @@ typedef struct MJpegDecodeContext {
|
|||||||
int got_picture; ///< we found a SOF and picture is valid, too.
|
int got_picture; ///< we found a SOF and picture is valid, too.
|
||||||
int linesize[MAX_COMPONENTS]; ///< linesize << interlaced
|
int linesize[MAX_COMPONENTS]; ///< linesize << interlaced
|
||||||
int8_t *qscale_table;
|
int8_t *qscale_table;
|
||||||
DECLARE_ALIGNED(16, int16_t, block)[64];
|
DECLARE_ALIGNED(32, int16_t, block)[64];
|
||||||
int16_t (*blocks[MAX_COMPONENTS])[64]; ///< intermediate sums (progressive mode)
|
int16_t (*blocks[MAX_COMPONENTS])[64]; ///< intermediate sums (progressive mode)
|
||||||
uint8_t *last_nnz[MAX_COMPONENTS];
|
uint8_t *last_nnz[MAX_COMPONENTS];
|
||||||
uint64_t coefs_finished[MAX_COMPONENTS]; ///< bitmask of which coefs have been completely decoded (progressive mode)
|
uint64_t coefs_finished[MAX_COMPONENTS]; ///< bitmask of which coefs have been completely decoded (progressive mode)
|
||||||
|
@@ -368,7 +368,7 @@ static int decode_slice_luma(AVCodecContext *avctx, SliceContext *slice,
|
|||||||
const int16_t *qmat)
|
const int16_t *qmat)
|
||||||
{
|
{
|
||||||
ProresContext *ctx = avctx->priv_data;
|
ProresContext *ctx = avctx->priv_data;
|
||||||
LOCAL_ALIGNED_16(int16_t, blocks, [8*4*64]);
|
LOCAL_ALIGNED_32(int16_t, blocks, [8*4*64]);
|
||||||
int16_t *block;
|
int16_t *block;
|
||||||
GetBitContext gb;
|
GetBitContext gb;
|
||||||
int i, blocks_per_slice = slice->mb_count<<2;
|
int i, blocks_per_slice = slice->mb_count<<2;
|
||||||
@@ -402,7 +402,7 @@ static int decode_slice_chroma(AVCodecContext *avctx, SliceContext *slice,
|
|||||||
const int16_t *qmat, int log2_blocks_per_mb)
|
const int16_t *qmat, int log2_blocks_per_mb)
|
||||||
{
|
{
|
||||||
ProresContext *ctx = avctx->priv_data;
|
ProresContext *ctx = avctx->priv_data;
|
||||||
LOCAL_ALIGNED_16(int16_t, blocks, [8*4*64]);
|
LOCAL_ALIGNED_32(int16_t, blocks, [8*4*64]);
|
||||||
int16_t *block;
|
int16_t *block;
|
||||||
GetBitContext gb;
|
GetBitContext gb;
|
||||||
int i, j, blocks_per_slice = slice->mb_count << log2_blocks_per_mb;
|
int i, j, blocks_per_slice = slice->mb_count << log2_blocks_per_mb;
|
||||||
@@ -485,7 +485,7 @@ static void decode_slice_alpha(ProresContext *ctx,
|
|||||||
{
|
{
|
||||||
GetBitContext gb;
|
GetBitContext gb;
|
||||||
int i;
|
int i;
|
||||||
LOCAL_ALIGNED_16(int16_t, blocks, [8*4*64]);
|
LOCAL_ALIGNED_32(int16_t, blocks, [8*4*64]);
|
||||||
int16_t *block;
|
int16_t *block;
|
||||||
|
|
||||||
for (i = 0; i < blocks_per_slice<<2; i++)
|
for (i = 0; i < blocks_per_slice<<2; i++)
|
||||||
|
@@ -224,7 +224,7 @@ static inline int decode_dct_block(const SHQContext *s, GetBitContext *gb, int l
|
|||||||
{
|
{
|
||||||
const int *quant_matrix = s->quant_matrix;
|
const int *quant_matrix = s->quant_matrix;
|
||||||
const uint8_t *scantable = s->intra_scantable.permutated;
|
const uint8_t *scantable = s->intra_scantable.permutated;
|
||||||
LOCAL_ALIGNED_16(int16_t, block, [64]);
|
LOCAL_ALIGNED_32(int16_t, block, [64]);
|
||||||
int dc_offset;
|
int dc_offset;
|
||||||
|
|
||||||
s->bdsp.clear_block(block);
|
s->bdsp.clear_block(block);
|
||||||
|
@@ -51,7 +51,7 @@ typedef struct Wmv2Context {
|
|||||||
int hshift;
|
int hshift;
|
||||||
|
|
||||||
ScanTable abt_scantable[2];
|
ScanTable abt_scantable[2];
|
||||||
DECLARE_ALIGNED(16, int16_t, abt_block2)[6][64];
|
DECLARE_ALIGNED(32, int16_t, abt_block2)[6][64];
|
||||||
} Wmv2Context;
|
} Wmv2Context;
|
||||||
|
|
||||||
void ff_wmv2_common_init(Wmv2Context *w);
|
void ff_wmv2_common_init(Wmv2Context *w);
|
||||||
|
@@ -4,6 +4,8 @@
|
|||||||
;* Copyright (c) 2008 Loren Merritt
|
;* Copyright (c) 2008 Loren Merritt
|
||||||
;* Copyright (c) 2009 Fiona Glaser
|
;* Copyright (c) 2009 Fiona Glaser
|
||||||
;*
|
;*
|
||||||
|
;* AVX version by Jokyo Images
|
||||||
|
;*
|
||||||
;* This file is part of FFmpeg.
|
;* This file is part of FFmpeg.
|
||||||
;*
|
;*
|
||||||
;* FFmpeg is free software; you can redistribute it and/or
|
;* FFmpeg is free software; you can redistribute it and/or
|
||||||
@@ -39,20 +41,18 @@ cglobal clear_block, 1, 1, %1, blocks
|
|||||||
mova [blocksq+mmsize*(1+%%i)], m0
|
mova [blocksq+mmsize*(1+%%i)], m0
|
||||||
mova [blocksq+mmsize*(2+%%i)], m0
|
mova [blocksq+mmsize*(2+%%i)], m0
|
||||||
mova [blocksq+mmsize*(3+%%i)], m0
|
mova [blocksq+mmsize*(3+%%i)], m0
|
||||||
mova [blocksq+mmsize*(4+%%i)], m0
|
%assign %%i %%i+4
|
||||||
mova [blocksq+mmsize*(5+%%i)], m0
|
|
||||||
mova [blocksq+mmsize*(6+%%i)], m0
|
|
||||||
mova [blocksq+mmsize*(7+%%i)], m0
|
|
||||||
%assign %%i %%i+8
|
|
||||||
%endrep
|
%endrep
|
||||||
RET
|
RET
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
INIT_MMX mmx
|
INIT_MMX mmx
|
||||||
%define ZERO pxor
|
%define ZERO pxor
|
||||||
CLEAR_BLOCK 0, 2
|
CLEAR_BLOCK 0, 4
|
||||||
INIT_XMM sse
|
INIT_XMM sse
|
||||||
%define ZERO xorps
|
%define ZERO xorps
|
||||||
|
CLEAR_BLOCK 1, 2
|
||||||
|
INIT_YMM avx
|
||||||
CLEAR_BLOCK 1, 1
|
CLEAR_BLOCK 1, 1
|
||||||
|
|
||||||
;-----------------------------------------
|
;-----------------------------------------
|
||||||
@@ -84,3 +84,5 @@ CLEAR_BLOCKS 0
|
|||||||
INIT_XMM sse
|
INIT_XMM sse
|
||||||
%define ZERO xorps
|
%define ZERO xorps
|
||||||
CLEAR_BLOCKS 1
|
CLEAR_BLOCKS 1
|
||||||
|
INIT_YMM avx
|
||||||
|
CLEAR_BLOCKS 1
|
||||||
|
@@ -28,8 +28,10 @@
|
|||||||
|
|
||||||
void ff_clear_block_mmx(int16_t *block);
|
void ff_clear_block_mmx(int16_t *block);
|
||||||
void ff_clear_block_sse(int16_t *block);
|
void ff_clear_block_sse(int16_t *block);
|
||||||
|
void ff_clear_block_avx(int16_t *block);
|
||||||
void ff_clear_blocks_mmx(int16_t *blocks);
|
void ff_clear_blocks_mmx(int16_t *blocks);
|
||||||
void ff_clear_blocks_sse(int16_t *blocks);
|
void ff_clear_blocks_sse(int16_t *blocks);
|
||||||
|
void ff_clear_blocks_avx(int16_t *blocks);
|
||||||
|
|
||||||
av_cold void ff_blockdsp_init_x86(BlockDSPContext *c,
|
av_cold void ff_blockdsp_init_x86(BlockDSPContext *c,
|
||||||
AVCodecContext *avctx)
|
AVCodecContext *avctx)
|
||||||
@@ -50,5 +52,9 @@ av_cold void ff_blockdsp_init_x86(BlockDSPContext *c,
|
|||||||
c->clear_block = ff_clear_block_sse;
|
c->clear_block = ff_clear_block_sse;
|
||||||
c->clear_blocks = ff_clear_blocks_sse;
|
c->clear_blocks = ff_clear_blocks_sse;
|
||||||
}
|
}
|
||||||
|
if (EXTERNAL_AVX_FAST(cpu_flags)) {
|
||||||
|
c->clear_block = ff_clear_block_avx;
|
||||||
|
c->clear_blocks = ff_clear_blocks_avx;
|
||||||
|
}
|
||||||
#endif /* HAVE_X86ASM */
|
#endif /* HAVE_X86ASM */
|
||||||
}
|
}
|
||||||
|
@@ -53,8 +53,8 @@ do { \
|
|||||||
|
|
||||||
void checkasm_check_blockdsp(void)
|
void checkasm_check_blockdsp(void)
|
||||||
{
|
{
|
||||||
LOCAL_ALIGNED_16(uint16_t, buf0, [6 * 8 * 8]);
|
LOCAL_ALIGNED_32(uint16_t, buf0, [6 * 8 * 8]);
|
||||||
LOCAL_ALIGNED_16(uint16_t, buf1, [6 * 8 * 8]);
|
LOCAL_ALIGNED_32(uint16_t, buf1, [6 * 8 * 8]);
|
||||||
|
|
||||||
AVCodecContext avctx = { 0 };
|
AVCodecContext avctx = { 0 };
|
||||||
BlockDSPContext h;
|
BlockDSPContext h;
|
||||||
|
Reference in New Issue
Block a user