From 2ed008204d5467be03a0a3af1e293b2f7038d0a0 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Mon, 11 Feb 2013 17:04:27 -0800 Subject: [PATCH] h264: Add add_pixels4/8() to h264dsp, and remove add_pixels4 from dsputil MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These functions are mostly H264-specific (the only other user I can spot is bink), and this allows us to special-case some functionality for H264. Also remove the 16-bit-coeff with >8bpp versions (unused) and merge the duplicate 32-bit-coeff for >8bpp (identical). Signed-off-by: Martin Storsjö --- libavcodec/dsputil.c | 24 +++++++++++- libavcodec/dsputil.h | 1 - libavcodec/dsputil_template.c | 42 -------------------- libavcodec/h264.c | 14 +++---- libavcodec/h264_mb_template.c | 2 +- libavcodec/h264addpx_template.c | 68 +++++++++++++++++++++++++++++++++ libavcodec/h264dsp.c | 18 +++++++++ libavcodec/h264dsp.h | 4 ++ 8 files changed, 120 insertions(+), 53 deletions(-) create mode 100644 libavcodec/h264addpx_template.c diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 30c315eab8..62fa0acbde 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -403,6 +403,26 @@ static void put_signed_pixels_clamped_c(const int16_t *block, } } +static void add_pixels8_c(uint8_t *restrict pixels, + int16_t *block, + int line_size) +{ + int i; + + for(i=0;i<8;i++) { + pixels[0] += block[0]; + pixels[1] += block[1]; + pixels[2] += block[2]; + pixels[3] += block[3]; + pixels[4] += block[4]; + pixels[5] += block[5]; + pixels[6] += block[6]; + pixels[7] += block[7]; + pixels += line_size; + block += 8; + } +} + static void add_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels, int line_size) { @@ -2678,6 +2698,8 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) c->shrink[2]= ff_shrink44; c->shrink[3]= ff_shrink88; + c->add_pixels8 = add_pixels8_c; + #define hpel_funcs(prefix, idx, num) \ c->prefix ## _pixels_tab idx [0] = prefix ## _pixels ## num ## _8_c; \ c->prefix ## _pixels_tab idx [1] = prefix ## _pixels ## num ## _x2_8_c; \ @@ -2706,8 +2728,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) c->draw_edges = FUNCC(draw_edges , depth);\ c->clear_block = FUNCC(clear_block ## dct , depth);\ c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\ - c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\ - c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\ switch (avctx->bits_per_raw_sample) { case 9: diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index d0ba95ceeb..e69ac32e6e 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -153,7 +153,6 @@ typedef struct DSPContext { void (*put_signed_pixels_clamped)(const int16_t *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); void (*add_pixels_clamped)(const int16_t *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); void (*add_pixels8)(uint8_t *pixels, int16_t *block, int line_size); - void (*add_pixels4)(uint8_t *pixels, int16_t *block, int line_size); int (*sum_abs_dctelem)(int16_t *block/*align 16*/); /** * translational global motion compensation. diff --git a/libavcodec/dsputil_template.c b/libavcodec/dsputil_template.c index 5244e20b84..ec7f9305ec 100644 --- a/libavcodec/dsputil_template.c +++ b/libavcodec/dsputil_template.c @@ -89,48 +89,6 @@ static void FUNCC(get_pixels ## suffix)(int16_t *restrict _block, \ } \ } \ \ -static void FUNCC(add_pixels8 ## suffix)(uint8_t *restrict _pixels, \ - int16_t *_block, \ - int line_size) \ -{ \ - int i; \ - pixel *restrict pixels = (pixel *restrict)_pixels; \ - dctcoef *block = (dctcoef*)_block; \ - line_size /= sizeof(pixel); \ - \ - for(i=0;i<8;i++) { \ - pixels[0] += block[0]; \ - pixels[1] += block[1]; \ - pixels[2] += block[2]; \ - pixels[3] += block[3]; \ - pixels[4] += block[4]; \ - pixels[5] += block[5]; \ - pixels[6] += block[6]; \ - pixels[7] += block[7]; \ - pixels += line_size; \ - block += 8; \ - } \ -} \ - \ -static void FUNCC(add_pixels4 ## suffix)(uint8_t *restrict _pixels, \ - int16_t *_block, \ - int line_size) \ -{ \ - int i; \ - pixel *restrict pixels = (pixel *restrict)_pixels; \ - dctcoef *block = (dctcoef*)_block; \ - line_size /= sizeof(pixel); \ - \ - for(i=0;i<4;i++) { \ - pixels[0] += block[0]; \ - pixels[1] += block[1]; \ - pixels[2] += block[2]; \ - pixels[3] += block[3]; \ - pixels += line_size; \ - block += 4; \ - } \ -} \ - \ static void FUNCC(clear_block ## suffix)(int16_t *block) \ { \ memset(block, 0, sizeof(dctcoef)*64); \ diff --git a/libavcodec/h264.c b/libavcodec/h264.c index 79e4b0baf6..37cac01af3 100644 --- a/libavcodec/h264.c +++ b/libavcodec/h264.c @@ -2116,7 +2116,7 @@ static av_always_inline void hl_decode_mb_predict_luma(H264Context *h, if (IS_8x8DCT(mb_type)) { if (transform_bypass) { idct_dc_add = - idct_add = h->dsp.add_pixels8; + idct_add = h->h264dsp.h264_add_pixels8; } else { idct_dc_add = h->h264dsp.h264_idct8_dc_add; idct_add = h->h264dsp.h264_idct8_add; @@ -2141,7 +2141,7 @@ static av_always_inline void hl_decode_mb_predict_luma(H264Context *h, } else { if (transform_bypass) { idct_dc_add = - idct_add = h->dsp.add_pixels4; + idct_add = h->h264dsp.h264_add_pixels4; } else { idct_dc_add = h->h264dsp.h264_idct_dc_add; idct_add = h->h264dsp.h264_idct_add; @@ -2238,9 +2238,9 @@ static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type, for (i = 0; i < 16; i++) if (h->non_zero_count_cache[scan8[i + p * 16]] || dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256)) - h->dsp.add_pixels4(dest_y + block_offset[i], - h->mb + (i * 16 + p * 256 << pixel_shift), - linesize); + h->h264dsp.h264_add_pixels4(dest_y + block_offset[i], + h->mb + (i * 16 + p * 256 << pixel_shift), + linesize); } } else { h->h264dsp.h264_idct_add16intra(dest_y, block_offset, @@ -2251,8 +2251,8 @@ static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type, } else if (h->cbp & 15) { if (transform_bypass) { const int di = IS_8x8DCT(mb_type) ? 4 : 1; - idct_add = IS_8x8DCT(mb_type) ? h->dsp.add_pixels8 - : h->dsp.add_pixels4; + idct_add = IS_8x8DCT(mb_type) ? h->h264dsp.h264_add_pixels8 + : h->h264dsp.h264_add_pixels4; for (i = 0; i < 16; i += di) if (h->non_zero_count_cache[scan8[i + p * 16]]) idct_add(dest_y + block_offset[i], diff --git a/libavcodec/h264_mb_template.c b/libavcodec/h264_mb_template.c index 7ff7edd2cd..fdefed4304 100644 --- a/libavcodec/h264_mb_template.c +++ b/libavcodec/h264_mb_template.c @@ -207,7 +207,7 @@ static av_noinline void FUNC(hl_decode_mb)(H264Context *h) h->mb + (16 * 16 * 2 << PIXEL_SHIFT), uvlinesize); } else { - idct_add = h->dsp.add_pixels4; + idct_add = h->h264dsp.h264_add_pixels4; for (j = 1; j < 3; j++) { for (i = j * 16; i < j * 16 + 4; i++) if (h->non_zero_count_cache[scan8[i]] || diff --git a/libavcodec/h264addpx_template.c b/libavcodec/h264addpx_template.c new file mode 100644 index 0000000000..d1babc54e5 --- /dev/null +++ b/libavcodec/h264addpx_template.c @@ -0,0 +1,68 @@ +/* + * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder + * Copyright (c) 2003-2011 Michael Niedermayer + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * H.264 / AVC / MPEG4 part10 DSP functions. + * @author Michael Niedermayer + */ + +#include "bit_depth_template.c" + +static void FUNCC(ff_h264_add_pixels4)(uint8_t *_dst, int16_t *_src, int stride) +{ + int i; + pixel *dst = (pixel *) _dst; + dctcoef *src = (dctcoef *) _src; + stride /= sizeof(pixel); + + for (i = 0; i < 4; i++) { + dst[0] += src[0]; + dst[1] += src[1]; + dst[2] += src[2]; + dst[3] += src[3]; + + dst += stride; + src += 4; + } +} + +static void FUNCC(ff_h264_add_pixels8)(uint8_t *_dst, int16_t *_src, int stride) +{ + int i; + pixel *dst = (pixel *) _dst; + dctcoef *src = (dctcoef *) _src; + stride /= sizeof(pixel); + + for (i = 0; i < 8; i++) { + dst[0] += src[0]; + dst[1] += src[1]; + dst[2] += src[2]; + dst[3] += src[3]; + dst[4] += src[4]; + dst[5] += src[5]; + dst[6] += src[6]; + dst[7] += src[7]; + + dst += stride; + src += 8; + } +} diff --git a/libavcodec/h264dsp.c b/libavcodec/h264dsp.c index d29c685008..b739758e24 100644 --- a/libavcodec/h264dsp.c +++ b/libavcodec/h264dsp.c @@ -43,11 +43,29 @@ #include "h264dsp_template.c" #undef BIT_DEPTH +#define BIT_DEPTH 8 +#include "h264addpx_template.c" +#undef BIT_DEPTH + +#define BIT_DEPTH 16 +#include "h264addpx_template.c" +#undef BIT_DEPTH + void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_format_idc) { #undef FUNC #define FUNC(a, depth) a ## _ ## depth ## _c +#define ADDPX_DSP(depth) \ + c->h264_add_pixels4 = FUNC(ff_h264_add_pixels4, depth);\ + c->h264_add_pixels8 = FUNC(ff_h264_add_pixels8, depth) + + if (bit_depth > 8 && bit_depth <= 16) { + ADDPX_DSP(16); + } else { + ADDPX_DSP(8); + } + #define H264_DSP(depth) \ c->h264_idct_add= FUNC(ff_h264_idct_add, depth);\ c->h264_idct8_add= FUNC(ff_h264_idct8_add, depth);\ diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h index c6bcd9e214..757caf8162 100644 --- a/libavcodec/h264dsp.h +++ b/libavcodec/h264dsp.h @@ -101,6 +101,10 @@ typedef struct H264DSPContext { void (*h264_luma_dc_dequant_idct)(int16_t *output, int16_t *input /*align 16*/, int qmul); void (*h264_chroma_dc_dequant_idct)(int16_t *block, int qmul); + + /* bypass-transform */ + void (*h264_add_pixels8)(uint8_t *dst, int16_t *block, int stride); + void (*h264_add_pixels4)(uint8_t *dst, int16_t *block, int stride); } H264DSPContext; void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,