From 218d6844b37d339ffbf2044ad07d8be7767e2734 Mon Sep 17 00:00:00 2001 From: Ben Avison Date: Mon, 5 Aug 2013 13:12:47 +0100 Subject: [PATCH] h264dsp: Factorize code into a new function, h264_find_start_code_candidate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This performs the start code search which was previously part of h264_find_frame_end() - the most CPU intensive part of the function. By itself, this results in a performance regression: Before After Mean StdDev Mean StdDev Change Overall time 2925.6 26.2 3068.5 31.7 -4.7% but this can more than be made up for by platform-optimised implementations of the function. Signed-off-by: Martin Storsjö --- libavcodec/h264_parser.c | 27 +++------------------------ libavcodec/h264dsp.c | 29 +++++++++++++++++++++++++++++ libavcodec/h264dsp.h | 9 +++++++++ 3 files changed, 41 insertions(+), 24 deletions(-) diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c index da2a5f99db..ef5da98934 100644 --- a/libavcodec/h264_parser.c +++ b/libavcodec/h264_parser.c @@ -47,30 +47,9 @@ static int h264_find_frame_end(H264Context *h, const uint8_t *buf, for (i = 0; i < buf_size; i++) { if (state == 7) { -#if HAVE_FAST_UNALIGNED - /* we check i < buf_size instead of i + 3 / 7 because it is - * simpler and there must be FF_INPUT_BUFFER_PADDING_SIZE - * bytes at the end. - */ -#if HAVE_FAST_64BIT - while (i < buf_size && - !((~*(const uint64_t *)(buf + i) & - (*(const uint64_t *)(buf + i) - 0x0101010101010101ULL)) & - 0x8080808080808080ULL)) - i += 8; -#else - while (i < buf_size && - !((~*(const uint32_t *)(buf + i) & - (*(const uint32_t *)(buf + i) - 0x01010101U)) & - 0x80808080U)) - i += 4; -#endif -#endif - for (; i < buf_size; i++) - if (!buf[i]) { - state = 2; - break; - } + i += h->h264dsp.h264_find_start_code_candidate(buf + i, buf_size - i); + if (i < buf_size) + state = 2; } else if (state <= 2) { if (buf[i] == 1) state ^= 5; // 2->7, 1->4, 0->5 diff --git a/libavcodec/h264dsp.c b/libavcodec/h264dsp.c index 3ca6abefda..a901dbb9e1 100644 --- a/libavcodec/h264dsp.c +++ b/libavcodec/h264dsp.c @@ -53,6 +53,34 @@ #include "h264addpx_template.c" #undef BIT_DEPTH +static int h264_find_start_code_candidate_c(const uint8_t *buf, int size) +{ + int i = 0; +#if HAVE_FAST_UNALIGNED + /* we check i < size instead of i + 3 / 7 because it is + * simpler and there must be FF_INPUT_BUFFER_PADDING_SIZE + * bytes at the end. + */ +#if HAVE_FAST_64BIT + while (i < size && + !((~*(const uint64_t *)(buf + i) & + (*(const uint64_t *)(buf + i) - 0x0101010101010101ULL)) & + 0x8080808080808080ULL)) + i += 8; +#else + while (i < size && + !((~*(const uint32_t *)(buf + i) & + (*(const uint32_t *)(buf + i) - 0x01010101U)) & + 0x80808080U)) + i += 4; +#endif +#endif + for (; i < size; i++) + if (!buf[i]) + break; + return i; +} + av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_format_idc) { @@ -133,6 +161,7 @@ av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, H264_DSP(8); break; } + c->h264_find_start_code_candidate = h264_find_start_code_candidate_c; if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth, chroma_format_idc); if (ARCH_PPC) ff_h264dsp_init_ppc(c, bit_depth, chroma_format_idc); diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h index 1f9f8fe823..6249ba70d5 100644 --- a/libavcodec/h264dsp.h +++ b/libavcodec/h264dsp.h @@ -105,6 +105,15 @@ typedef struct H264DSPContext { /* bypass-transform */ void (*h264_add_pixels8_clear)(uint8_t *dst, int16_t *block, int stride); void (*h264_add_pixels4_clear)(uint8_t *dst, int16_t *block, int stride); + + /** + * Search buf from the start for up to size bytes. Return the index + * of a zero byte, or >= size if not found. Ideally, use lookahead + * to filter out any zero bytes that are known to not be followed by + * one or more further zero bytes and a one byte. Better still, filter + * out any bytes that form the trailing_zero_8bits syntax element too. + */ + int (*h264_find_start_code_candidate)(const uint8_t *buf, int size); } H264DSPContext; void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,