mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-06-04 05:57:49 +02:00
avcodec/vc1: Introduce fast path for unescaping bitstream buffer
Includes a checkasm test. Signed-off-by: Ben Avison <bavison@riscosopen.org> Signed-off-by: Martin Storsjö <martin@martin.st>
This commit is contained in:
parent
bd3615a81a
commit
2e26847780
@ -491,7 +491,7 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
|
|||||||
size = next - start - 4;
|
size = next - start - 4;
|
||||||
if (size <= 0)
|
if (size <= 0)
|
||||||
continue;
|
continue;
|
||||||
buf2_size = vc1_unescape_buffer(start + 4, size, buf2);
|
buf2_size = v->vc1dsp.vc1_unescape_buffer(start + 4, size, buf2);
|
||||||
init_get_bits(&gb, buf2, buf2_size * 8);
|
init_get_bits(&gb, buf2, buf2_size * 8);
|
||||||
switch (AV_RB32(start)) {
|
switch (AV_RB32(start)) {
|
||||||
case VC1_CODE_SEQHDR:
|
case VC1_CODE_SEQHDR:
|
||||||
@ -681,7 +681,7 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
|
|||||||
case VC1_CODE_FRAME:
|
case VC1_CODE_FRAME:
|
||||||
if (avctx->hwaccel)
|
if (avctx->hwaccel)
|
||||||
buf_start = start;
|
buf_start = start;
|
||||||
buf_size2 = vc1_unescape_buffer(start + 4, size, buf2);
|
buf_size2 = v->vc1dsp.vc1_unescape_buffer(start + 4, size, buf2);
|
||||||
break;
|
break;
|
||||||
case VC1_CODE_FIELD: {
|
case VC1_CODE_FIELD: {
|
||||||
int buf_size3;
|
int buf_size3;
|
||||||
@ -698,8 +698,8 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
|
|||||||
ret = AVERROR(ENOMEM);
|
ret = AVERROR(ENOMEM);
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
buf_size3 = vc1_unescape_buffer(start + 4, size,
|
buf_size3 = v->vc1dsp.vc1_unescape_buffer(start + 4, size,
|
||||||
slices[n_slices].buf);
|
slices[n_slices].buf);
|
||||||
init_get_bits(&slices[n_slices].gb, slices[n_slices].buf,
|
init_get_bits(&slices[n_slices].gb, slices[n_slices].buf,
|
||||||
buf_size3 << 3);
|
buf_size3 << 3);
|
||||||
slices[n_slices].mby_start = avctx->coded_height + 31 >> 5;
|
slices[n_slices].mby_start = avctx->coded_height + 31 >> 5;
|
||||||
@ -710,7 +710,7 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case VC1_CODE_ENTRYPOINT: /* it should be before frame data */
|
case VC1_CODE_ENTRYPOINT: /* it should be before frame data */
|
||||||
buf_size2 = vc1_unescape_buffer(start + 4, size, buf2);
|
buf_size2 = v->vc1dsp.vc1_unescape_buffer(start + 4, size, buf2);
|
||||||
init_get_bits(&s->gb, buf2, buf_size2 * 8);
|
init_get_bits(&s->gb, buf2, buf_size2 * 8);
|
||||||
ff_vc1_decode_entry_point(avctx, v, &s->gb);
|
ff_vc1_decode_entry_point(avctx, v, &s->gb);
|
||||||
break;
|
break;
|
||||||
@ -727,8 +727,8 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
|
|||||||
ret = AVERROR(ENOMEM);
|
ret = AVERROR(ENOMEM);
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
buf_size3 = vc1_unescape_buffer(start + 4, size,
|
buf_size3 = v->vc1dsp.vc1_unescape_buffer(start + 4, size,
|
||||||
slices[n_slices].buf);
|
slices[n_slices].buf);
|
||||||
init_get_bits(&slices[n_slices].gb, slices[n_slices].buf,
|
init_get_bits(&slices[n_slices].gb, slices[n_slices].buf,
|
||||||
buf_size3 << 3);
|
buf_size3 << 3);
|
||||||
slices[n_slices].mby_start = get_bits(&slices[n_slices].gb, 9);
|
slices[n_slices].mby_start = get_bits(&slices[n_slices].gb, 9);
|
||||||
@ -762,7 +762,7 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
|
|||||||
ret = AVERROR(ENOMEM);
|
ret = AVERROR(ENOMEM);
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
buf_size3 = vc1_unescape_buffer(divider + 4, buf + buf_size - divider - 4, slices[n_slices].buf);
|
buf_size3 = v->vc1dsp.vc1_unescape_buffer(divider + 4, buf + buf_size - divider - 4, slices[n_slices].buf);
|
||||||
init_get_bits(&slices[n_slices].gb, slices[n_slices].buf,
|
init_get_bits(&slices[n_slices].gb, slices[n_slices].buf,
|
||||||
buf_size3 << 3);
|
buf_size3 << 3);
|
||||||
slices[n_slices].mby_start = s->mb_height + 1 >> 1;
|
slices[n_slices].mby_start = s->mb_height + 1 >> 1;
|
||||||
@ -771,9 +771,9 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
|
|||||||
n_slices1 = n_slices - 1;
|
n_slices1 = n_slices - 1;
|
||||||
n_slices++;
|
n_slices++;
|
||||||
}
|
}
|
||||||
buf_size2 = vc1_unescape_buffer(buf, divider - buf, buf2);
|
buf_size2 = v->vc1dsp.vc1_unescape_buffer(buf, divider - buf, buf2);
|
||||||
} else {
|
} else {
|
||||||
buf_size2 = vc1_unescape_buffer(buf, buf_size, buf2);
|
buf_size2 = v->vc1dsp.vc1_unescape_buffer(buf, buf_size, buf2);
|
||||||
}
|
}
|
||||||
init_get_bits(&s->gb, buf2, buf_size2*8);
|
init_get_bits(&s->gb, buf2, buf_size2*8);
|
||||||
} else{
|
} else{
|
||||||
|
@ -34,6 +34,7 @@
|
|||||||
#include "rnd_avg.h"
|
#include "rnd_avg.h"
|
||||||
#include "vc1dsp.h"
|
#include "vc1dsp.h"
|
||||||
#include "startcode.h"
|
#include "startcode.h"
|
||||||
|
#include "vc1_common.h"
|
||||||
|
|
||||||
/* Apply overlap transform to horizontal edge */
|
/* Apply overlap transform to horizontal edge */
|
||||||
static void vc1_v_overlap_c(uint8_t *src, ptrdiff_t stride)
|
static void vc1_v_overlap_c(uint8_t *src, ptrdiff_t stride)
|
||||||
@ -1030,6 +1031,7 @@ av_cold void ff_vc1dsp_init(VC1DSPContext *dsp)
|
|||||||
#endif /* CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER */
|
#endif /* CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER */
|
||||||
|
|
||||||
dsp->startcode_find_candidate = ff_startcode_find_candidate_c;
|
dsp->startcode_find_candidate = ff_startcode_find_candidate_c;
|
||||||
|
dsp->vc1_unescape_buffer = vc1_unescape_buffer;
|
||||||
|
|
||||||
if (ARCH_AARCH64)
|
if (ARCH_AARCH64)
|
||||||
ff_vc1dsp_init_aarch64(dsp);
|
ff_vc1dsp_init_aarch64(dsp);
|
||||||
|
@ -80,6 +80,9 @@ typedef struct VC1DSPContext {
|
|||||||
* one or more further zero bytes and a one byte.
|
* one or more further zero bytes and a one byte.
|
||||||
*/
|
*/
|
||||||
int (*startcode_find_candidate)(const uint8_t *buf, int size);
|
int (*startcode_find_candidate)(const uint8_t *buf, int size);
|
||||||
|
|
||||||
|
/* Copy a buffer, removing startcode emulation escape bytes as we go */
|
||||||
|
int (*vc1_unescape_buffer)(const uint8_t *src, int size, uint8_t *dst);
|
||||||
} VC1DSPContext;
|
} VC1DSPContext;
|
||||||
|
|
||||||
void ff_vc1dsp_init(VC1DSPContext* c);
|
void ff_vc1dsp_init(VC1DSPContext* c);
|
||||||
|
@ -374,6 +374,70 @@ static void check_loop_filter(void)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define TEST_UNESCAPE \
|
||||||
|
do { \
|
||||||
|
for (int count = 100; count > 0; --count) { \
|
||||||
|
escaped_offset = rnd() & 7; \
|
||||||
|
unescaped_offset = rnd() & 7; \
|
||||||
|
escaped_len = (1u << (rnd() % 8) + 3) - (rnd() & 7); \
|
||||||
|
RANDOMIZE_BUFFER8(unescaped, UNESCAPE_BUF_SIZE); \
|
||||||
|
len0 = call_ref(escaped0 + escaped_offset, escaped_len, unescaped0 + unescaped_offset); \
|
||||||
|
len1 = call_new(escaped1 + escaped_offset, escaped_len, unescaped1 + unescaped_offset); \
|
||||||
|
if (len0 != len1 || memcmp(unescaped0, unescaped1, UNESCAPE_BUF_SIZE)) \
|
||||||
|
fail(); \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
static void check_unescape(void)
|
||||||
|
{
|
||||||
|
/* This appears to be a typical length of buffer in use */
|
||||||
|
#define LOG2_UNESCAPE_BUF_SIZE 17
|
||||||
|
#define UNESCAPE_BUF_SIZE (1u<<LOG2_UNESCAPE_BUF_SIZE)
|
||||||
|
LOCAL_ALIGNED_8(uint8_t, escaped0, [UNESCAPE_BUF_SIZE]);
|
||||||
|
LOCAL_ALIGNED_8(uint8_t, escaped1, [UNESCAPE_BUF_SIZE]);
|
||||||
|
LOCAL_ALIGNED_8(uint8_t, unescaped0, [UNESCAPE_BUF_SIZE]);
|
||||||
|
LOCAL_ALIGNED_8(uint8_t, unescaped1, [UNESCAPE_BUF_SIZE]);
|
||||||
|
|
||||||
|
VC1DSPContext h;
|
||||||
|
|
||||||
|
ff_vc1dsp_init(&h);
|
||||||
|
|
||||||
|
if (check_func(h.vc1_unescape_buffer, "vc1dsp.vc1_unescape_buffer")) {
|
||||||
|
int len0, len1, escaped_offset, unescaped_offset, escaped_len;
|
||||||
|
declare_func_emms(AV_CPU_FLAG_MMX, int, const uint8_t *, int, uint8_t *);
|
||||||
|
|
||||||
|
/* Test data which consists of escape sequences packed as tightly as possible */
|
||||||
|
for (int x = 0; x < UNESCAPE_BUF_SIZE; ++x)
|
||||||
|
escaped1[x] = escaped0[x] = 3 * (x % 3 == 0);
|
||||||
|
TEST_UNESCAPE;
|
||||||
|
|
||||||
|
/* Test random data */
|
||||||
|
RANDOMIZE_BUFFER8(escaped, UNESCAPE_BUF_SIZE);
|
||||||
|
TEST_UNESCAPE;
|
||||||
|
|
||||||
|
/* Test data with escape sequences at random intervals */
|
||||||
|
for (int x = 0; x <= UNESCAPE_BUF_SIZE - 4;) {
|
||||||
|
int gap, gap_msb;
|
||||||
|
escaped1[x+0] = escaped0[x+0] = 0;
|
||||||
|
escaped1[x+1] = escaped0[x+1] = 0;
|
||||||
|
escaped1[x+2] = escaped0[x+2] = 3;
|
||||||
|
escaped1[x+3] = escaped0[x+3] = rnd() & 3;
|
||||||
|
gap_msb = 2u << (rnd() % 8);
|
||||||
|
gap = (rnd() &~ -gap_msb) | gap_msb;
|
||||||
|
x += gap;
|
||||||
|
}
|
||||||
|
TEST_UNESCAPE;
|
||||||
|
|
||||||
|
/* Test data which is known to contain no escape sequences */
|
||||||
|
memset(escaped0, 0xFF, UNESCAPE_BUF_SIZE);
|
||||||
|
memset(escaped1, 0xFF, UNESCAPE_BUF_SIZE);
|
||||||
|
TEST_UNESCAPE;
|
||||||
|
|
||||||
|
/* Benchmark the no-escape-sequences case */
|
||||||
|
bench_new(escaped1, UNESCAPE_BUF_SIZE, unescaped1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void checkasm_check_vc1dsp(void)
|
void checkasm_check_vc1dsp(void)
|
||||||
{
|
{
|
||||||
check_inv_trans_inplace();
|
check_inv_trans_inplace();
|
||||||
@ -382,4 +446,7 @@ void checkasm_check_vc1dsp(void)
|
|||||||
|
|
||||||
check_loop_filter();
|
check_loop_filter();
|
||||||
report("loop_filter");
|
report("loop_filter");
|
||||||
|
|
||||||
|
check_unescape();
|
||||||
|
report("unescape_buffer");
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user