From 7eb5b20a87c1b786759de6b61cd6655edbb3e320 Mon Sep 17 00:00:00 2001 From: Christophe Gisquet Date: Thu, 5 Jun 2014 23:37:00 +0200 Subject: [PATCH] huffyuvdec: trick for plane decoding <= 14bits Refactor the code to minimize code duplication. Before: 130870 decicycles in g, 1048139 runs, 437 skips 10bits: 9.048 12bits: 10.733 After: 126960 decicycles in g, 1048136 runs, 440 skips 10bits: 8.642 12bits: 9.656 Signed-off-by: Michael Niedermayer --- libavcodec/huffyuvdec.c | 38 ++++++++++++++++---------------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/libavcodec/huffyuvdec.c b/libavcodec/huffyuvdec.c index 6c925d35f6..bc9d038dce 100644 --- a/libavcodec/huffyuvdec.c +++ b/libavcodec/huffyuvdec.c @@ -586,15 +586,14 @@ static av_cold int decode_init_thread_copy(AVCodecContext *avctx) #define GET_VLC_DUAL(dst0, dst1, name, gb, dtable, table1, table2, \ - bits, max_depth) \ + bits, max_depth, OP) \ do { \ unsigned int index = SHOW_UBITS(name, gb, bits); \ int code, n = dtable[index][1]; \ \ if (n>0) { \ code = dtable[index][0]; \ - dst0 = code>>8; \ - dst1 = code; \ + OP(dst0, dst1, code); \ LAST_SKIP_BITS(name, gb, n); \ } else { \ int nb_bits; \ @@ -606,10 +605,12 @@ static av_cold int decode_init_thread_copy(AVCodecContext *avctx) } \ } while (0) +#define OP8bits(dst0, dst1, code) dst0 = code>>8; dst1 = code + #define READ_2PIX(dst0, dst1, plane1)\ UPDATE_CACHE(re, &s->gb); \ GET_VLC_DUAL(dst0, dst1, re, &s->gb, s->vlc[4+plane1].table, \ - s->vlc[0].table, s->vlc[plane1].table, VLC_BITS, 3) + s->vlc[0].table, s->vlc[plane1].table, VLC_BITS, 3, OP8bits) static void decode_422_bitstream(HYuvContext *s, int count) { @@ -634,24 +635,15 @@ static void decode_422_bitstream(HYuvContext *s, int count) CLOSE_READER(re, &s->gb); } -/* TODO instead of restarting the read when the code isn't in the first level - * of the joint table, jump into the 2nd level of the individual table. 
*/ -#define READ_2PIX_PLANE(dst0, dst1, plane) \ +#define READ_2PIX_PLANE(dst0, dst1, plane, OP) \ UPDATE_CACHE(re, &s->gb); \ GET_VLC_DUAL(dst0, dst1, re, &s->gb, s->vlc[4+plane].table, \ - s->vlc[plane].table, s->vlc[plane].table, VLC_BITS, 3) + s->vlc[plane].table, s->vlc[plane].table, VLC_BITS, 3, OP) -#define READ_2PIX_PLANE14(dst0, dst1, plane){\ - int16_t code = get_vlc2(&s->gb, s->vlc[4+plane].table, VLC_BITS, 1);\ - if(code != (int16_t)0xffff){\ - dst0 = code>>8;\ - dst1 = sign_extend(code, 8);\ - }else{\ - dst0 = get_vlc2(&s->gb, s->vlc[plane].table, VLC_BITS, 3);\ - dst1 = get_vlc2(&s->gb, s->vlc[plane].table, VLC_BITS, 3);\ - }\ -} +#define OP14bits(dst0, dst1, code) dst0 = code>>8; dst1 = sign_extend(code, 8) +/* TODO instead of restarting the read when the code isn't in the first level + * of the joint table, jump into the 2nd level of the individual table. */ #define READ_2PIX_PLANE16(dst0, dst1, plane){\ dst0 = get_vlc2(&s->gb, s->vlc[plane].table, VLC_BITS, 3)<<2;\ dst0 += get_bits(&s->gb, 2);\ @@ -668,24 +660,26 @@ static void decode_plane_bitstream(HYuvContext *s, int count, int plane) OPEN_READER(re, &s->gb); if (count >= (get_bits_left(&s->gb)) / (31 * 2)) { for (i = 0; i < count && get_bits_left(&s->gb) > 0; i++) { - READ_2PIX_PLANE(s->temp[0][2 * i], s->temp[0][2 * i + 1], plane); + READ_2PIX_PLANE(s->temp[0][2 * i], s->temp[0][2 * i + 1], plane, OP8bits); } } else { for(i=0; i<count; i++){ - READ_2PIX_PLANE(s->temp[0][2 * i], s->temp[0][2 * i + 1], plane); + READ_2PIX_PLANE(s->temp[0][2 * i], s->temp[0][2 * i + 1], plane, OP8bits); } } CLOSE_READER(re, &s->gb); } else if (s->bps <= 14) { + OPEN_READER(re, &s->gb); if (count >= (get_bits_left(&s->gb)) / (31 * 2)) { for (i = 0; i < count && get_bits_left(&s->gb) > 0; i++) { - READ_2PIX_PLANE14(s->temp16[0][2 * i], s->temp16[0][2 * i + 1], plane); + READ_2PIX_PLANE(s->temp16[0][2 * i], s->temp16[0][2 * i + 1], plane, OP14bits); } } else { for(i=0; i<count; i++){ - READ_2PIX_PLANE14(s->temp16[0][2 * i], s->temp16[0][2 * i + 1], plane); +
READ_2PIX_PLANE(s->temp16[0][2 * i], s->temp16[0][2 * i + 1], plane, OP14bits); } } + CLOSE_READER(re, &s->gb); } else { if (count >= (get_bits_left(&s->gb)) / (31 * 2)) { for (i = 0; i < count && get_bits_left(&s->gb) > 0; i++) {