From 05c04cdf54fee9332d337380fd4cd8502bdac2be Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <darkshikari@gmail.com>
Date: Thu, 12 Aug 2010 01:11:32 +0000
Subject: [PATCH] VP5/6/8: ~7% faster arithmetic decoding. Grab from the
 bitstream in 16-bit chunks instead of 8-bit chunks. TODO: grab in 32-bit
 chunks on 64-bit systems.

Originally committed as revision 24783 to svn://svn.ffmpeg.org/ffmpeg/trunk
---
 libavcodec/vp56.h           | 10 +++++-----
 libavcodec/vp56rac.c        |  4 ++--
 libavcodec/x86/vp56_arith.h |  2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/libavcodec/vp56.h b/libavcodec/vp56.h
index 50a39f75bc..da6b1b64b8 100644
--- a/libavcodec/vp56.h
+++ b/libavcodec/vp56.h
@@ -194,8 +194,8 @@ static av_always_inline unsigned int vp56_rac_renorm(VP56RangeCoder *c)
     code_word <<= shift;
     bits       += shift;
     if(bits >= 0 && c->buffer < c->end) {
-        code_word |= *c->buffer++ << bits;
-        bits -= 8;
+        code_word |= bytestream_get_be16(&c->buffer) << bits;
+        bits -= 16;
     }
     c->bits = bits;
     return code_word;
@@ -211,7 +211,7 @@ static av_always_inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob)
 {
     unsigned int code_word = vp56_rac_renorm(c);
     unsigned int low = 1 + (((c->high - 1) * prob) >> 8);
-    unsigned int low_shift = low << 8;
+    unsigned int low_shift = low << 16;
     int bit = code_word >= low_shift;
 
     c->high = bit ? c->high - low : low;
@@ -226,7 +226,7 @@ static av_always_inline int vp56_rac_get_prob_branchy(VP56RangeCoder *c, int pro
 {
     unsigned long code_word = vp56_rac_renorm(c);
     unsigned low = 1 + (((c->high - 1) * prob) >> 8);
-    unsigned low_shift = low << 8;
+    unsigned low_shift = low << 16;
 
     if (code_word >= low_shift) {
         c->high     -= low;
@@ -244,7 +244,7 @@ static av_always_inline int vp56_rac_get(VP56RangeCoder *c)
     unsigned int code_word = vp56_rac_renorm(c);
     /* equiprobable */
     int low = (c->high + 1) >> 1;
-    unsigned int low_shift = low << 8;
+    unsigned int low_shift = low << 16;
     int bit = code_word >= low_shift;
     if (bit) {
         c->high   -= low;
diff --git a/libavcodec/vp56rac.c b/libavcodec/vp56rac.c
index 752a42bc6b..f11531de24 100644
--- a/libavcodec/vp56rac.c
+++ b/libavcodec/vp56rac.c
@@ -40,8 +40,8 @@ const uint8_t ff_vp56_norm_shift[256]= {
 void ff_vp56_init_range_decoder(VP56RangeCoder *c, const uint8_t *buf, int buf_size)
 {
     c->high = 255;
-    c->bits = -8;
+    c->bits = -16;
     c->buffer = buf;
     c->end = buf + buf_size;
-    c->code_word = bytestream_get_be16(&c->buffer);
+    c->code_word = bytestream_get_be24(&c->buffer);
 }
diff --git a/libavcodec/x86/vp56_arith.h b/libavcodec/x86/vp56_arith.h
index ae3bc3dc77..ddbf38b1a9 100644
--- a/libavcodec/x86/vp56_arith.h
+++ b/libavcodec/x86/vp56_arith.h
@@ -31,7 +31,7 @@ static av_always_inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob)
     unsigned int code_word = vp56_rac_renorm(c);
     unsigned int high = c->high;
     unsigned int low = 1 + (((high - 1) * prob) >> 8);
-    unsigned int low_shift = low << 8;
+    unsigned int low_shift = low << 16;
     int bit = 0;
 
     __asm__(