From 917fe188f15fcfd4406ff991d4bf8c60a6ccbfc2 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 31 Jul 2016 04:01:57 +0200 Subject: [PATCH] Implemented repOffset "minus 1" on ll==0 --- lib/compress/zstd_opt.h | 28 ++++++++++++++++------------ lib/decompress/zstd_decompress.c | 4 ++-- zstd_compression_format.md | 19 ++++++++----------- 3 files changed, 26 insertions(+), 25 deletions(-) diff --git a/lib/compress/zstd_opt.h b/lib/compress/zstd_opt.h index 3eac1ac87..1946a3ae5 100644 --- a/lib/compress/zstd_opt.h +++ b/lib/compress/zstd_opt.h @@ -453,7 +453,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, /* check repCode */ { U32 i; - for (i=0; i last_pos || price < opt[mlen].price) @@ -544,9 +544,9 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, best_mlen = minMatch; { U32 i; - for (i=0; i litlen) { @@ -661,7 +663,8 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ rep[1] = rep[0]; rep[0] = best_off; } - if (litLength == 0 && offset<=1) offset = 1-offset; + if ((litLength == 0) & (offset==0)) offset = rep[1]; /* protection, but should never happen */ + if ((litLength == 0) & (offset<=2)) offset--; } ZSTD_LOG_ENCODE("%d/%d: ENCODE literals=%d mlen=%d off=%d rep[0]=%d rep[1]=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep[0], (int)rep[1]); @@ -746,7 +749,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, /* check repCode */ { U32 i; - for (i=0; i litlen) { @@ -973,8 +976,9 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ if (offset != 1) rep[2] = rep[1]; rep[1] = rep[0]; rep[0] = best_off; - } - if (litLength == 0 && offset<=1) offset = 1-offset; + } + if ((litLength==0) & (offset==0)) offset = rep[1]; /* protection, but should never happen */ + if ((litLength==0) & (offset<=2)) offset --; } ZSTD_LOG_ENCODE("%d/%d: ENCODE literals=%d mlen=%d off=%d rep[0]=%d rep[1]=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, 
(int)(offset), (int)rep[0], (int)rep[1]); diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index e1ac20049..958d63692 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -627,9 +627,9 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState) } if (ofCode <= 1) { - if ((llCode == 0) & (offset <= 1)) offset = 1-offset; + offset += (llCode==0); if (offset) { - size_t const temp = seqState->prevOffset[offset]; + size_t const temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; seqState->prevOffset[1] = seqState->prevOffset[0]; seqState->prevOffset[0] = offset = temp; diff --git a/zstd_compression_format.md b/zstd_compression_format.md index b4f8b8af4..da5c94afd 100644 --- a/zstd_compression_format.md +++ b/zstd_compression_format.md @@ -1081,11 +1081,11 @@ As seen in [Offset Codes], the first 3 values define a repeated offset. They are sorted in recency order, with 1 meaning "most recent one". There is an exception though, when current sequence's literal length is `0`. -In which case, the first 2 values are swapped, -meaning `2` refers to the most recent offset, -while `1` refers to the second most recent offset, +In which case, repcodes are "pushed by one", +so 1 becomes 2, 2 becomes 3, +and 3 becomes "offset_1 - 1_byte". -Repeat offsets start with the following values : 1, 4 and 8 (in order). +On first block, offset history is populated by the following values : 1, 4 and 8 (in order). Then each block receives its start value from previous compressed block. Note that non-compressed blocks are skipped, @@ -1095,14 +1095,11 @@ they do not contribute to offset history. ###### Offset updates rules -When the new offset is a normal one, -offset history is simply translated by one position, -with the new offset taking first spot. 
+New offset takes the lead in offset history, +up to its previous place if it was already present. -- When repeat offset 1 (most recent) is used, history is unmodified. -- When repeat offset 2 is used, it's swapped with offset 1. -- When repeat offset 3 is used, it takes first spot, - pushing the other ones by one position. +It means that when repeat offset 1 (most recent) is used, history is unmodified. +When repeat offset 2 is used, it's swapped with offset 1. Dictionary format