diff --git a/lib/common/bitstream.h b/lib/common/bitstream.h index 3a45244f8..d3873002e 100644 --- a/lib/common/bitstream.h +++ b/lib/common/bitstream.h @@ -60,6 +60,9 @@ extern "C" { # include <immintrin.h> /* support for bextr (experimental) */ #endif +#define STREAM_ACCUMULATOR_MIN_32 25 +#define STREAM_ACCUMULATOR_MIN_64 57 +#define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64)) /*-****************************************** * bitStream encoding API (write forward) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index ec758db00..89f6575dc 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -579,9 +579,9 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) } -size_t ZSTD_compressSequences(ZSTD_CCtx* zc, +FORCE_INLINE size_t ZSTD_compressSequences_generic (ZSTD_CCtx* zc, void* dst, size_t dstCapacity, - size_t srcSize) + size_t srcSize, int const longOffsets) { const seqStore_t* seqStorePtr = &(zc->seqStore); U32 count[MaxSeq+1]; @@ -716,7 +716,18 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, if (MEM_32bits()) BIT_flushBits(&blockStream); BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]); if (MEM_32bits()) BIT_flushBits(&blockStream); - BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]); + if (longOffsets) { + U32 const ofBits = ofCodeTable[nbSeq-1]; + int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); + if (extraBits) { + BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits); + BIT_flushBits(&blockStream); + } + BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits, + ofBits - extraBits); + } else { + BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]); + } BIT_flushBits(&blockStream); { size_t n; @@ -738,7 +749,17 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); 
BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/ - BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ + if (longOffsets) { + int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); + if (extraBits) { + BIT_addBits(&blockStream, sequences[n].offset, extraBits); + BIT_flushBits(&blockStream); /* (7)*/ + } + BIT_addBits(&blockStream, sequences[n].offset >> extraBits, + ofBits - extraBits); /* 31 */ + } else { + BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ + } BIT_flushBits(&blockStream); /* (7)*/ } } @@ -763,6 +784,16 @@ _check_compressibility: return op - ostart; } +FORCE_INLINE size_t ZSTD_compressSequences (ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + size_t srcSize) +{ + if (zc->params.cParams.windowLog > STREAM_ACCUMULATOR_MIN) { + return ZSTD_compressSequences_generic(zc, dst, dstCapacity, srcSize, 1); + } else { + return ZSTD_compressSequences_generic(zc, dst, dstCapacity, srcSize, 0); + } +} #if 0 /* for debug */ # define STORESEQ_DEBUG diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 2646c8028..e39bf42bf 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1144,7 +1144,7 @@ static size_t ZSTD_decompressSequences( } -static seq_t ZSTD_decodeSequenceLong(seqState_t* seqState) +FORCE_INLINE seq_t ZSTD_decodeSequenceLong_generic(seqState_t* seqState, int const longOffsets) { seq_t seq; @@ -1179,8 +1179,15 @@ static seq_t ZSTD_decodeSequenceLong(seqState_t* seqState) if (!ofCode) offset = 0; else { - offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); + if (longOffsets) { + int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN); + offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); + if 
(MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream); + if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); + } else { + offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); + } } if (ofCode <= 1) { @@ -1224,6 +1231,14 @@ static seq_t ZSTD_decodeSequenceLong(seqState_t* seqState) return seq; } +static seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, unsigned const windowSize) { + if (ZSTD_highbit32(windowSize) > STREAM_ACCUMULATOR_MIN) { + return ZSTD_decodeSequenceLong_generic(seqState, 1); + } else { + return ZSTD_decodeSequenceLong_generic(seqState, 0); + } +} + FORCE_INLINE size_t ZSTD_execSequenceLong(BYTE* op, BYTE* const oend, seq_t sequence, @@ -1321,6 +1336,7 @@ static size_t ZSTD_decompressSequencesLong( const BYTE* const base = (const BYTE*) (dctx->base); const BYTE* const vBase = (const BYTE*) (dctx->vBase); const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + unsigned const windowSize = dctx->fParams.windowSize; int nbSeq; /* Build Decoding Tables */ @@ -1350,13 +1366,13 @@ static size_t ZSTD_decompressSequencesLong( /* prepare in advance */ for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && seqNb