1
0
mirror of https://github.com/facebook/zstd.git synced 2025-03-06 16:56:49 +02:00

Offsets >= 32MB in 32-bits mode

This commit is contained in:
Sean Purcell 2017-03-01 14:36:25 -08:00
parent 1d7f30f9d4
commit d44703d145
4 changed files with 60 additions and 10 deletions

View File

@ -60,6 +60,9 @@ extern "C" {
# include <immintrin.h> /* support for bextr (experimental) */ # include <immintrin.h> /* support for bextr (experimental) */
#endif #endif
/* Bit-count thresholds for the bitstream accumulator:
 * 25 is selected when MEM_32bits() is true, 57 otherwise.
 * NOTE(review): both values equal the native word width minus 7 bits
 * (32-7 and 64-7), presumably the number of bits the accumulator is
 * guaranteed to have available right after a flush/reload -- confirm. */
#define STREAM_ACCUMULATOR_MIN_32 25
#define STREAM_ACCUMULATOR_MIN_64 57
/* Selects the threshold matching MEM_32bits(); the result is cast to U32. */
#define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
/*-****************************************** /*-******************************************
* bitStream encoding API (write forward) * bitStream encoding API (write forward)

View File

@ -579,9 +579,9 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
} }
size_t ZSTD_compressSequences(ZSTD_CCtx* zc, FORCE_INLINE size_t ZSTD_compressSequences_generic (ZSTD_CCtx* zc,
void* dst, size_t dstCapacity, void* dst, size_t dstCapacity,
size_t srcSize) size_t srcSize, int const longOffsets)
{ {
const seqStore_t* seqStorePtr = &(zc->seqStore); const seqStore_t* seqStorePtr = &(zc->seqStore);
U32 count[MaxSeq+1]; U32 count[MaxSeq+1];
@ -716,7 +716,18 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
if (MEM_32bits()) BIT_flushBits(&blockStream); if (MEM_32bits()) BIT_flushBits(&blockStream);
BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]); BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
if (MEM_32bits()) BIT_flushBits(&blockStream); if (MEM_32bits()) BIT_flushBits(&blockStream);
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]); if (longOffsets) {
U32 const ofBits = ofCodeTable[nbSeq-1];
int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
if (extraBits) {
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
BIT_flushBits(&blockStream);
}
BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
ofBits - extraBits);
} else {
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
}
BIT_flushBits(&blockStream); BIT_flushBits(&blockStream);
{ size_t n; { size_t n;
@ -738,7 +749,17 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/ if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ if (longOffsets) {
int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
if (extraBits) {
BIT_addBits(&blockStream, sequences[n].offset, extraBits);
BIT_flushBits(&blockStream); /* (7)*/
}
BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
ofBits - extraBits); /* 31 */
} else {
BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
}
BIT_flushBits(&blockStream); /* (7)*/ BIT_flushBits(&blockStream); /* (7)*/
} } } }
@ -763,6 +784,16 @@ _check_compressibility:
return op - ostart; return op - ostart;
} }
/*! ZSTD_compressSequences() :
 *  Thin dispatcher in front of ZSTD_compressSequences_generic().
 *  The long-offsets variant is selected when the context's configured
 *  windowLog is strictly greater than STREAM_ACCUMULATOR_MIN.
 *  The flag is passed as a literal at each call site, so each call hands
 *  the generic routine a constant `longOffsets` value.
 * @return : the value returned by ZSTD_compressSequences_generic(). */
FORCE_INLINE size_t ZSTD_compressSequences (ZSTD_CCtx* zc,
                              void* dst, size_t dstCapacity,
                              size_t srcSize)
{
    int const needsLongOffsets = (zc->params.cParams.windowLog > STREAM_ACCUMULATOR_MIN);

    if (!needsLongOffsets) {
        return ZSTD_compressSequences_generic(zc, dst, dstCapacity, srcSize, 0);
    }
    return ZSTD_compressSequences_generic(zc, dst, dstCapacity, srcSize, 1);
}
#if 0 /* for debug */ #if 0 /* for debug */
# define STORESEQ_DEBUG # define STORESEQ_DEBUG

View File

@ -1144,7 +1144,7 @@ static size_t ZSTD_decompressSequences(
} }
static seq_t ZSTD_decodeSequenceLong(seqState_t* seqState) FORCE_INLINE seq_t ZSTD_decodeSequenceLong_generic(seqState_t* seqState, int const longOffsets)
{ {
seq_t seq; seq_t seq;
@ -1179,8 +1179,15 @@ static seq_t ZSTD_decodeSequenceLong(seqState_t* seqState)
if (!ofCode) if (!ofCode)
offset = 0; offset = 0;
else { else {
offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ if (longOffsets) {
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN);
offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
} else {
offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
}
} }
if (ofCode <= 1) { if (ofCode <= 1) {
@ -1224,6 +1231,14 @@ static seq_t ZSTD_decodeSequenceLong(seqState_t* seqState)
return seq; return seq;
} }
/*! ZSTD_decodeSequenceLong() :
 *  Decodes one sequence by forwarding to ZSTD_decodeSequenceLong_generic().
 *  Long-offsets handling is enabled when ZSTD_highbit32(windowSize) is
 *  strictly greater than STREAM_ACCUMULATOR_MIN.
 *  The flag is passed as a literal at each call site, so each call hands
 *  the generic routine a constant `longOffsets` value.
 * @return : the sequence produced by ZSTD_decodeSequenceLong_generic(). */
static seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, unsigned const windowSize)
{
    int const needsLongOffsets = (ZSTD_highbit32(windowSize) > STREAM_ACCUMULATOR_MIN);

    if (!needsLongOffsets) {
        return ZSTD_decodeSequenceLong_generic(seqState, 0);
    }
    return ZSTD_decodeSequenceLong_generic(seqState, 1);
}
FORCE_INLINE FORCE_INLINE
size_t ZSTD_execSequenceLong(BYTE* op, size_t ZSTD_execSequenceLong(BYTE* op,
BYTE* const oend, seq_t sequence, BYTE* const oend, seq_t sequence,
@ -1321,6 +1336,7 @@ static size_t ZSTD_decompressSequencesLong(
const BYTE* const base = (const BYTE*) (dctx->base); const BYTE* const base = (const BYTE*) (dctx->base);
const BYTE* const vBase = (const BYTE*) (dctx->vBase); const BYTE* const vBase = (const BYTE*) (dctx->vBase);
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
unsigned const windowSize = dctx->fParams.windowSize;
int nbSeq; int nbSeq;
/* Build Decoding Tables */ /* Build Decoding Tables */
@ -1350,13 +1366,13 @@ static size_t ZSTD_decompressSequencesLong(
/* prepare in advance */ /* prepare in advance */
for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && seqNb<seqAdvance; seqNb++) { for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && seqNb<seqAdvance; seqNb++) {
sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState); sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, windowSize);
} }
if (seqNb<seqAdvance) return ERROR(corruption_detected); if (seqNb<seqAdvance) return ERROR(corruption_detected);
/* decode and decompress */ /* decode and decompress */
for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && seqNb<nbSeq ; seqNb++) { for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && seqNb<nbSeq ; seqNb++) {
seq_t const sequence = ZSTD_decodeSequenceLong(&seqState); seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, windowSize);
size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STOSEQ_MASK], &litPtr, litEnd, base, vBase, dictEnd); size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STOSEQ_MASK], &litPtr, litEnd, base, vBase, dictEnd);
if (ZSTD_isError(oneSeqSize)) return oneSeqSize; if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
ZSTD_PREFETCH(sequence.match); ZSTD_PREFETCH(sequence.match);

View File

@ -343,7 +343,7 @@ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output
#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) #define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) #define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
#define ZSTD_WINDOWLOG_MAX_32 25 #define ZSTD_WINDOWLOG_MAX_32 27
#define ZSTD_WINDOWLOG_MAX_64 27 #define ZSTD_WINDOWLOG_MAX_64 27
#define ZSTD_WINDOWLOG_MAX ((U32)(MEM_32bits() ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64)) #define ZSTD_WINDOWLOG_MAX ((U32)(MEM_32bits() ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64))
#define ZSTD_WINDOWLOG_MIN 10 #define ZSTD_WINDOWLOG_MIN 10