mirror of
https://github.com/facebook/zstd.git
synced 2025-03-06 16:56:49 +02:00
Merge pull request #2376 from senhuang42/clarify_sequence_extraction_api
Refine external ZSTD_Sequence API
This commit is contained in:
commit
c37c714ef1
@ -341,23 +341,28 @@ MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src,
|
||||
* Private declarations
|
||||
*********************************************/
|
||||
typedef struct seqDef_s {
|
||||
U32 offset;
|
||||
U32 offset; /* Offset code of the sequence */
|
||||
U16 litLength;
|
||||
U16 matchLength;
|
||||
} seqDef;
|
||||
|
||||
typedef struct {
|
||||
seqDef* sequencesStart;
|
||||
seqDef* sequences;
|
||||
seqDef* sequences; /* ptr to end of sequences */
|
||||
BYTE* litStart;
|
||||
BYTE* lit;
|
||||
BYTE* lit; /* ptr to end of literals */
|
||||
BYTE* llCode;
|
||||
BYTE* mlCode;
|
||||
BYTE* ofCode;
|
||||
size_t maxNbSeq;
|
||||
size_t maxNbLit;
|
||||
U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
|
||||
U32 longLengthPos;
|
||||
|
||||
/* longLengthPos and longLengthID allow us to represent either a single litLength or matchLength
|
||||
* in the seqStore whose value does not fit in a U16 (if one exists). To do so, we increment
|
||||
* the existing value of the litLength or matchLength by 0x10000.
|
||||
*/
|
||||
U32 longLengthID; /* 0 == no longLength; 1 == Represent the long literal; 2 == Represent the long match; */
|
||||
U32 longLengthPos; /* Index of the sequence to apply long length modification to */
|
||||
} seqStore_t;
|
||||
|
||||
typedef struct {
|
||||
|
@ -2464,17 +2464,22 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
|
||||
static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
|
||||
{
|
||||
const seqStore_t* seqStore = ZSTD_getSeqStore(zc);
|
||||
const seqDef* seqs = seqStore->sequencesStart;
|
||||
size_t seqsSize = seqStore->sequences - seqs;
|
||||
const seqDef* seqStoreSeqs = seqStore->sequencesStart;
|
||||
size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs;
|
||||
size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart);
|
||||
size_t literalsRead = 0;
|
||||
size_t lastLLSize;
|
||||
|
||||
ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex];
|
||||
size_t i; size_t position; int repIdx;
|
||||
size_t i;
|
||||
int repIdx;
|
||||
|
||||
assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences);
|
||||
for (i = 0, position = 0; i < seqsSize; ++i) {
|
||||
outSeqs[i].offset = seqs[i].offset;
|
||||
outSeqs[i].litLength = seqs[i].litLength;
|
||||
outSeqs[i].matchLength = seqs[i].matchLength + MINMATCH;
|
||||
/* Ensure we have enough space for last literals "sequence" */
|
||||
assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1);
|
||||
for (i = 0; i < seqStoreSeqSize; ++i) {
|
||||
outSeqs[i].litLength = seqStoreSeqs[i].litLength;
|
||||
outSeqs[i].matchLength = seqStoreSeqs[i].matchLength + MINMATCH;
|
||||
|
||||
if (i == seqStore->longLengthPos) {
|
||||
if (seqStore->longLengthID == 1) {
|
||||
@ -2484,32 +2489,39 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
|
||||
}
|
||||
}
|
||||
|
||||
if (outSeqs[i].offset <= ZSTD_REP_NUM) {
|
||||
outSeqs[i].rep = outSeqs[i].offset;
|
||||
repIdx = (unsigned int)i - outSeqs[i].offset;
|
||||
if (seqStoreSeqs[i].offset <= ZSTD_REP_NUM) {
|
||||
outSeqs[i].rep = seqStoreSeqs[i].offset;
|
||||
repIdx = (unsigned int)i - seqStoreSeqs[i].offset;
|
||||
|
||||
if (outSeqs[i].litLength == 0) {
|
||||
if (outSeqs[i].offset < 3) {
|
||||
if (seqStoreSeqs[i].litLength == 0) {
|
||||
if (seqStoreSeqs[i].offset < 3) {
|
||||
--repIdx;
|
||||
} else {
|
||||
repIdx = (unsigned int)i - 1;
|
||||
}
|
||||
++outSeqs[i].rep;
|
||||
}
|
||||
assert(repIdx >= -3);
|
||||
outSeqs[i].offset = repIdx >= 0 ? outSeqs[repIdx].offset : repStartValue[-repIdx - 1];
|
||||
if (outSeqs[i].rep == 4) {
|
||||
if (outSeqs[i].rep == 3 && outSeqs[i].litLength == 0) {
|
||||
--outSeqs[i].offset;
|
||||
}
|
||||
} else {
|
||||
outSeqs[i].offset -= ZSTD_REP_NUM;
|
||||
outSeqs[i].offset = seqStoreSeqs[i].offset - ZSTD_REP_NUM;
|
||||
}
|
||||
|
||||
position += outSeqs[i].litLength;
|
||||
outSeqs[i].matchPos = (unsigned int)position;
|
||||
position += outSeqs[i].matchLength;
|
||||
literalsRead += outSeqs[i].litLength;
|
||||
}
|
||||
zc->seqCollector.seqIndex += seqsSize;
|
||||
|
||||
/* Insert last literals (if any exist) in the block as a sequence with ml == off == 0.
|
||||
* If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker
|
||||
* for the block boundary, according to the API.
|
||||
*/
|
||||
assert(seqStoreLiteralsSize >= literalsRead);
|
||||
lastLLSize = seqStoreLiteralsSize - literalsRead;
|
||||
outSeqs[i].litLength = (U32)lastLLSize;
|
||||
outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0;
|
||||
seqStoreSeqSize++;
|
||||
|
||||
zc->seqCollector.seqIndex += seqStoreSeqSize;
|
||||
}
|
||||
|
||||
size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
|
||||
@ -2584,6 +2596,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
|
||||
|
||||
if (zc->seqCollector.collectSequences) {
|
||||
ZSTD_copyBlockSequences(zc);
|
||||
ZSTD_confirmRepcodesAndEntropyTables(zc);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
52
lib/zstd.h
52
lib/zstd.h
@ -1116,21 +1116,39 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
|
||||
typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;
|
||||
|
||||
typedef struct {
|
||||
unsigned int matchPos; /* Match pos in dst */
|
||||
/* If seqDef.offset > 3, then this is seqDef.offset - 3
|
||||
* If seqDef.offset < 3, then this is the corresponding repeat offset
|
||||
* But if seqDef.offset < 3 and litLength == 0, this is the
|
||||
* repeat offset before the corresponding repeat offset
|
||||
* And if seqDef.offset == 3 and litLength == 0, this is the
|
||||
* most recent repeat offset - 1
|
||||
*/
|
||||
unsigned int offset;
|
||||
unsigned int litLength; /* Literal length */
|
||||
unsigned int matchLength; /* Match length */
|
||||
/* 0 when seq not rep and seqDef.offset otherwise
|
||||
* when litLength == 0 this will be <= 4, otherwise <= 3 like normal
|
||||
*/
|
||||
unsigned int rep;
|
||||
unsigned int offset; /* The offset of the match. (NOT the same as the offset code)
|
||||
* If offset == 0 and matchLength == 0, this sequence represents the last
|
||||
* literals in the block of litLength size.
|
||||
*/
|
||||
|
||||
unsigned int litLength; /* Literal length of the sequence. */
|
||||
unsigned int matchLength; /* Match length of the sequence. */
|
||||
|
||||
/* Note: Users of this API may provide a sequence with matchLength == litLength == offset == 0.
|
||||
* In this case, we will treat the sequence as a marker for a block boundary.
|
||||
*/
|
||||
|
||||
unsigned int rep; /* Indicates which repeat offset is stored in the field 'offset'.
|
||||
* Ranges from [0, 3].
|
||||
*
|
||||
* Repeat offsets are essentially previous offsets from previous sequences sorted in
|
||||
* recency order. For more detail, see doc/zstd_compression_format.md
|
||||
*
|
||||
* If rep == 0, then 'offset' does not contain a repeat offset.
|
||||
* If rep > 0:
|
||||
* If litLength != 0:
|
||||
* rep == 1 --> offset == repeat_offset_1
|
||||
* rep == 2 --> offset == repeat_offset_2
|
||||
* rep == 3 --> offset == repeat_offset_3
|
||||
* If litLength == 0:
|
||||
* rep == 1 --> offset == repeat_offset_2
|
||||
* rep == 2 --> offset == repeat_offset_3
|
||||
* rep == 3 --> offset == repeat_offset_1 - 1
|
||||
*
|
||||
* Note: This field is optional. ZSTD_getSequences() will calculate the value of
|
||||
* 'rep', but repeat offsets do not necessarily need to be calculated from an external
|
||||
* sequence provider's perspective.
|
||||
*/
|
||||
} ZSTD_Sequence;
|
||||
|
||||
typedef struct {
|
||||
@ -1276,7 +1294,9 @@ ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcS
|
||||
ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
|
||||
|
||||
/*! ZSTD_getSequences() :
|
||||
* Extract sequences from the sequence store
|
||||
* Extract sequences from the sequence store.
|
||||
* Each block will end with a dummy sequence with offset == 0, matchLength == 0, and litLength == length of last literals.
|
||||
*
|
||||
* zc can be used to insert custom compression params.
|
||||
* This function invokes ZSTD_compress2
|
||||
* @return : number of sequences extracted
|
||||
|
@ -309,22 +309,23 @@ static void FUZ_decodeSequences(BYTE* dst, ZSTD_Sequence* seqs, size_t seqsSize,
|
||||
{
|
||||
size_t i;
|
||||
size_t j;
|
||||
for(i = 0; i < seqsSize - 1; ++i) {
|
||||
assert(dst + seqs[i].litLength + seqs[i].matchLength < dst + size);
|
||||
assert(src + seqs[i].litLength + seqs[i].matchLength < src + size);
|
||||
for(i = 0; i < seqsSize; ++i) {
|
||||
assert(dst + seqs[i].litLength + seqs[i].matchLength <= dst + size);
|
||||
assert(src + seqs[i].litLength + seqs[i].matchLength <= src + size);
|
||||
|
||||
memcpy(dst, src, seqs[i].litLength);
|
||||
dst += seqs[i].litLength;
|
||||
src += seqs[i].litLength;
|
||||
size -= seqs[i].litLength;
|
||||
|
||||
for (j = 0; j < seqs[i].matchLength; ++j)
|
||||
dst[j] = dst[j - seqs[i].offset];
|
||||
dst += seqs[i].matchLength;
|
||||
src += seqs[i].matchLength;
|
||||
size -= seqs[i].matchLength;
|
||||
if (seqs[i].offset != 0) {
|
||||
for (j = 0; j < seqs[i].matchLength; ++j)
|
||||
dst[j] = dst[j - seqs[i].offset];
|
||||
dst += seqs[i].matchLength;
|
||||
src += seqs[i].matchLength;
|
||||
size -= seqs[i].matchLength;
|
||||
}
|
||||
}
|
||||
memcpy(dst, src, size);
|
||||
}
|
||||
|
||||
/*=============================================
|
||||
@ -2726,6 +2727,7 @@ static int basicUnitTests(U32 const seed, double compressibility)
|
||||
ZSTD_freeCCtx(cctx);
|
||||
free(seqs);
|
||||
}
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
|
||||
/* Multiple blocks of zeros test */
|
||||
#define LONGZEROSLENGTH 1000000 /* 1MB of zeros */
|
||||
|
Loading…
x
Reference in New Issue
Block a user