mirror of
https://github.com/facebook/zstd.git
synced 2025-03-06 08:49:28 +02:00
improved speed of the Sequences converter
This commit is contained in:
parent
95ad9e47ff
commit
12c47d3262
@ -1084,7 +1084,7 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
|
||||
*
|
||||
* Note: This field is optional. ZSTD_generateSequences() will calculate the value of
|
||||
* 'rep', but repeat offsets do not necessarily need to be calculated from an external
|
||||
* sequence provider's perspective. For example, ZSTD_compressSequences() does not
|
||||
* sequence provider perspective. For example, ZSTD_compressSequences() does not
|
||||
* use this 'rep' field at all (as of now).
|
||||
*/
|
||||
} ZSTD_Sequence;
|
||||
@ -1331,7 +1331,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr,
|
||||
<pre><b>ZSTD_DEPRECATED("For debugging only, will be replaced by ZSTD_extractSequences()")
|
||||
ZSTDLIB_STATIC_API size_t
|
||||
ZSTD_generateSequences(ZSTD_CCtx* zc,
|
||||
ZSTD_Sequence* outSeqs, size_t outSeqsSize,
|
||||
ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
|
||||
const void* src, size_t srcSize);
|
||||
</b><p> WARNING: This function is meant for debugging and informational purposes ONLY!
|
||||
Its implementation is flawed, and it will be deleted in a future version.
|
||||
@ -1345,7 +1345,7 @@ ZSTD_generateSequences(ZSTD_CCtx* zc,
|
||||
@param zc The compression context to be used for ZSTD_compress2(). Set any
|
||||
compression parameters you need on this context.
|
||||
@param outSeqs The output sequences buffer of size @p outSeqsCapacity
|
||||
@param outSeqsSize The size of the output sequences buffer.
|
||||
@param outSeqsCapacity The size of the output sequences buffer.
|
||||
ZSTD_sequenceBound(srcSize) is an upper bound on the number
|
||||
of sequences that can be generated.
|
||||
@param src The source buffer to generate sequences from of size @p srcSize.
|
||||
@ -1392,11 +1392,17 @@ ZSTD_compressSequences(ZSTD_CCtx* cctx,
|
||||
the block size derived from the cctx, and sequences may be split. This is the default setting.
|
||||
|
||||
If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain
|
||||
block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided.
|
||||
valid block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided.
|
||||
|
||||
If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined
|
||||
behavior. If ZSTD_c_validateSequences == 1, then if sequence is invalid (see doc/zstd_compression_format.md for
|
||||
specifics regarding offset/matchlength requirements) then the function will bail out and return an error.
|
||||
When ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, it's possible to choose whether to generate repcodes
|
||||
using the advanced parameter ZSTD_c_repcodeResolution. Repcodes will improve compression ratio, though the benefit
|
||||
can vary greatly depending on Sequences. On the other hand, repcode resolution is an expensive operation.
|
||||
By default, it's disabled at low (<10) compression levels, and enabled above the threshold (>=10).
|
||||
ZSTD_c_repcodeResolution makes it possible to directly manage this processing in either direction.
|
||||
|
||||
If ZSTD_c_validateSequences == 0, this function blindly accepts the Sequences provided. Invalid Sequences cause undefined
|
||||
behavior. If ZSTD_c_validateSequences == 1, then the function will detect invalid Sequences (see doc/zstd_compression_format.md for
|
||||
specifics regarding offset/matchlength requirements) and then bail out and return an error.
|
||||
|
||||
In addition to the two adjustable experimental params, there are other important cctx params.
|
||||
- ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. It has a minimum value of ZSTD_MINMATCH_MIN.
|
||||
@ -1414,19 +1420,21 @@ ZSTD_compressSequences(ZSTD_CCtx* cctx,
|
||||
<pre><b>ZSTDLIB_STATIC_API size_t
|
||||
ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx,
|
||||
void* dst, size_t dstCapacity,
|
||||
const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
|
||||
const void* literals, size_t litSize, size_t srcSize);
|
||||
const ZSTD_Sequence* inSeqs, size_t nbSequences,
|
||||
const void* literals, size_t litSize);
|
||||
</b><p> This is a variant of ZSTD_compressSequences() which,
|
||||
instead of receiving (src,srcSize) as input parameter, receives (literals,litSize),
|
||||
aka all literals already extracted and laid out into a single continuous buffer.
|
||||
aka all the literals, already extracted and laid out into a single continuous buffer.
|
||||
This can be useful if the process generating the sequences also happens to generate the buffer of literals,
|
||||
thus skipping an extraction + caching stage.
|
||||
It's essentially a speed optimization when the right conditions are met,
|
||||
but it also is restricted by the following limitations:
|
||||
It's a speed optimization, useful when the right conditions are met,
|
||||
but it also features the following limitations:
|
||||
- Only supports explicit delimiter mode
|
||||
- Not compatible with frame checksum, which must be disabled
|
||||
- Can fail when unable to compress sufficiently
|
||||
Also, to be valid, @litSize must be equal to the sum of all @.litLength fields in @inSeqs.
|
||||
- Does not write the content size in frame header
|
||||
- If any block is incompressible, will fail and return an error
|
||||
- @litSize must be == sum of all @.litLength fields in @inSeqs. Any discrepancy will generate an error.
|
||||
- the buffer @literals must be larger than @litSize by at least 8 bytes.
|
||||
@return : final compressed size, or a ZSTD error code.
|
||||
|
||||
</p></pre><BR>
|
||||
|
@ -2207,7 +2207,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
||||
zc->appliedParams.fParams.contentSizeFlag = 0;
|
||||
DEBUGLOG(4, "pledged content size : %u ; flag : %u",
|
||||
(unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag);
|
||||
zc->blockSize = blockSize;
|
||||
zc->blockSizeMax = blockSize;
|
||||
|
||||
XXH64_reset(&zc->xxhState, 0);
|
||||
zc->stage = ZSTDcs_init;
|
||||
@ -4293,8 +4293,8 @@ ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc,
|
||||
lastBlock, 0 /* isPartition */);
|
||||
FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!");
|
||||
DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits");
|
||||
assert(zc->blockSize <= ZSTD_BLOCKSIZE_MAX);
|
||||
assert(cSizeSingleBlock <= zc->blockSize + ZSTD_blockHeaderSize);
|
||||
assert(zc->blockSizeMax <= ZSTD_BLOCKSIZE_MAX);
|
||||
assert(cSizeSingleBlock <= zc->blockSizeMax + ZSTD_blockHeaderSize);
|
||||
return cSizeSingleBlock;
|
||||
}
|
||||
|
||||
@ -4328,7 +4328,7 @@ ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc,
|
||||
dstCapacity -= cSizeChunk;
|
||||
cSize += cSizeChunk;
|
||||
*currSeqStore = *nextSeqStore;
|
||||
assert(cSizeChunk <= zc->blockSize + ZSTD_blockHeaderSize);
|
||||
assert(cSizeChunk <= zc->blockSizeMax + ZSTD_blockHeaderSize);
|
||||
}
|
||||
/* cRep and dRep may have diverged during the compression.
|
||||
* If so, we use the dRep repcodes for the next block.
|
||||
@ -4580,7 +4580,7 @@ static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
|
||||
const void* src, size_t srcSize,
|
||||
U32 lastFrameChunk)
|
||||
{
|
||||
size_t blockSizeMax = cctx->blockSize;
|
||||
size_t blockSizeMax = cctx->blockSizeMax;
|
||||
size_t remaining = srcSize;
|
||||
const BYTE* ip = (const BYTE*)src;
|
||||
BYTE* const ostart = (BYTE*)dst;
|
||||
@ -4816,7 +4816,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
|
||||
src, (BYTE const*)src + srcSize);
|
||||
}
|
||||
|
||||
DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
|
||||
DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSizeMax);
|
||||
{ size_t const cSize = frame ?
|
||||
ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
|
||||
ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */);
|
||||
@ -6070,11 +6070,11 @@ size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
|
||||
static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx)
|
||||
{
|
||||
if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
|
||||
return cctx->blockSize - cctx->stableIn_notConsumed;
|
||||
return cctx->blockSizeMax - cctx->stableIn_notConsumed;
|
||||
}
|
||||
assert(cctx->appliedParams.inBufferMode == ZSTD_bm_buffered);
|
||||
{ size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos;
|
||||
if (hintInSize==0) hintInSize = cctx->blockSize;
|
||||
if (hintInSize==0) hintInSize = cctx->blockSizeMax;
|
||||
return hintInSize;
|
||||
}
|
||||
}
|
||||
@ -6162,7 +6162,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
||||
} else {
|
||||
assert(zcs->appliedParams.inBufferMode == ZSTD_bm_stable);
|
||||
if ( (flushMode == ZSTD_e_continue)
|
||||
&& ( (size_t)(iend - ip) < zcs->blockSize) ) {
|
||||
&& ( (size_t)(iend - ip) < zcs->blockSizeMax) ) {
|
||||
/* can't compress a full block : stop here */
|
||||
zcs->stableIn_notConsumed = (size_t)(iend - ip);
|
||||
ip = iend; /* pretend to have consumed input */
|
||||
@ -6181,7 +6181,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
||||
size_t cSize;
|
||||
size_t oSize = (size_t)(oend-op);
|
||||
size_t const iSize = inputBuffered ? zcs->inBuffPos - zcs->inToCompress
|
||||
: MIN((size_t)(iend - ip), zcs->blockSize);
|
||||
: MIN((size_t)(iend - ip), zcs->blockSizeMax);
|
||||
if (oSize >= ZSTD_compressBound(iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable)
|
||||
cDst = op; /* compress into output buffer, to skip flush stage */
|
||||
else
|
||||
@ -6196,9 +6196,9 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
|
||||
FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
|
||||
zcs->frameEnded = lastBlock;
|
||||
/* prepare next block */
|
||||
zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;
|
||||
zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSizeMax;
|
||||
if (zcs->inBuffTarget > zcs->inBuffSize)
|
||||
zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize;
|
||||
zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSizeMax;
|
||||
DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u",
|
||||
(unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize);
|
||||
if (!lastBlock)
|
||||
@ -6413,7 +6413,7 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
|
||||
/* for small input: avoid automatic flush on reaching end of block, since
|
||||
* it would require adding a 3-byte null block to end the frame
|
||||
*/
|
||||
cctx->inBuffTarget = cctx->blockSize + (cctx->blockSize == pledgedSrcSize);
|
||||
cctx->inBuffTarget = cctx->blockSizeMax + (cctx->blockSizeMax == pledgedSrcSize);
|
||||
} else {
|
||||
cctx->inBuffTarget = 0;
|
||||
}
|
||||
@ -6951,7 +6951,7 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
|
||||
size_t compressedSeqsSize;
|
||||
size_t cBlockSize;
|
||||
size_t blockSize = determine_blockSize(cctx->appliedParams.blockDelimiters,
|
||||
cctx->blockSize, remaining,
|
||||
cctx->blockSizeMax, remaining,
|
||||
inSeqs, inSeqsSize, seqPos);
|
||||
U32 const lastBlock = (blockSize == remaining);
|
||||
FORWARD_IF_ERROR(blockSize, "Error while trying to determine block size");
|
||||
@ -7093,167 +7093,112 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* cctx,
|
||||
}
|
||||
|
||||
/*
|
||||
* seqPos must end on an explicit block delimiter
|
||||
* @blockSize must be exactly correct.
|
||||
* Note: Sequence validation functionality has been disabled (removed).
|
||||
* This is helpful to find back simplicity, leading to performance.
|
||||
* It may be re-inserted later.
|
||||
*/
|
||||
FORCE_INLINE_TEMPLATE size_t
|
||||
ZSTD_convertBlockSequences_wBlockDelim_internal(ZSTD_CCtx* cctx,
|
||||
ZSTD_SequencePosition* seqPos,
|
||||
size_t ZSTD_convertBlockSequences(ZSTD_CCtx* cctx,
|
||||
const ZSTD_Sequence* const inSeqs, size_t nbSequences,
|
||||
size_t blockSize,
|
||||
int const repcodeResolution,
|
||||
int const checkSequences)
|
||||
int const repcodeResolution)
|
||||
{
|
||||
U32 idx = seqPos->idx;
|
||||
U32 const startIdx = idx;
|
||||
Repcodes_t updatedRepcodes;
|
||||
U32 dictSize = 0;
|
||||
size_t startPosInSrc = seqPos->posInSrc;
|
||||
size_t litConsumed = 0;
|
||||
size_t seqNb = 0;
|
||||
|
||||
DEBUGLOG(5, "ZSTD_transferSequencesOnly_wBlockDelim (blockSize = %zu)", blockSize);
|
||||
DEBUGLOG(5, "ZSTD_convertBlockSequences (nbSequences = %zu)", nbSequences);
|
||||
|
||||
/* dictSize is useful to check offset within Sequence validation */
|
||||
if (checkSequences) {
|
||||
if (cctx->cdict) {
|
||||
dictSize = (U32)cctx->cdict->dictContentSize;
|
||||
} else if (cctx->prefixDict.dict) {
|
||||
dictSize = (U32)cctx->prefixDict.dictSize;
|
||||
}
|
||||
}
|
||||
RETURN_ERROR_IF(nbSequences >= cctx->seqStore.maxNbSeq, externalSequences_invalid,
|
||||
"Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
|
||||
|
||||
ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(Repcodes_t));
|
||||
|
||||
/* check end condition */
|
||||
assert(nbSequences >= 1);
|
||||
assert(inSeqs[nbSequences-1].matchLength == 0);
|
||||
assert(inSeqs[nbSequences-1].offset == 0);
|
||||
|
||||
/* Convert Sequences from public format to internal format */
|
||||
for (; idx < nbSequences && (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0); ++idx) {
|
||||
U32 const litLength = inSeqs[idx].litLength;
|
||||
U32 const matchLength = inSeqs[idx].matchLength;
|
||||
for (seqNb = 0; seqNb < nbSequences - 1 ; seqNb++) {
|
||||
U32 const litLength = inSeqs[seqNb].litLength;
|
||||
U32 const matchLength = inSeqs[seqNb].matchLength;
|
||||
U32 offBase;
|
||||
|
||||
if (!repcodeResolution) {
|
||||
offBase = OFFSET_TO_OFFBASE(inSeqs[idx].offset);
|
||||
offBase = OFFSET_TO_OFFBASE(inSeqs[seqNb].offset);
|
||||
} else {
|
||||
U32 const ll0 = (litLength == 0);
|
||||
offBase = ZSTD_finalizeOffBase(inSeqs[idx].offset, updatedRepcodes.rep, ll0);
|
||||
offBase = ZSTD_finalizeOffBase(inSeqs[seqNb].offset, updatedRepcodes.rep, ll0);
|
||||
ZSTD_updateRep(updatedRepcodes.rep, offBase, ll0);
|
||||
}
|
||||
|
||||
DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength);
|
||||
if (checkSequences) {
|
||||
seqPos->posInSrc += litLength + matchLength;
|
||||
FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch,
|
||||
seqPos->posInSrc,
|
||||
cctx->appliedParams.cParams.windowLog, dictSize,
|
||||
ZSTD_hasExtSeqProd(&cctx->appliedParams)),
|
||||
"Sequence validation failed");
|
||||
}
|
||||
RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid,
|
||||
"Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
|
||||
ZSTD_storeSeqOnly(&cctx->seqStore, litLength, offBase, matchLength);
|
||||
litConsumed += litLength;
|
||||
}
|
||||
|
||||
/* last sequence (only literals) */
|
||||
{ size_t const lastLitLength = inSeqs[idx].litLength;
|
||||
litConsumed += lastLitLength;
|
||||
if (checkSequences) {
|
||||
seqPos->posInSrc += lastLitLength;
|
||||
/* blockSize must be exactly correct (checked before calling this function) */
|
||||
RETURN_ERROR_IF((seqPos->posInSrc - startPosInSrc) != blockSize, externalSequences_invalid, "mismatch between Sequences and specified blockSize");
|
||||
} else {
|
||||
/* blockSize presumed exactly correct (checked before calling this function) */
|
||||
seqPos->posInSrc += blockSize;
|
||||
}
|
||||
}
|
||||
litConsumed += inSeqs[nbSequences-1].litLength;
|
||||
|
||||
/* If we skipped repcode search while parsing, we need to update repcodes now */
|
||||
assert(idx >= startIdx);
|
||||
if (!repcodeResolution && idx != startIdx) {
|
||||
if (!repcodeResolution && nbSequences > 1) {
|
||||
U32* const rep = updatedRepcodes.rep;
|
||||
U32 lastSeqIdx = idx - 1; /* index of last non-block-delimiter sequence */
|
||||
|
||||
if (lastSeqIdx >= startIdx + 2) {
|
||||
if (nbSequences >= 4) {
|
||||
U32 lastSeqIdx = (U32)nbSequences - 2; /* index of last full sequence */
|
||||
rep[2] = inSeqs[lastSeqIdx - 2].offset;
|
||||
rep[1] = inSeqs[lastSeqIdx - 1].offset;
|
||||
rep[0] = inSeqs[lastSeqIdx].offset;
|
||||
} else if (lastSeqIdx == startIdx + 1) {
|
||||
} else if (nbSequences == 3) {
|
||||
rep[2] = rep[0];
|
||||
rep[1] = inSeqs[lastSeqIdx - 1].offset;
|
||||
rep[0] = inSeqs[lastSeqIdx].offset;
|
||||
rep[1] = inSeqs[0].offset;
|
||||
rep[0] = inSeqs[1].offset;
|
||||
} else {
|
||||
assert(lastSeqIdx == startIdx);
|
||||
assert(nbSequences == 2);
|
||||
rep[2] = rep[1];
|
||||
rep[1] = rep[0];
|
||||
rep[0] = inSeqs[lastSeqIdx].offset;
|
||||
rep[0] = inSeqs[0].offset;
|
||||
}
|
||||
}
|
||||
|
||||
ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(Repcodes_t));
|
||||
|
||||
seqPos->idx = idx+1;
|
||||
return litConsumed;
|
||||
}
|
||||
|
||||
/* for tests only */
|
||||
void CCTX_resetSeqStore(ZSTD_CCtx* cctx)
|
||||
{
|
||||
cctx->seqStore.sequences = cctx->seqStore.sequencesStart;
|
||||
cctx->seqStore.lit = cctx->seqStore.litStart;
|
||||
}
|
||||
|
||||
typedef size_t (*ZSTD_convertBlockSequences_f) (ZSTD_CCtx* cctx,
|
||||
ZSTD_SequencePosition* seqPos,
|
||||
const ZSTD_Sequence* const inSeqs, size_t nbSequences,
|
||||
size_t blockSize,
|
||||
int const repcodeResolution);
|
||||
|
||||
size_t
|
||||
ZSTD_convertBlockSequences_wBlockDelim(ZSTD_CCtx* cctx,
|
||||
ZSTD_SequencePosition* seqPos,
|
||||
const ZSTD_Sequence* const inSeqs, size_t nbSequences,
|
||||
size_t blockSize,
|
||||
int const repcodeResolution)
|
||||
static size_t getNbSequencesFor1Block(const ZSTD_Sequence* seqs, size_t nbSeqs)
|
||||
{
|
||||
return ZSTD_convertBlockSequences_wBlockDelim_internal(cctx,
|
||||
seqPos, inSeqs, nbSequences, blockSize,
|
||||
repcodeResolution, 0);
|
||||
size_t n;
|
||||
assert(seqs);
|
||||
for (n=0; n<nbSeqs; n++) {
|
||||
if (seqs[n].matchLength == 0) {
|
||||
assert(seqs[n].offset == 0);
|
||||
return n+1;
|
||||
}
|
||||
}
|
||||
RETURN_ERROR(externalSequences_invalid, "missing final block delimiter");
|
||||
}
|
||||
|
||||
static size_t
|
||||
ZSTD_convertBlockSequences_wBlockDelim_andCheckSequences(ZSTD_CCtx* cctx,
|
||||
ZSTD_SequencePosition* seqPos,
|
||||
const ZSTD_Sequence* const inSeqs, size_t nbSequences,
|
||||
size_t blockSize,
|
||||
int const repcodeResolution)
|
||||
{
|
||||
return ZSTD_convertBlockSequences_wBlockDelim_internal(cctx,
|
||||
seqPos, inSeqs, nbSequences, blockSize,
|
||||
repcodeResolution, 1);
|
||||
}
|
||||
|
||||
static size_t
|
||||
ZSTD_compressSequencesAndLiterals_internal(ZSTD_CCtx* cctx,
|
||||
void* dst, size_t dstCapacity,
|
||||
const ZSTD_Sequence* inSeqs, size_t nbSequences,
|
||||
const void* literals, size_t litSize, size_t srcSize)
|
||||
const void* literals, size_t litSize)
|
||||
{
|
||||
size_t cSize = 0;
|
||||
size_t remaining = srcSize;
|
||||
ZSTD_SequencePosition seqPos = {0, 0, 0};
|
||||
BYTE* op = (BYTE*)dst;
|
||||
int const repcodeResolution = (cctx->appliedParams.searchForExternalRepcodes == ZSTD_ps_enable);
|
||||
const ZSTD_convertBlockSequences_f convertBlockSequences = cctx->appliedParams.validateSequences ?
|
||||
ZSTD_convertBlockSequences_wBlockDelim_andCheckSequences
|
||||
: ZSTD_convertBlockSequences_wBlockDelim;
|
||||
|
||||
DEBUGLOG(4, "ZSTD_compressSequencesAndLiterals_internal: nbSeqs=%zu, litSize=%zu", nbSequences, litSize);
|
||||
if (cctx->appliedParams.blockDelimiters == ZSTD_sf_noBlockDelimiters) {
|
||||
RETURN_ERROR(GENERIC, "This mode is only compatible with explicit delimiters");
|
||||
}
|
||||
assert(cctx->appliedParams.searchForExternalRepcodes != ZSTD_ps_auto);
|
||||
|
||||
DEBUGLOG(4, "ZSTD_compressSequencesAndLiterals_internal: nbSeqs=%zu, litSize=%zu", nbSequences, litSize);
|
||||
RETURN_ERROR_IF(nbSequences == 0, externalSequences_invalid, "Requires at least 1 end-of-block");
|
||||
|
||||
/* Special case: empty frame */
|
||||
if (remaining == 0) {
|
||||
if ((nbSequences == 1) && (inSeqs[0].litLength == 0)) {
|
||||
U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1);
|
||||
RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "No room for empty frame block header");
|
||||
MEM_writeLE32(op, cBlockHeader24);
|
||||
@ -7262,23 +7207,21 @@ ZSTD_compressSequencesAndLiterals_internal(ZSTD_CCtx* cctx,
|
||||
cSize += ZSTD_blockHeaderSize;
|
||||
}
|
||||
|
||||
while (remaining) {
|
||||
while (nbSequences) {
|
||||
size_t compressedSeqsSize, cBlockSize, litConsumed;
|
||||
size_t blockSize = determine_blockSize(cctx->appliedParams.blockDelimiters,
|
||||
cctx->blockSize, remaining,
|
||||
inSeqs, nbSequences, seqPos);
|
||||
U32 const lastBlock = (blockSize == remaining);
|
||||
FORWARD_IF_ERROR(blockSize, "Error while trying to determine block size");
|
||||
assert(blockSize <= remaining);
|
||||
size_t nbBlockSeqs = getNbSequencesFor1Block(inSeqs, nbSequences);
|
||||
U32 const lastBlock = (nbBlockSeqs == nbSequences);
|
||||
FORWARD_IF_ERROR(nbBlockSeqs, "Error while trying to determine nb of sequences for a block");
|
||||
assert(nbBlockSeqs <= nbSequences);
|
||||
ZSTD_resetSeqStore(&cctx->seqStore);
|
||||
|
||||
litConsumed = convertBlockSequences(cctx,
|
||||
&seqPos,
|
||||
inSeqs, nbSequences,
|
||||
blockSize,
|
||||
litConsumed = ZSTD_convertBlockSequences(cctx,
|
||||
inSeqs, nbBlockSeqs,
|
||||
repcodeResolution);
|
||||
FORWARD_IF_ERROR(litConsumed, "Bad sequence conversion");
|
||||
RETURN_ERROR_IF(litConsumed > litSize, externalSequences_invalid, "discrepancy between literals buffer and Sequences");
|
||||
inSeqs += nbBlockSeqs;
|
||||
nbSequences -= nbBlockSeqs;
|
||||
|
||||
/* Note: when blockSize is very small, other variants send it uncompressed.
|
||||
* Here, we still send the sequences, because we don't have the original source to send it uncompressed.
|
||||
@ -7286,16 +7229,18 @@ ZSTD_compressSequencesAndLiterals_internal(ZSTD_CCtx* cctx,
|
||||
* but that's complex, costly, and memory intensive, which goes against the objectives of this variant. */
|
||||
|
||||
RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "not enough dstCapacity to write a new compressed block");
|
||||
compressedSeqsSize = ZSTD_entropyCompressSeqStore_wExtLitBuffer(
|
||||
|
||||
compressedSeqsSize = ZSTD_entropyCompressSeqStore_internal(
|
||||
op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize,
|
||||
literals, litConsumed,
|
||||
blockSize,
|
||||
&cctx->seqStore,
|
||||
&cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
|
||||
&cctx->appliedParams,
|
||||
cctx->tmpWorkspace, cctx->tmpWkspSize /* statically allocated in resetCCtx */,
|
||||
cctx->bmi2);
|
||||
FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed");
|
||||
/* note: the spec forbids for any compressed block to be larger than maximum block size */
|
||||
if (compressedSeqsSize > cctx->blockSizeMax) compressedSeqsSize = 0;
|
||||
DEBUGLOG(5, "Compressed sequences size: %zu", compressedSeqsSize);
|
||||
litSize -= litConsumed;
|
||||
literals = (const char*)literals + litConsumed;
|
||||
@ -7331,7 +7276,6 @@ ZSTD_compressSequencesAndLiterals_internal(ZSTD_CCtx* cctx,
|
||||
break;
|
||||
} else {
|
||||
op += cBlockSize;
|
||||
remaining -= blockSize;
|
||||
dstCapacity -= cBlockSize;
|
||||
cctx->isFirstBlock = 0;
|
||||
}
|
||||
@ -7347,7 +7291,7 @@ size_t
|
||||
ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx,
|
||||
void* dst, size_t dstCapacity,
|
||||
const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
|
||||
const void* literals, size_t litSize, size_t srcSize)
|
||||
const void* literals, size_t litSize)
|
||||
{
|
||||
BYTE* op = (BYTE*)dst;
|
||||
size_t cSize = 0;
|
||||
@ -7355,15 +7299,18 @@ ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx,
|
||||
/* Transparent initialization stage, same as compressStream2() */
|
||||
DEBUGLOG(4, "ZSTD_compressSequencesAndLiterals (dstCapacity=%zu)", dstCapacity);
|
||||
assert(cctx != NULL);
|
||||
FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed");
|
||||
FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_continue, ZSTD_CONTENTSIZE_UNKNOWN), "CCtx initialization failed");
|
||||
|
||||
if (cctx->appliedParams.blockDelimiters == ZSTD_sf_noBlockDelimiters) {
|
||||
RETURN_ERROR(frameParameter_unsupported, "This mode is only compatible with explicit delimiters");
|
||||
}
|
||||
if (cctx->appliedParams.fParams.checksumFlag) {
|
||||
RETURN_ERROR(frameParameter_unsupported, "this mode is incompatible with frame checksum");
|
||||
RETURN_ERROR(frameParameter_unsupported, "this mode is not compatible with frame checksum");
|
||||
}
|
||||
|
||||
/* Begin writing output, starting with frame header */
|
||||
{ size_t const frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity,
|
||||
&cctx->appliedParams, srcSize, cctx->dictID);
|
||||
&cctx->appliedParams, ZSTD_CONTENTSIZE_UNKNOWN, cctx->dictID);
|
||||
op += frameHeaderSize;
|
||||
assert(frameHeaderSize <= dstCapacity);
|
||||
dstCapacity -= frameHeaderSize;
|
||||
@ -7374,7 +7321,7 @@ ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx,
|
||||
{ size_t const cBlocksSize = ZSTD_compressSequencesAndLiterals_internal(cctx,
|
||||
op, dstCapacity,
|
||||
inSeqs, inSeqsSize,
|
||||
literals, litSize, srcSize);
|
||||
literals, litSize);
|
||||
FORWARD_IF_ERROR(cBlocksSize, "Compressing blocks failed!");
|
||||
cSize += cBlocksSize;
|
||||
assert(cBlocksSize <= dstCapacity);
|
||||
|
@ -484,7 +484,7 @@ struct ZSTD_CCtx_s {
|
||||
size_t dictContentSize;
|
||||
|
||||
ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
|
||||
size_t blockSize;
|
||||
size_t blockSizeMax;
|
||||
unsigned long long pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */
|
||||
unsigned long long consumedSrcSize;
|
||||
unsigned long long producedCSize;
|
||||
@ -1528,15 +1528,11 @@ typedef struct {
|
||||
size_t posInSrc; /* Number of bytes given by sequences provided so far */
|
||||
} ZSTD_SequencePosition;
|
||||
|
||||
size_t
|
||||
ZSTD_convertBlockSequences_wBlockDelim(ZSTD_CCtx* cctx,
|
||||
ZSTD_SequencePosition* seqPos,
|
||||
/* for benchmark */
|
||||
size_t ZSTD_convertBlockSequences(ZSTD_CCtx* cctx,
|
||||
const ZSTD_Sequence* const inSeqs, size_t nbSequences,
|
||||
size_t blockSize,
|
||||
int const repcodeResolution);
|
||||
|
||||
/* for tests only */
|
||||
void CCTX_resetSeqStore(ZSTD_CCtx* cctx);
|
||||
|
||||
/* ==============================================================
|
||||
* Private declarations
|
||||
|
@ -1675,6 +1675,7 @@ ZSTD_compressSequences(ZSTD_CCtx* cctx,
|
||||
* but it also features the following limitations:
|
||||
* - Only supports explicit delimiter mode
|
||||
* - Not compatible with frame checksum, which must be disabled
|
||||
* - Does not write the content size in frame header
|
||||
* - If any block is incompressible, will fail and return an error
|
||||
* - @litSize must be == sum of all @.litLength fields in @inSeqs. Any discrepancy will generate an error.
|
||||
* - the buffer @literals must be larger than @litSize by at least 8 bytes.
|
||||
@ -1684,7 +1685,7 @@ ZSTDLIB_STATIC_API size_t
|
||||
ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx,
|
||||
void* dst, size_t dstCapacity,
|
||||
const ZSTD_Sequence* inSeqs, size_t nbSequences,
|
||||
const void* literals, size_t litSize, size_t srcSize);
|
||||
const void* literals, size_t litSize);
|
||||
|
||||
|
||||
/*! ZSTD_writeSkippableFrame() :
|
||||
|
@ -26,6 +26,9 @@ export ZSTD_LEGACY_SUPPORT
|
||||
DEBUGLEVEL ?= 2
|
||||
export DEBUGLEVEL # transmit value to sub-makefiles
|
||||
|
||||
.PHONY: default
|
||||
default: fullbench
|
||||
|
||||
LIBZSTD_MK_DIR := ../lib
|
||||
include $(LIBZSTD_MK_DIR)/libzstd.mk
|
||||
|
||||
@ -78,9 +81,6 @@ FUZZERTEST ?= -T200s
|
||||
ZSTDRTTEST = --test-large-data
|
||||
DECODECORPUS_TESTTIME ?= -T30
|
||||
|
||||
.PHONY: default
|
||||
default: fullbench
|
||||
|
||||
.PHONY: all
|
||||
all: fullbench fuzzer zstreamtest paramgrill datagen decodecorpus roundTripCrash poolTests
|
||||
|
||||
|
@ -623,9 +623,9 @@ local_compressSequencesAndLiterals(const void* input, size_t inputSize,
|
||||
ZSTD_CCtx_setParameter(g_zcc, ZSTD_c_repcodeResolution, ZSTD_ps_enable);
|
||||
#endif
|
||||
assert(12 + nbSeqs * sizeof(ZSTD_Sequence) + nbLiterals == inputSize); (void)inputSize;
|
||||
(void)payload;
|
||||
(void)payload; (void)srcSize;
|
||||
|
||||
return ZSTD_compressSequencesAndLiterals(g_zcc, dst, dstCapacity, seqs, nbSeqs, literals, nbLiterals, srcSize);
|
||||
return ZSTD_compressSequencesAndLiterals(g_zcc, dst, dstCapacity, seqs, nbSeqs, literals, nbLiterals);
|
||||
}
|
||||
|
||||
static PrepResult prepConvertSequences(const void* src, size_t srcSize, int cLevel)
|
||||
@ -669,22 +669,21 @@ local_convertSequences(const void* input, size_t inputSize,
|
||||
void* dst, size_t dstCapacity,
|
||||
void* payload)
|
||||
{
|
||||
ZSTD_SequencePosition seqPos = { 0, 0 , 0 };
|
||||
const char* ip = input;
|
||||
size_t const blockSize = MEM_read32(ip);
|
||||
size_t const nbSeqs = MEM_read32(ip+=4);
|
||||
const ZSTD_Sequence* seqs = (const ZSTD_Sequence*)(const void*)(ip+=4);
|
||||
ZSTD_CCtx_reset(g_zcc, ZSTD_reset_session_and_parameters);
|
||||
CCTX_resetSeqStore(g_zcc);
|
||||
ZSTD_resetSeqStore(&g_zcc->seqStore);
|
||||
ZSTD_CCtx_setParameter(g_zcc, ZSTD_c_blockDelimiters, ZSTD_sf_explicitBlockDelimiters);
|
||||
# if 0 /* for tests */
|
||||
ZSTD_CCtx_setParameter(g_zcc, ZSTD_c_repcodeResolution, ZSTD_ps_enable);
|
||||
#endif
|
||||
assert(8 + nbSeqs * sizeof(ZSTD_Sequence) == inputSize); (void)inputSize;
|
||||
(void)dst; (void)dstCapacity;
|
||||
(void)payload;
|
||||
(void)payload; (void)blockSize;
|
||||
|
||||
return ZSTD_convertBlockSequences_wBlockDelim(g_zcc, &seqPos, seqs, nbSeqs, blockSize, 0);
|
||||
return ZSTD_convertBlockSequences(g_zcc, seqs, nbSeqs, 0);
|
||||
}
|
||||
|
||||
|
||||
|
@ -3909,35 +3909,21 @@ static int basicUnitTests(U32 const seed, double compressibility)
|
||||
FUZ_transferLiterals(litBuffer, decompressSize, CNBuffer, srcSize, seqs, nbSeqs);
|
||||
|
||||
/* not enough literals: must fail */
|
||||
compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, src, litSize-1, srcSize);
|
||||
compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, src, litSize-1);
|
||||
if (!ZSTD_isError(compressedSize)) {
|
||||
DISPLAY("ZSTD_compressSequencesAndLiterals() should have failed: not enough literals provided\n");
|
||||
goto _output_error;
|
||||
}
|
||||
|
||||
/* too many literals: must fail */
|
||||
compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, src, litSize+1, srcSize);
|
||||
compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, src, litSize+1);
|
||||
if (!ZSTD_isError(compressedSize)) {
|
||||
DISPLAY("ZSTD_compressSequencesAndLiterals() should have failed: too many literals provided\n");
|
||||
goto _output_error;
|
||||
}
|
||||
|
||||
/* too short srcSize: must fail */
|
||||
compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, src, litSize, srcSize-1);
|
||||
if (!ZSTD_isError(compressedSize)) {
|
||||
DISPLAY("ZSTD_compressSequencesAndLiterals() should have failed: srcSize is too short\n");
|
||||
goto _output_error;
|
||||
}
|
||||
|
||||
/* too large srcSize: must fail */
|
||||
compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, src, litSize, srcSize+1);
|
||||
if (!ZSTD_isError(compressedSize)) {
|
||||
DISPLAY("ZSTD_compressSequencesAndLiterals() should have failed: srcSize is too short\n");
|
||||
goto _output_error;
|
||||
}
|
||||
|
||||
/* correct amount of literals: should compress successfully */
|
||||
compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, litBuffer, litSize, srcSize);
|
||||
compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, litBuffer, litSize);
|
||||
if (ZSTD_isError(compressedSize)) {
|
||||
DISPLAY("Error in ZSTD_compressSequencesAndLiterals()\n");
|
||||
goto _output_error;
|
||||
|
Loading…
x
Reference in New Issue
Block a user