diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html index 7cfeda22d..2c4c4b0e8 100644 --- a/doc/zstd_manual.html +++ b/doc/zstd_manual.html @@ -1084,7 +1084,7 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); * * Note: This field is optional. ZSTD_generateSequences() will calculate the value of * 'rep', but repeat offsets do not necessarily need to be calculated from an external - * sequence provider's perspective. For example, ZSTD_compressSequences() does not + * sequence provider perspective. For example, ZSTD_compressSequences() does not * use this 'rep' field at all (as of now). */ } ZSTD_Sequence; @@ -1331,7 +1331,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr,
ZSTD_DEPRECATED("For debugging only, will be replaced by ZSTD_extractSequences()") ZSTDLIB_STATIC_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, - ZSTD_Sequence* outSeqs, size_t outSeqsSize, + ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, const void* src, size_t srcSize);WARNING: This function is meant for debugging and informational purposes ONLY! Its implementation is flawed, and it will be deleted in a future version. @@ -1345,7 +1345,7 @@ ZSTD_generateSequences(ZSTD_CCtx* zc, @param zc The compression context to be used for ZSTD_compress2(). Set any compression parameters you need on this context. @param outSeqs The output sequences buffer of size @p outSeqsSize - @param outSeqsSize The size of the output sequences buffer. + @param outSeqsCapacity The size of the output sequences buffer. ZSTD_sequenceBound(srcSize) is an upper bound on the number of sequences that can be generated. @param src The source buffer to generate sequences from of size @p srcSize. @@ -1392,11 +1392,17 @@ ZSTD_compressSequences(ZSTD_CCtx* cctx, the block size derived from the cctx, and sequences may be split. This is the default setting. If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain - block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided. + valid block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided. - If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined - behavior. If ZSTD_c_validateSequences == 1, then if sequence is invalid (see doc/zstd_compression_format.md for - specifics regarding offset/matchlength requirements) then the function will bail out and return an error. + When ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, it's possible to decide generating repcodes + using the advanced parameter ZSTD_c_repcodeResolution. 
Repcodes will improve compression ratio, though the benefit + can vary greatly depending on Sequences. On the other hand, repcode resolution is an expensive operation. + By default, it's disabled at low (<10) compression levels, and enabled at or above the threshold (>=10). + ZSTD_c_repcodeResolution makes it possible to directly manage this processing in either direction. + + If ZSTD_c_validateSequences == 0, this function blindly accepts the Sequences provided. Invalid Sequences cause undefined + behavior. If ZSTD_c_validateSequences == 1, then the function will detect invalid Sequences (see doc/zstd_compression_format.md for + specifics regarding offset/matchlength requirements) and then bail out and return an error. In addition to the two adjustable experimental params, there are other important cctx params. - ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. It has a minimum value of ZSTD_MINMATCH_MIN. @@ -1414,19 +1420,21 @@ ZSTD_compressSequences(ZSTD_CCtx* cctx,
ZSTDLIB_STATIC_API size_t ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, - const ZSTD_Sequence* inSeqs, size_t inSeqsSize, - const void* literals, size_t litSize, size_t srcSize); + const ZSTD_Sequence* inSeqs, size_t nbSequences, + const void* literals, size_t litSize);This is a variant of ZSTD_compressSequences() which, instead of receiving (src,srcSize) as input parameter, receives (literals,litSize), - aka all literals already extracted and laid out into a single continuous buffer. + aka all the literals, already extracted and laid out into a single continuous buffer. This can be useful if the process generating the sequences also happens to generate the buffer of literals, thus skipping an extraction + caching stage. - It's essentially a speed optimization when the right conditions are met, - but it also is restricted by the following limitations: + It's a speed optimization, useful when the right conditions are met, + but it also features the following limitations: - Only supports explicit delimiter mode - Not compatible with frame checksum, which must disabled - - Can fail when unable to compress sufficiently - Also, to be valid, @litSize must be equal to the sum of all @.litLength fields in @inSeqs. + - Does not write the content size in frame header + - If any block is incompressible, will fail and return an error + - @litSize must be == sum of all @.litLength fields in @inSeqs. Any discrepancy will generate an error. + - the buffer @literals must be larger than @litSize by at least 8 bytes. @return : final compressed size, or a ZSTD error code.
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index c4f699eb0..70a9731bf 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2207,7 +2207,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, zc->appliedParams.fParams.contentSizeFlag = 0; DEBUGLOG(4, "pledged content size : %u ; flag : %u", (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag); - zc->blockSize = blockSize; + zc->blockSizeMax = blockSize; XXH64_reset(&zc->xxhState, 0); zc->stage = ZSTDcs_init; @@ -4293,8 +4293,8 @@ ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, lastBlock, 0 /* isPartition */); FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!"); DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits"); - assert(zc->blockSize <= ZSTD_BLOCKSIZE_MAX); - assert(cSizeSingleBlock <= zc->blockSize + ZSTD_blockHeaderSize); + assert(zc->blockSizeMax <= ZSTD_BLOCKSIZE_MAX); + assert(cSizeSingleBlock <= zc->blockSizeMax + ZSTD_blockHeaderSize); return cSizeSingleBlock; } @@ -4328,7 +4328,7 @@ ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, dstCapacity -= cSizeChunk; cSize += cSizeChunk; *currSeqStore = *nextSeqStore; - assert(cSizeChunk <= zc->blockSize + ZSTD_blockHeaderSize); + assert(cSizeChunk <= zc->blockSizeMax + ZSTD_blockHeaderSize); } /* cRep and dRep may have diverged during the compression. * If so, we use the dRep repcodes for the next block. 
@@ -4580,7 +4580,7 @@ static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx, const void* src, size_t srcSize, U32 lastFrameChunk) { - size_t blockSizeMax = cctx->blockSize; + size_t blockSizeMax = cctx->blockSizeMax; size_t remaining = srcSize; const BYTE* ip = (const BYTE*)src; BYTE* const ostart = (BYTE*)dst; @@ -4816,7 +4816,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, src, (BYTE const*)src + srcSize); } - DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize); + DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSizeMax); { size_t const cSize = frame ? ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */); @@ -6070,11 +6070,11 @@ size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel) static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx) { if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) { - return cctx->blockSize - cctx->stableIn_notConsumed; + return cctx->blockSizeMax - cctx->stableIn_notConsumed; } assert(cctx->appliedParams.inBufferMode == ZSTD_bm_buffered); { size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos; - if (hintInSize==0) hintInSize = cctx->blockSize; + if (hintInSize==0) hintInSize = cctx->blockSizeMax; return hintInSize; } } @@ -6162,7 +6162,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, } else { assert(zcs->appliedParams.inBufferMode == ZSTD_bm_stable); if ( (flushMode == ZSTD_e_continue) - && ( (size_t)(iend - ip) < zcs->blockSize) ) { + && ( (size_t)(iend - ip) < zcs->blockSizeMax) ) { /* can't compress a full block : stop here */ zcs->stableIn_notConsumed = (size_t)(iend - ip); ip = iend; /* pretend to have consumed input */ @@ -6181,7 +6181,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, size_t cSize; size_t oSize = (size_t)(oend-op); size_t const iSize = inputBuffered ? 
zcs->inBuffPos - zcs->inToCompress - : MIN((size_t)(iend - ip), zcs->blockSize); + : MIN((size_t)(iend - ip), zcs->blockSizeMax); if (oSize >= ZSTD_compressBound(iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable) cDst = op; /* compress into output buffer, to skip flush stage */ else @@ -6196,9 +6196,9 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); zcs->frameEnded = lastBlock; /* prepare next block */ - zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; + zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSizeMax; if (zcs->inBuffTarget > zcs->inBuffSize) - zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; + zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSizeMax; DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u", (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize); if (!lastBlock) @@ -6413,7 +6413,7 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, /* for small input: avoid automatic flush on reaching end of block, since * it would require to add a 3-bytes null block to end frame */ - cctx->inBuffTarget = cctx->blockSize + (cctx->blockSize == pledgedSrcSize); + cctx->inBuffTarget = cctx->blockSizeMax + (cctx->blockSizeMax == pledgedSrcSize); } else { cctx->inBuffTarget = 0; } @@ -6951,7 +6951,7 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, size_t compressedSeqsSize; size_t cBlockSize; size_t blockSize = determine_blockSize(cctx->appliedParams.blockDelimiters, - cctx->blockSize, remaining, + cctx->blockSizeMax, remaining, inSeqs, inSeqsSize, seqPos); U32 const lastBlock = (blockSize == remaining); FORWARD_IF_ERROR(blockSize, "Error while trying to determine block size"); @@ -7093,167 +7093,112 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* cctx, } /* - * seqPos must end on an explicit block delimiter - * @blockSize must be exactly correct. + * Note: Sequence validation functionality has been disabled (removed). 
+ * Removing it restores simplicity, which in turn helps performance. + * It may be re-inserted later. */ -FORCE_INLINE_TEMPLATE size_t -ZSTD_convertBlockSequences_wBlockDelim_internal(ZSTD_CCtx* cctx, - ZSTD_SequencePosition* seqPos, +size_t ZSTD_convertBlockSequences(ZSTD_CCtx* cctx, const ZSTD_Sequence* const inSeqs, size_t nbSequences, - size_t blockSize, - int const repcodeResolution, - int const checkSequences) + int const repcodeResolution) { - U32 idx = seqPos->idx; - U32 const startIdx = idx; Repcodes_t updatedRepcodes; - U32 dictSize = 0; - size_t startPosInSrc = seqPos->posInSrc; size_t litConsumed = 0; + size_t seqNb = 0; - DEBUGLOG(5, "ZSTD_transferSequencesOnly_wBlockDelim (blockSize = %zu)", blockSize); + DEBUGLOG(5, "ZSTD_convertBlockSequences (nbSequences = %zu)", nbSequences); - /* dictSize is useful to check offset within Sequence validation */ - if (checkSequences) { - if (cctx->cdict) { - dictSize = (U32)cctx->cdict->dictContentSize; - } else if (cctx->prefixDict.dict) { - dictSize = (U32)cctx->prefixDict.dictSize; - } - } + RETURN_ERROR_IF(nbSequences >= cctx->seqStore.maxNbSeq, externalSequences_invalid, + "Not enough memory allocated. 
Try adjusting ZSTD_c_minMatch."); ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(Repcodes_t)); + /* check end condition */ + assert(nbSequences >= 1); + assert(inSeqs[nbSequences-1].matchLength == 0); + assert(inSeqs[nbSequences-1].offset == 0); + /* Convert Sequences from public format to internal format */ - for (; idx < nbSequences && (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0); ++idx) { - U32 const litLength = inSeqs[idx].litLength; - U32 const matchLength = inSeqs[idx].matchLength; + for (seqNb = 0; seqNb < nbSequences - 1 ; seqNb++) { + U32 const litLength = inSeqs[seqNb].litLength; + U32 const matchLength = inSeqs[seqNb].matchLength; U32 offBase; if (!repcodeResolution) { - offBase = OFFSET_TO_OFFBASE(inSeqs[idx].offset); + offBase = OFFSET_TO_OFFBASE(inSeqs[seqNb].offset); } else { U32 const ll0 = (litLength == 0); - offBase = ZSTD_finalizeOffBase(inSeqs[idx].offset, updatedRepcodes.rep, ll0); + offBase = ZSTD_finalizeOffBase(inSeqs[seqNb].offset, updatedRepcodes.rep, ll0); ZSTD_updateRep(updatedRepcodes.rep, offBase, ll0); } DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength); - if (checkSequences) { - seqPos->posInSrc += litLength + matchLength; - FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, - seqPos->posInSrc, - cctx->appliedParams.cParams.windowLog, dictSize, - ZSTD_hasExtSeqProd(&cctx->appliedParams)), - "Sequence validation failed"); - } - RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid, - "Not enough memory allocated. 
Try adjusting ZSTD_c_minMatch."); ZSTD_storeSeqOnly(&cctx->seqStore, litLength, offBase, matchLength); litConsumed += litLength; } /* last sequence (only literals) */ - { size_t const lastLitLength = inSeqs[idx].litLength; - litConsumed += lastLitLength; - if (checkSequences) { - seqPos->posInSrc += lastLitLength; - /* blockSize must be exactly correct (checked before calling this function) */ - RETURN_ERROR_IF((seqPos->posInSrc - startPosInSrc) != blockSize, externalSequences_invalid, "mismatch between Sequences and specified blockSize"); - } else { - /* blockSize presumed exactly correct (checked before calling this function) */ - seqPos->posInSrc += blockSize; - } - } + litConsumed += inSeqs[nbSequences-1].litLength; /* If we skipped repcode search while parsing, we need to update repcodes now */ - assert(idx >= startIdx); - if (!repcodeResolution && idx != startIdx) { + if (!repcodeResolution && nbSequences > 1) { U32* const rep = updatedRepcodes.rep; - U32 lastSeqIdx = idx - 1; /* index of last non-block-delimiter sequence */ - if (lastSeqIdx >= startIdx + 2) { + if (nbSequences >= 4) { + U32 lastSeqIdx = (U32)nbSequences - 2; /* index of last full sequence */ rep[2] = inSeqs[lastSeqIdx - 2].offset; rep[1] = inSeqs[lastSeqIdx - 1].offset; rep[0] = inSeqs[lastSeqIdx].offset; - } else if (lastSeqIdx == startIdx + 1) { + } else if (nbSequences == 3) { rep[2] = rep[0]; - rep[1] = inSeqs[lastSeqIdx - 1].offset; - rep[0] = inSeqs[lastSeqIdx].offset; + rep[1] = inSeqs[0].offset; + rep[0] = inSeqs[1].offset; } else { - assert(lastSeqIdx == startIdx); + assert(nbSequences == 2); rep[2] = rep[1]; rep[1] = rep[0]; - rep[0] = inSeqs[lastSeqIdx].offset; + rep[0] = inSeqs[0].offset; } } ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(Repcodes_t)); - seqPos->idx = idx+1; return litConsumed; } -/* for tests only */ -void CCTX_resetSeqStore(ZSTD_CCtx* cctx) -{ - cctx->seqStore.sequences = cctx->seqStore.sequencesStart; - cctx->seqStore.lit = 
cctx->seqStore.litStart; -} - typedef size_t (*ZSTD_convertBlockSequences_f) (ZSTD_CCtx* cctx, - ZSTD_SequencePosition* seqPos, const ZSTD_Sequence* const inSeqs, size_t nbSequences, - size_t blockSize, int const repcodeResolution); -size_t -ZSTD_convertBlockSequences_wBlockDelim(ZSTD_CCtx* cctx, - ZSTD_SequencePosition* seqPos, - const ZSTD_Sequence* const inSeqs, size_t nbSequences, - size_t blockSize, - int const repcodeResolution) +static size_t getNbSequencesFor1Block(const ZSTD_Sequence* seqs, size_t nbSeqs) { - return ZSTD_convertBlockSequences_wBlockDelim_internal(cctx, - seqPos, inSeqs, nbSequences, blockSize, - repcodeResolution, 0); + size_t n; + assert(seqs); + for (n=0; nappliedParams.searchForExternalRepcodes == ZSTD_ps_enable); - const ZSTD_convertBlockSequences_f convertBlockSequences = cctx->appliedParams.validateSequences ? - ZSTD_convertBlockSequences_wBlockDelim_andCheckSequences - : ZSTD_convertBlockSequences_wBlockDelim; - - DEBUGLOG(4, "ZSTD_compressSequencesAndLiterals_internal: nbSeqs=%zu, litSize=%zu", nbSequences, litSize); - if (cctx->appliedParams.blockDelimiters == ZSTD_sf_noBlockDelimiters) { - RETURN_ERROR(GENERIC, "This mode is only compatible with explicit delimiters"); - } assert(cctx->appliedParams.searchForExternalRepcodes != ZSTD_ps_auto); + DEBUGLOG(4, "ZSTD_compressSequencesAndLiterals_internal: nbSeqs=%zu, litSize=%zu", nbSequences, litSize); + RETURN_ERROR_IF(nbSequences == 0, externalSequences_invalid, "Requires at least 1 end-of-block"); + /* Special case: empty frame */ - if (remaining == 0) { + if ((nbSequences == 1) && (inSeqs[0].litLength == 0)) { U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1); RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "No room for empty frame block header"); MEM_writeLE32(op, cBlockHeader24); @@ -7262,23 +7207,21 @@ ZSTD_compressSequencesAndLiterals_internal(ZSTD_CCtx* cctx, cSize += ZSTD_blockHeaderSize; } - while (remaining) { + while (nbSequences) { size_t 
compressedSeqsSize, cBlockSize, litConsumed; - size_t blockSize = determine_blockSize(cctx->appliedParams.blockDelimiters, - cctx->blockSize, remaining, - inSeqs, nbSequences, seqPos); - U32 const lastBlock = (blockSize == remaining); - FORWARD_IF_ERROR(blockSize, "Error while trying to determine block size"); - assert(blockSize <= remaining); + size_t nbBlockSeqs = getNbSequencesFor1Block(inSeqs, nbSequences); + U32 const lastBlock = (nbBlockSeqs == nbSequences); + FORWARD_IF_ERROR(nbBlockSeqs, "Error while trying to determine nb of sequences for a block"); + assert(nbBlockSeqs <= nbSequences); ZSTD_resetSeqStore(&cctx->seqStore); - litConsumed = convertBlockSequences(cctx, - &seqPos, - inSeqs, nbSequences, - blockSize, + litConsumed = ZSTD_convertBlockSequences(cctx, + inSeqs, nbBlockSeqs, repcodeResolution); FORWARD_IF_ERROR(litConsumed, "Bad sequence conversion"); RETURN_ERROR_IF(litConsumed > litSize, externalSequences_invalid, "discrepancy between literals buffer and Sequences"); + inSeqs += nbBlockSeqs; + nbSequences -= nbBlockSeqs; /* Note: when blockSize is very small, other variant send it uncompressed. * Here, we still send the sequences, because we don't have the original source to send it uncompressed. @@ -7286,16 +7229,18 @@ ZSTD_compressSequencesAndLiterals_internal(ZSTD_CCtx* cctx, * but that's complex and costly memory intensive, which goes against the objectives of this variant. 
*/ RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "not enough dstCapacity to write a new compressed block"); - compressedSeqsSize = ZSTD_entropyCompressSeqStore_wExtLitBuffer( + + compressedSeqsSize = ZSTD_entropyCompressSeqStore_internal( op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize, literals, litConsumed, - blockSize, &cctx->seqStore, &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy, &cctx->appliedParams, cctx->tmpWorkspace, cctx->tmpWkspSize /* statically allocated in resetCCtx */, cctx->bmi2); FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed"); + /* note: the spec forbids any compressed block from being larger than the maximum block size */ + if (compressedSeqsSize > cctx->blockSizeMax) compressedSeqsSize = 0; DEBUGLOG(5, "Compressed sequences size: %zu", compressedSeqsSize); litSize -= litConsumed; literals = (const char*)literals + litConsumed; @@ -7331,7 +7276,6 @@ ZSTD_compressSequencesAndLiterals_internal(ZSTD_CCtx* cctx, break; } else { op += cBlockSize; - remaining -= blockSize; dstCapacity -= cBlockSize; cctx->isFirstBlock = 0; } @@ -7347,7 +7291,7 @@ size_t ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const ZSTD_Sequence* inSeqs, size_t inSeqsSize, - const void* literals, size_t litSize, size_t srcSize) + const void* literals, size_t litSize) { BYTE* op = (BYTE*)dst; size_t cSize = 0; @@ -7355,15 +7299,18 @@ ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx, /* Transparent initialization stage, same as compressStream2() */ DEBUGLOG(4, "ZSTD_compressSequencesAndLiterals (dstCapacity=%zu)", dstCapacity); assert(cctx != NULL); - FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed"); + FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_continue, ZSTD_CONTENTSIZE_UNKNOWN), "CCtx initialization failed"); + if
(cctx->appliedParams.blockDelimiters == ZSTD_sf_noBlockDelimiters) { + RETURN_ERROR(frameParameter_unsupported, "This mode is only compatible with explicit delimiters"); + } if (cctx->appliedParams.fParams.checksumFlag) { - RETURN_ERROR(frameParameter_unsupported, "this mode is incompatible with frame checksum"); + RETURN_ERROR(frameParameter_unsupported, "this mode is not compatible with frame checksum"); } /* Begin writing output, starting with frame header */ { size_t const frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, - &cctx->appliedParams, srcSize, cctx->dictID); + &cctx->appliedParams, ZSTD_CONTENTSIZE_UNKNOWN, cctx->dictID); op += frameHeaderSize; assert(frameHeaderSize <= dstCapacity); dstCapacity -= frameHeaderSize; @@ -7374,7 +7321,7 @@ ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx, { size_t const cBlocksSize = ZSTD_compressSequencesAndLiterals_internal(cctx, op, dstCapacity, inSeqs, inSeqsSize, - literals, litSize, srcSize); + literals, litSize); FORWARD_IF_ERROR(cBlocksSize, "Compressing blocks failed!"); cSize += cBlocksSize; assert(cBlocksSize <= dstCapacity); diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index bbcf4a7d5..2d4ecc89f 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -484,7 +484,7 @@ struct ZSTD_CCtx_s { size_t dictContentSize; ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */ - size_t blockSize; + size_t blockSizeMax; unsigned long long pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */ unsigned long long consumedSrcSize; unsigned long long producedCSize; @@ -1528,15 +1528,11 @@ typedef struct { size_t posInSrc; /* Number of bytes given by sequences provided so far */ } ZSTD_SequencePosition; -size_t -ZSTD_convertBlockSequences_wBlockDelim(ZSTD_CCtx* cctx, - ZSTD_SequencePosition* seqPos, +/* for benchmark */ +size_t ZSTD_convertBlockSequences(ZSTD_CCtx* cctx, const ZSTD_Sequence* const inSeqs, size_t 
nbSequences, - size_t blockSize, int const repcodeResolution); -/* for tests only */ -void CCTX_resetSeqStore(ZSTD_CCtx* cctx); /* ============================================================== * Private declarations diff --git a/lib/zstd.h b/lib/zstd.h index 68e78b3ca..3fb02619c 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1675,6 +1675,7 @@ ZSTD_compressSequences(ZSTD_CCtx* cctx, * but it also features the following limitations: * - Only supports explicit delimiter mode * - Not compatible with frame checksum, which must disabled + * - Does not write the content size in frame header * - If any block is incompressible, will fail and return an error * - @litSize must be == sum of all @.litLength fields in @inSeqs. Any discrepancy will generate an error. * - the buffer @literals must be larger than @litSize by at least 8 bytes. @@ -1684,7 +1685,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const ZSTD_Sequence* inSeqs, size_t nbSequences, - const void* literals, size_t litSize, size_t srcSize); + const void* literals, size_t litSize); /*! 
ZSTD_writeSkippableFrame() : diff --git a/tests/Makefile b/tests/Makefile index f24d5cb7b..982181de8 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -26,6 +26,9 @@ export ZSTD_LEGACY_SUPPORT DEBUGLEVEL ?= 2 export DEBUGLEVEL # transmit value to sub-makefiles +.PHONY: default +default: fullbench + LIBZSTD_MK_DIR := ../lib include $(LIBZSTD_MK_DIR)/libzstd.mk @@ -78,9 +81,6 @@ FUZZERTEST ?= -T200s ZSTDRTTEST = --test-large-data DECODECORPUS_TESTTIME ?= -T30 -.PHONY: default -default: fullbench - .PHONY: all all: fullbench fuzzer zstreamtest paramgrill datagen decodecorpus roundTripCrash poolTests diff --git a/tests/fullbench.c b/tests/fullbench.c index c42c923c1..3f5044c71 100644 --- a/tests/fullbench.c +++ b/tests/fullbench.c @@ -623,9 +623,9 @@ local_compressSequencesAndLiterals(const void* input, size_t inputSize, ZSTD_CCtx_setParameter(g_zcc, ZSTD_c_repcodeResolution, ZSTD_ps_enable); #endif assert(12 + nbSeqs * sizeof(ZSTD_Sequence) + nbLiterals == inputSize); (void)inputSize; - (void)payload; + (void)payload; (void)srcSize; - return ZSTD_compressSequencesAndLiterals(g_zcc, dst, dstCapacity, seqs, nbSeqs, literals, nbLiterals, srcSize); + return ZSTD_compressSequencesAndLiterals(g_zcc, dst, dstCapacity, seqs, nbSeqs, literals, nbLiterals); } static PrepResult prepConvertSequences(const void* src, size_t srcSize, int cLevel) @@ -669,22 +669,21 @@ local_convertSequences(const void* input, size_t inputSize, void* dst, size_t dstCapacity, void* payload) { - ZSTD_SequencePosition seqPos = { 0, 0 , 0 }; const char* ip = input; size_t const blockSize = MEM_read32(ip); size_t const nbSeqs = MEM_read32(ip+=4); const ZSTD_Sequence* seqs = (const ZSTD_Sequence*)(const void*)(ip+=4); ZSTD_CCtx_reset(g_zcc, ZSTD_reset_session_and_parameters); - CCTX_resetSeqStore(g_zcc); + ZSTD_resetSeqStore(&g_zcc->seqStore); ZSTD_CCtx_setParameter(g_zcc, ZSTD_c_blockDelimiters, ZSTD_sf_explicitBlockDelimiters); # if 0 /* for tests */ ZSTD_CCtx_setParameter(g_zcc, 
ZSTD_c_repcodeResolution, ZSTD_ps_enable); #endif assert(8 + nbSeqs * sizeof(ZSTD_Sequence) == inputSize); (void)inputSize; (void)dst; (void)dstCapacity; - (void)payload; + (void)payload; (void)blockSize; - return ZSTD_convertBlockSequences_wBlockDelim(g_zcc, &seqPos, seqs, nbSeqs, blockSize, 0); + return ZSTD_convertBlockSequences(g_zcc, seqs, nbSeqs, 0); } diff --git a/tests/fuzzer.c b/tests/fuzzer.c index b20031708..09572e909 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -3909,35 +3909,21 @@ static int basicUnitTests(U32 const seed, double compressibility) FUZ_transferLiterals(litBuffer, decompressSize, CNBuffer, srcSize, seqs, nbSeqs); /* not enough literals: must fail */ - compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, src, litSize-1, srcSize); + compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, src, litSize-1); if (!ZSTD_isError(compressedSize)) { DISPLAY("ZSTD_compressSequencesAndLiterals() should have failed: not enough literals provided\n"); goto _output_error; } /* too many literals: must fail */ - compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, src, litSize+1, srcSize); + compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, src, litSize+1); if (!ZSTD_isError(compressedSize)) { DISPLAY("ZSTD_compressSequencesAndLiterals() should have failed: too many literals provided\n"); goto _output_error; } - /* too short srcSize: must fail */ - compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, src, litSize, srcSize-1); - if (!ZSTD_isError(compressedSize)) { - DISPLAY("ZSTD_compressSequencesAndLiterals() should have failed: srcSize is too short\n"); - goto _output_error; - } - - /* too large srcSize: must fail */ - compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, src, litSize, srcSize+1); - if (!ZSTD_isError(compressedSize)) { - 
DISPLAY("ZSTD_compressSequencesAndLiterals() should have failed: srcSize is too short\n"); - goto _output_error; - } - /* correct amount of literals: should compress successfully */ - compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, litBuffer, litSize, srcSize); + compressedSize = ZSTD_compressSequencesAndLiterals(cctx, dst, dstCapacity, seqs, nbSeqs, litBuffer, litSize); if (ZSTD_isError(compressedSize)) { DISPLAY("Error in ZSTD_compressSequencesAndLiterals()\n"); goto _output_error;