1
0
mirror of https://github.com/facebook/zstd.git synced 2025-03-06 00:43:59 +02:00

ZSTD_compressSequencesAndLiterals() now supports multi-blocks frames.

This commit is contained in:
Yann Collet 2024-12-16 18:05:40 -08:00
parent 5164d44dab
commit 31b5ef2539
3 changed files with 23 additions and 24 deletions

View File

@ -7104,7 +7104,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* cctx,
*/
FORCE_INLINE_TEMPLATE size_t
ZSTD_transferSequencesOnly_wBlockDelim_internal(ZSTD_CCtx* cctx,
ZSTD_SequencePosition* seqPos, size_t* litConsumedPtr,
ZSTD_SequencePosition* seqPos,
const ZSTD_Sequence* const inSeqs, size_t nbSequences,
size_t blockSize,
int const repcodeResolution,
@ -7197,37 +7197,36 @@ ZSTD_transferSequencesOnly_wBlockDelim_internal(ZSTD_CCtx* cctx,
ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(Repcodes_t));
seqPos->idx = idx+1;
*litConsumedPtr = litConsumed;
return blockSize;
return litConsumed;
}
typedef size_t (*ZSTD_transferSequencesOnly_f) (ZSTD_CCtx* cctx,
ZSTD_SequencePosition* seqPos, size_t* litConsumedPtr,
ZSTD_SequencePosition* seqPos,
const ZSTD_Sequence* const inSeqs, size_t nbSequences,
size_t blockSize,
int const repcodeResolution);
static size_t
ZSTD_transferSequencesOnly_wBlockDelim(ZSTD_CCtx* cctx,
ZSTD_SequencePosition* seqPos, size_t* litConsumedPtr,
ZSTD_SequencePosition* seqPos,
const ZSTD_Sequence* const inSeqs, size_t nbSequences,
size_t blockSize,
int const repcodeResolution)
{
return ZSTD_transferSequencesOnly_wBlockDelim_internal(cctx,
seqPos, litConsumedPtr, inSeqs, nbSequences, blockSize,
seqPos, inSeqs, nbSequences, blockSize,
repcodeResolution, 0);
}
static size_t
ZSTD_transferSequencesOnly_wBlockDelim_andCheckSequences(ZSTD_CCtx* cctx,
ZSTD_SequencePosition* seqPos, size_t* litConsumedPtr,
ZSTD_SequencePosition* seqPos,
const ZSTD_Sequence* const inSeqs, size_t nbSequences,
size_t blockSize,
int const repcodeResolution)
{
return ZSTD_transferSequencesOnly_wBlockDelim_internal(cctx,
seqPos, litConsumedPtr, inSeqs, nbSequences, blockSize,
seqPos, inSeqs, nbSequences, blockSize,
repcodeResolution, 1);
}
@ -7269,28 +7268,26 @@ ZSTD_compressSequencesAndLiterals_internal(ZSTD_CCtx* cctx,
inSeqs, nbSequences, seqPos);
U32 const lastBlock = (blockSize == remaining);
FORWARD_IF_ERROR(blockSize, "Error while trying to determine block size");
RETURN_ERROR_IF(!lastBlock, srcSize_wrong, "Only supports single block");
assert(blockSize <= remaining);
ZSTD_resetSeqStore(&cctx->seqStore);
blockSize = transfer(cctx,
&seqPos, &litConsumed,
litConsumed = transfer(cctx,
&seqPos,
inSeqs, nbSequences,
blockSize,
repcodeResolution);
RETURN_ERROR_IF(blockSize != remaining, externalSequences_invalid, "Must consume the entire block");
RETURN_ERROR_IF(litConsumed != litSize, externalSequences_invalid, "Must consume the exact amount of literals provided");
FORWARD_IF_ERROR(blockSize, "Bad sequence copy");
FORWARD_IF_ERROR(litConsumed, "Bad sequence conversion");
RETURN_ERROR_IF(litConsumed > litSize, externalSequences_invalid, "discrepancy between literals buffer and Sequences");
/* Note: when blockSize is very small, other variant send it uncompressed.
* Here, we still send the sequences, because we don't have the source to send it uncompressed.
* In theory, it would be possible to reproduce the source from the sequences,
* but that's pretty complex and memory intensive, which goes against the principles of this variant. */
* Here, we still send the sequences, because we don't have the original source to send it uncompressed.
* One could imagine it possible to reproduce the source from the sequences,
* but that's complex and costly memory intensive, which goes against the objectives of this variant. */
RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "not enough dstCapacity to write a new compressed block");
compressedSeqsSize = ZSTD_entropyCompressSeqStore_wExtLitBuffer(
op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize,
literals, litSize,
literals, litConsumed,
blockSize,
&cctx->seqStore,
&cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
@ -7299,17 +7296,19 @@ ZSTD_compressSequencesAndLiterals_internal(ZSTD_CCtx* cctx,
cctx->bmi2);
FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed");
DEBUGLOG(5, "Compressed sequences size: %zu", compressedSeqsSize);
litSize -= litConsumed;
literals = (const char*)literals + litConsumed;
/* Note: difficult to check source for RLE block when only Literals are provided,
* but it could be considered from analyzing the sequence directly */
if (compressedSeqsSize == 0) {
/* Sending uncompressed blocks is difficult, because the source is not provided.
/* Sending uncompressed blocks is out of reach, because the source is not provided.
* In theory, one could use the sequences to regenerate the source, like a decompressor,
* but it's complex, and memory hungry, killing the purpose of this variant.
* Current outcome: generate an error code.
*/
RETURN_ERROR(dstSize_tooSmall, "Data is not compressible"); /* note: error code might be misleading */
RETURN_ERROR(dstSize_tooSmall, "Data is not compressible"); /* note: error code could be clearer */
} else {
U32 cBlockHeader;
assert(compressedSeqsSize > 1); /* no RLE */
@ -7338,6 +7337,7 @@ ZSTD_compressSequencesAndLiterals_internal(ZSTD_CCtx* cctx,
DEBUGLOG(5, "cSize running total: %zu (remaining dstCapacity=%zu)", cSize, dstCapacity);
}
RETURN_ERROR_IF(litSize != 0, externalSequences_invalid, "literals must be entirely and exactly consumed");
DEBUGLOG(4, "cSize final total: %zu", cSize);
return cSize;
}

View File

@ -1674,10 +1674,9 @@ ZSTD_compressSequences(ZSTD_CCtx* cctx,
* It's a speed optimization, useful when the right conditions are met,
* but it also features the following limitations:
* - Only supports explicit delimiter mode
* - Supports 1 block only (max input 128 KB)
* - Not compatible with frame checksum, which must disabled
* - Can fail (return an error) when input data cannot be compress sufficiently
* - @litSize must be == sum of all @.litLength fields in @inSeqs. Discrepancy will generate an error.
* - If any block is incompressible, will fail and return an error
* - @litSize must be == sum of all @.litLength fields in @inSeqs. Any discrepancy will generate an error.
* - the buffer @literals must be larger than @litSize by at least 8 bytes.
* @return : final compressed size, or a ZSTD error code.
*/

View File

@ -3880,7 +3880,7 @@ static int basicUnitTests(U32 const seed, double compressibility)
DISPLAYLEVEL(3, "test%3i : ZSTD_compressSequencesAndLiterals : ", testNb++);
{
const size_t srcSize = 100 KB;
const size_t srcSize = 500 KB;
const BYTE* const src = (BYTE*)CNBuffer;
BYTE* const dst = (BYTE*)compressedBuffer;
const size_t dstCapacity = ZSTD_compressBound(srcSize);