From 37706a677c09a1051f8b02361928c475bd094e67 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 25 Oct 2024 20:29:15 -0700 Subject: [PATCH] added a test test both that the new parameter works as intended, and that the over-split protection works as intended --- lib/compress/zstd_compress.c | 7 ++-- lib/compress/zstd_preSplit.c | 1 + tests/fuzzer.c | 63 ++++++++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 2 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index fbc11349e..ca9e5edff 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2114,7 +2114,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, { ZSTD_cwksp* const ws = &zc->workspace; DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u, useRowMatchFinder=%d useBlockSplitter=%d", - (U32)pledgedSrcSize, params->cParams.windowLog, (int)params->useRowMatchFinder, (int)params->useBlockSplitter); + (U32)pledgedSrcSize, params->cParams.windowLog, (int)params->useRowMatchFinder, (int)params->postBlockSplitter); assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); zc->isFirstBlock = 1; @@ -4520,7 +4520,10 @@ static size_t ZSTD_optimalBlockSize(ZSTD_CCtx* cctx, const void* src, size_t src * require verified savings to allow pre-splitting. * Note: as a consequence, the first full block is not split. */ - if (savings < 3) return 128 KB; + if (savings < 3) { + DEBUGLOG(6, "don't attempt splitting: savings (%lli) too low", savings); + return 128 KB; + } /* apply @splitLevel, or use default value (which depends on @strat). * note that splitting heuristic is still conditioned by @savings >= 3, * so the first block will not reach this code path */ diff --git a/lib/compress/zstd_preSplit.c b/lib/compress/zstd_preSplit.c index d25773950..d820c20ac 100644 --- a/lib/compress/zstd_preSplit.c +++ b/lib/compress/zstd_preSplit.c @@ -229,6 +229,7 @@ size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize, int level, void* workspace, size_t wkspSize) { + DEBUGLOG(6, "ZSTD_splitBlock (level=%i)", level); assert(0<=level && level<=4); if (level == 0) return ZSTD_splitBlock_fromBorders(blockStart, blockSize, workspace, wkspSize); diff --git a/tests/fuzzer.c b/tests/fuzzer.c index f5a894354..e32c0130d 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -559,6 +559,67 @@ static void test_setCParams(unsigned tnb) DISPLAYLEVEL(3, "OK \n"); } +static void test_blockSplitter_incompressibleExpansionProtection(unsigned testNb, unsigned seed) +{ + DISPLAYLEVEL(3, "test%3i : Check block splitter doesn't oversplit incompressible data (seed %u): ", testNb, seed); + { ZSTD_CCtx* cctx = ZSTD_createCCtx(); + size_t const srcSize = 256 * 1024; /* needs to be at least 2 blocks */ + void* incompressible = malloc(srcSize); + size_t const dstCapacity = ZSTD_compressBound(srcSize); + void* cBuffer = malloc(dstCapacity); + size_t const chunkSize = 8 KB; + size_t const nbChunks = srcSize / chunkSize; + size_t chunkNb, cSizeNoSplit, cSizeWithSplit; + assert(cctx != NULL); + assert(incompressible != NULL); + assert(cBuffer != NULL); + + /* let's fill input with random noise (incompressible) */ + RDG_genBuffer(incompressible, srcSize, 0.0, 0.0, seed); + DISPLAYLEVEL(4, "(hash: %llx) ", XXH64(incompressible, srcSize, 0)); + + /* this pattern targets the fastest _byChunk variant's sampling (level 3). + * manually checked that, without the @savings protection, it would over-split. + */ + for (chunkNb=0; chunkNb