1
0
mirror of https://github.com/facebook/zstd.git synced 2025-03-06 16:56:49 +02:00

added a test

The test checks both that the new parameter works as intended
and that the over-split protection prevents over-splitting of incompressible data.
This commit is contained in:
Yann Collet 2024-10-25 20:29:15 -07:00
parent 226ae73311
commit 37706a677c
3 changed files with 69 additions and 2 deletions

View File

@ -2114,7 +2114,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
{ {
ZSTD_cwksp* const ws = &zc->workspace; ZSTD_cwksp* const ws = &zc->workspace;
DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u, useRowMatchFinder=%d useBlockSplitter=%d", DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u, useRowMatchFinder=%d useBlockSplitter=%d",
(U32)pledgedSrcSize, params->cParams.windowLog, (int)params->useRowMatchFinder, (int)params->useBlockSplitter); (U32)pledgedSrcSize, params->cParams.windowLog, (int)params->useRowMatchFinder, (int)params->postBlockSplitter);
assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
zc->isFirstBlock = 1; zc->isFirstBlock = 1;
@ -4520,7 +4520,10 @@ static size_t ZSTD_optimalBlockSize(ZSTD_CCtx* cctx, const void* src, size_t src
* require verified savings to allow pre-splitting. * require verified savings to allow pre-splitting.
* Note: as a consequence, the first full block is not split. * Note: as a consequence, the first full block is not split.
*/ */
if (savings < 3) return 128 KB; if (savings < 3) {
DEBUGLOG(6, "don't attempt splitting: savings (%lli) too low", savings);
return 128 KB;
}
/* apply @splitLevel, or use default value (which depends on @strat). /* apply @splitLevel, or use default value (which depends on @strat).
* note that splitting heuristic is still conditioned by @savings >= 3, * note that splitting heuristic is still conditioned by @savings >= 3,
* so the first block will not reach this code path */ * so the first block will not reach this code path */

View File

@ -229,6 +229,7 @@ size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize,
int level, int level,
void* workspace, size_t wkspSize) void* workspace, size_t wkspSize)
{ {
DEBUGLOG(6, "ZSTD_splitBlock (level=%i)", level);
assert(0<=level && level<=4); assert(0<=level && level<=4);
if (level == 0) if (level == 0)
return ZSTD_splitBlock_fromBorders(blockStart, blockSize, workspace, wkspSize); return ZSTD_splitBlock_fromBorders(blockStart, blockSize, workspace, wkspSize);

View File

@ -559,6 +559,67 @@ static void test_setCParams(unsigned tnb)
DISPLAYLEVEL(3, "OK \n"); DISPLAYLEVEL(3, "OK \n");
} }
/* test_blockSplitter_incompressibleExpansionProtection() :
 * Checks that enabling the block splitter does not change the compressed size
 * of incompressible input, i.e. that the splitter does not over-split it.
 *
 * The input is 256 KB of random noise, processed as 8 KB chunks.
 * In every chunk, one byte out of 43 is altered :
 * even chunks get `|= 0x80`, odd chunks get `&= 0x80`.
 * The same input is then compressed twice with the same context,
 * once with ZSTD_c_blockSplitter_level==1 and once with level 3,
 * and both compressed sizes must be identical.
 *
 * @testNb : test number, only used for display
 * @seed : seed of the noise generator, also displayed
 *
 * Any failure (allocation, parameter setting, compression, size mismatch)
 * terminates the process via abort().
 */
static void test_blockSplitter_incompressibleExpansionProtection(unsigned testNb, unsigned seed)
{
    DISPLAYLEVEL(3, "test%3u : Check block splitter doesn't oversplit incompressible data (seed %u): ", testNb, seed);
    {   ZSTD_CCtx* cctx = ZSTD_createCCtx();
        size_t const srcSize = 256 * 1024; /* needs to be at least 2 blocks */
        void* incompressible = malloc(srcSize);
        size_t const dstCapacity = ZSTD_compressBound(srcSize);
        void* cBuffer = malloc(dstCapacity);
        size_t const chunkSize = 8 KB;
        size_t const nbChunks = srcSize / chunkSize;
        size_t chunkNb, cSizeNoSplit, cSizeWithSplit;

        /* explicit check : assert() is compiled out when NDEBUG is defined */
        if (cctx == NULL || incompressible == NULL || cBuffer == NULL) {
            DISPLAYLEVEL(1, "allocation failure \n");
            abort();
        }

        /* let's fill input with random noise (incompressible) */
        RDG_genBuffer(incompressible, srcSize, 0.0, 0.0, seed);
        DISPLAYLEVEL(4, "(hash: %llx) ", (unsigned long long)XXH64(incompressible, srcSize, 0));

        /* this pattern targets the fastest _byChunk variant's sampling (level 3).
         * manually checked that, without the @savings protection, it would over-split.
         */
        for (chunkNb=0; chunkNb<nbChunks; chunkNb++) {
            BYTE* const p = (BYTE*)incompressible + chunkNb * chunkSize;
            size_t const samplingRate = 43;
            int const addOrRemove = (int)(chunkNb % 2);
            size_t n;
            for (n=0; n<chunkSize; n+=samplingRate) {
                if (addOrRemove) {
                    p[n] &= 0x80;
                } else {
                    p[n] |= 0x80;
                }
            }
        }

        /* run first without splitting */
        {   size_t const err = ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockSplitter_level, 1 /* no split */);
            if (ZSTD_isError(err)) {
                DISPLAYLEVEL(1, "failed to set ZSTD_c_blockSplitter_level=1 : %s \n", ZSTD_getErrorName(err));
                abort();
            }
        }
        cSizeNoSplit = ZSTD_compress2(cctx, cBuffer, dstCapacity, incompressible, srcSize);
        /* an error code is also a size_t : without this check,
         * 2 identical failures would compare equal and pass the test */
        if (ZSTD_isError(cSizeNoSplit)) {
            DISPLAYLEVEL(1, "compression without split failed : %s \n", ZSTD_getErrorName(cSizeNoSplit));
            abort();
        }

        /* run with sample43 splitter, check it's still the same */
        {   size_t const err = ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockSplitter_level, 3 /* sample43, fastest _byChunk variant */);
            if (ZSTD_isError(err)) {
                DISPLAYLEVEL(1, "failed to set ZSTD_c_blockSplitter_level=3 : %s \n", ZSTD_getErrorName(err));
                abort();
            }
        }
        cSizeWithSplit = ZSTD_compress2(cctx, cBuffer, dstCapacity, incompressible, srcSize);
        if (ZSTD_isError(cSizeWithSplit)) {
            DISPLAYLEVEL(1, "compression with split failed : %s \n", ZSTD_getErrorName(cSizeWithSplit));
            abort();
        }

        if (cSizeWithSplit != cSizeNoSplit) {
            DISPLAYLEVEL(1, "invalid compressed size: cSizeWithSplit %u != %u cSizeNoSplit \n",
                        (unsigned)cSizeWithSplit, (unsigned)cSizeNoSplit);
            abort();
        }
        DISPLAYLEVEL(4, "compressed size: cSizeWithSplit %u == %u cSizeNoSplit : ",
                    (unsigned)cSizeWithSplit, (unsigned)cSizeNoSplit);

        free(incompressible);
        free(cBuffer);
        ZSTD_freeCCtx(cctx);
    }
    DISPLAYLEVEL(3, "OK \n");
}
/* ============================================================= */
static int basicUnitTests(U32 const seed, double compressibility) static int basicUnitTests(U32 const seed, double compressibility)
{ {
size_t const CNBuffSize = 5 MB; size_t const CNBuffSize = 5 MB;
@ -1374,6 +1435,8 @@ static int basicUnitTests(U32 const seed, double compressibility)
} }
DISPLAYLEVEL(3, "OK \n"); DISPLAYLEVEL(3, "OK \n");
test_blockSplitter_incompressibleExpansionProtection(testNb++, seed);
DISPLAYLEVEL(3, "test%3d : superblock uncompressible data: too many nocompress superblocks : ", testNb++); DISPLAYLEVEL(3, "test%3d : superblock uncompressible data: too many nocompress superblocks : ", testNb++);
{ {
ZSTD_CCtx* const cctx = ZSTD_createCCtx(); ZSTD_CCtx* const cctx = ZSTD_createCCtx();