1
0
mirror of https://github.com/facebook/zstd.git synced 2025-03-07 01:10:04 +02:00

Merge pull request #2150 from terrelln/ldm-dict-reset

[ldm] Reset loadedDictEnd when the context is reset
This commit is contained in:
Nick Terrell 2020-05-18 18:33:01 -07:00 committed by GitHub
commit 9778f46014
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 86 additions and 10 deletions

View File

@ -1576,6 +1576,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
ZSTD_window_init(&zc->ldmState.window);
ZSTD_window_clear(&zc->ldmState.window);
zc->ldmState.loadedDictEnd = 0;
}
DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));

View File

@ -463,6 +463,8 @@ size_t ZSTD_ldm_generateSequences(
U32 const correction = ZSTD_window_correctOverflow(
&ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
/* invalidate dictionaries on overflow correction */
ldmState->loadedDictEnd = 0;
}
/* 2. We enforce the maximum offset allowed.
*
@ -471,6 +473,12 @@ size_t ZSTD_ldm_generateSequences(
* TODO: * Test the chunk size.
* * Try invalidation after the sequence generation and test the
* the offset against maxDist directly.
*
* NOTE: Because of dictionaries + sequence splitting we MUST make sure
* that any offset used is valid at the END of the sequence, since it may
* be split into two sequences. This condition holds when using
* ZSTD_window_enforceMaxDist(), but if we move to checking offsets
* against maxDist directly, we'll have to carefully handle that case.
*/
ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL);
/* 3. Generate the sequences for the chunk, and get newLeftoverSize. */

View File

@ -493,7 +493,6 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState,
ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
/* Reset the window */
ZSTD_window_init(&serialState->ldmState.window);
serialState->ldmWindow = serialState->ldmState.window;
/* Resize tables and output space if necessary. */
if (serialState->ldmState.hashTable == NULL || serialState->params.ldmParams.hashLog < hashLog) {
ZSTD_free(serialState->ldmState.hashTable, cMem);
@ -508,12 +507,20 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState,
/* Zero the tables */
memset(serialState->ldmState.hashTable, 0, hashSize);
memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
/* Update window state and fill hash table with dict */
if (dictSize > 0) {
BYTE const* const dictEnd = (const BYTE*)dict + dictSize;
ZSTD_window_update(&serialState->ldmState.window, dict, dictSize);
ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, &params.ldmParams);
serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base);
}
/* Initialize serialState's copy of ldmWindow. */
serialState->ldmWindow = serialState->ldmState.window;
}
/* Update window state and fill hash table with dict */
if (params.ldmParams.enableLdm && dict) {
ZSTD_window_update(&serialState->ldmState.window, dict, dictSize);
ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, (const BYTE*)dict + dictSize, &params.ldmParams);
}
serialState->params = params;

View File

@ -631,17 +631,77 @@ static int basicUnitTests(U32 const seed, double compressibility)
DISPLAYLEVEL(3, "test%3i : testing dict compression with enableLdm and forceMaxWindow : ", testNb++);
{
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
ZSTD_DCtx* const dctx = ZSTD_createDCtx();
void* dict = (void*)malloc(CNBuffSize);
int nbWorkers;
RDG_genBuffer(dict, CNBuffSize, 0.5, 0.5, seed);
RDG_genBuffer(CNBuffer, CNBuffSize, 0.6, 0.6, seed);
for (nbWorkers = 0; nbWorkers < 3; ++nbWorkers) {
RDG_genBuffer(dict, CNBuffSize, 0.5, 0.5, seed);
RDG_genBuffer(CNBuffer, CNBuffSize, 0.6, 0.6, seed);
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceMaxWindow, 1));
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1));
assert(!ZSTD_isError(ZSTD_compress_usingDict(cctx, compressedBuffer, compressedBufferSize,
CNBuffer, CNBuffSize, dict, CNBuffSize, 3)));
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, ZSTD_c_nbWorkers));
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceMaxWindow, 1));
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1));
CHECK_Z(ZSTD_CCtx_refPrefix(cctx, dict, CNBuffSize));
cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize);
CHECK_Z(cSize);
CHECK_Z(ZSTD_decompress_usingDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, dict, CNBuffSize));
}
ZSTD_freeCCtx(cctx);
ZSTD_freeDCtx(dctx);
free(dict);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : testing ldm dictionary gets invalidated : ", testNb++);
{
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
ZSTD_DCtx* const dctx = ZSTD_createDCtx();
void* dict = (void*)malloc(CNBuffSize);
size_t const kWindowLog = 10;
size_t const kWindowSize = (size_t)1 << kWindowLog;
size_t const dictSize = kWindowSize * 10;
size_t const srcSize1 = kWindowSize / 2;
size_t const srcSize2 = kWindowSize * 10;
int nbWorkers;
if (CNBuffSize < dictSize) goto _output_error;
RDG_genBuffer(dict, dictSize, 0.5, 0.5, seed);
RDG_genBuffer(CNBuffer, srcSize1 + srcSize2, 0.5, 0.5, seed);
/* Enable checksum to verify round trip. */
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
/* Disable content size to skip single-pass decompression. */
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0));
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, (int)kWindowLog));
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1));
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_ldmMinMatch, 32));
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_ldmHashRateLog, 1));
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_ldmHashLog, 12));
for (nbWorkers = 0; nbWorkers < 3; ++nbWorkers) {
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, nbWorkers));
/* Round trip once with a dictionary. */
CHECK_Z(ZSTD_CCtx_refPrefix(cctx, dict, dictSize));
cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, srcSize1);
CHECK_Z(cSize);
CHECK_Z(ZSTD_decompress_usingDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, dict, dictSize));
cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, srcSize2);
/* Streaming decompression to catch out of bounds offsets. */
{
ZSTD_inBuffer in = {compressedBuffer, cSize, 0};
ZSTD_outBuffer out = {decodedBuffer, CNBuffSize, 0};
size_t const dSize = ZSTD_decompressStream(dctx, &out, &in);
CHECK_Z(dSize);
if (dSize != 0) goto _output_error;
}
}
ZSTD_freeCCtx(cctx);
ZSTD_freeDCtx(dctx);
free(dict);
}
DISPLAYLEVEL(3, "OK \n");