mirror of
https://github.com/facebook/zstd.git
synced 2025-03-07 01:10:04 +02:00
Merge pull request #2150 from terrelln/ldm-dict-reset
[ldm] Reset loadedDictEnd when the context is reset
This commit is contained in:
commit
9778f46014
@ -1576,6 +1576,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
||||
|
||||
ZSTD_window_init(&zc->ldmState.window);
|
||||
ZSTD_window_clear(&zc->ldmState.window);
|
||||
zc->ldmState.loadedDictEnd = 0;
|
||||
}
|
||||
|
||||
DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
|
||||
|
@ -463,6 +463,8 @@ size_t ZSTD_ldm_generateSequences(
|
||||
U32 const correction = ZSTD_window_correctOverflow(
|
||||
&ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
|
||||
ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
|
||||
/* invalidate dictionaries on overflow correction */
|
||||
ldmState->loadedDictEnd = 0;
|
||||
}
|
||||
/* 2. We enforce the maximum offset allowed.
|
||||
*
|
||||
@ -471,6 +473,12 @@ size_t ZSTD_ldm_generateSequences(
|
||||
* TODO: * Test the chunk size.
|
||||
* * Try invalidation after the sequence generation and test the
|
||||
* the offset against maxDist directly.
|
||||
*
|
||||
* NOTE: Because of dictionaries + sequence splitting we MUST make sure
|
||||
* that any offset used is valid at the END of the sequence, since it may
|
||||
* be split into two sequences. This condition holds when using
|
||||
* ZSTD_window_enforceMaxDist(), but if we move to checking offsets
|
||||
* against maxDist directly, we'll have to carefully handle that case.
|
||||
*/
|
||||
ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL);
|
||||
/* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
|
||||
|
@ -493,7 +493,6 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState,
|
||||
ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
|
||||
/* Reset the window */
|
||||
ZSTD_window_init(&serialState->ldmState.window);
|
||||
serialState->ldmWindow = serialState->ldmState.window;
|
||||
/* Resize tables and output space if necessary. */
|
||||
if (serialState->ldmState.hashTable == NULL || serialState->params.ldmParams.hashLog < hashLog) {
|
||||
ZSTD_free(serialState->ldmState.hashTable, cMem);
|
||||
@ -508,12 +507,20 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState,
|
||||
/* Zero the tables */
|
||||
memset(serialState->ldmState.hashTable, 0, hashSize);
|
||||
memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
|
||||
|
||||
/* Update window state and fill hash table with dict */
|
||||
if (dictSize > 0) {
|
||||
BYTE const* const dictEnd = (const BYTE*)dict + dictSize;
|
||||
ZSTD_window_update(&serialState->ldmState.window, dict, dictSize);
|
||||
ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, &params.ldmParams);
|
||||
serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base);
|
||||
}
|
||||
|
||||
/* Initialize serialState's copy of ldmWindow. */
|
||||
serialState->ldmWindow = serialState->ldmState.window;
|
||||
}
|
||||
|
||||
/* Update window state and fill hash table with dict */
|
||||
if (params.ldmParams.enableLdm && dict) {
|
||||
ZSTD_window_update(&serialState->ldmState.window, dict, dictSize);
|
||||
ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, (const BYTE*)dict + dictSize, &params.ldmParams);
|
||||
}
|
||||
|
||||
serialState->params = params;
|
||||
|
@ -631,17 +631,77 @@ static int basicUnitTests(U32 const seed, double compressibility)
|
||||
DISPLAYLEVEL(3, "test%3i : testing dict compression with enableLdm and forceMaxWindow : ", testNb++);
|
||||
{
|
||||
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
|
||||
ZSTD_DCtx* const dctx = ZSTD_createDCtx();
|
||||
void* dict = (void*)malloc(CNBuffSize);
|
||||
int nbWorkers;
|
||||
|
||||
RDG_genBuffer(dict, CNBuffSize, 0.5, 0.5, seed);
|
||||
RDG_genBuffer(CNBuffer, CNBuffSize, 0.6, 0.6, seed);
|
||||
for (nbWorkers = 0; nbWorkers < 3; ++nbWorkers) {
|
||||
RDG_genBuffer(dict, CNBuffSize, 0.5, 0.5, seed);
|
||||
RDG_genBuffer(CNBuffer, CNBuffSize, 0.6, 0.6, seed);
|
||||
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceMaxWindow, 1));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1));
|
||||
assert(!ZSTD_isError(ZSTD_compress_usingDict(cctx, compressedBuffer, compressedBufferSize,
|
||||
CNBuffer, CNBuffSize, dict, CNBuffSize, 3)));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, nbWorkers)); /* use the loop counter so each iteration tests a different worker count (0, 1, 2), not the parameter's own enum id */
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceMaxWindow, 1));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1));
|
||||
CHECK_Z(ZSTD_CCtx_refPrefix(cctx, dict, CNBuffSize));
|
||||
cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize);
|
||||
CHECK_Z(cSize);
|
||||
CHECK_Z(ZSTD_decompress_usingDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, dict, CNBuffSize));
|
||||
}
|
||||
|
||||
ZSTD_freeCCtx(cctx);
|
||||
ZSTD_freeDCtx(dctx);
|
||||
free(dict);
|
||||
}
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
|
||||
DISPLAYLEVEL(3, "test%3i : testing ldm dictionary gets invalidated : ", testNb++);
|
||||
{
|
||||
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
|
||||
ZSTD_DCtx* const dctx = ZSTD_createDCtx();
|
||||
void* dict = (void*)malloc(CNBuffSize);
|
||||
size_t const kWindowLog = 10;
|
||||
size_t const kWindowSize = (size_t)1 << kWindowLog;
|
||||
size_t const dictSize = kWindowSize * 10;
|
||||
size_t const srcSize1 = kWindowSize / 2;
|
||||
size_t const srcSize2 = kWindowSize * 10;
|
||||
int nbWorkers;
|
||||
|
||||
if (CNBuffSize < dictSize) goto _output_error;
|
||||
|
||||
RDG_genBuffer(dict, dictSize, 0.5, 0.5, seed);
|
||||
RDG_genBuffer(CNBuffer, srcSize1 + srcSize2, 0.5, 0.5, seed);
|
||||
|
||||
/* Enable checksum to verify round trip. */
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
|
||||
/* Disable content size to skip single-pass decompression. */
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, (int)kWindowLog));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_ldmMinMatch, 32));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_ldmHashRateLog, 1));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_ldmHashLog, 12));
|
||||
|
||||
for (nbWorkers = 0; nbWorkers < 3; ++nbWorkers) {
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, nbWorkers));
|
||||
/* Round trip once with a dictionary. */
|
||||
CHECK_Z(ZSTD_CCtx_refPrefix(cctx, dict, dictSize));
|
||||
cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, srcSize1);
|
||||
CHECK_Z(cSize);
|
||||
CHECK_Z(ZSTD_decompress_usingDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, dict, dictSize));
|
||||
cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, srcSize2);
|
||||
/* Streaming decompression to catch out of bounds offsets. */
|
||||
{
|
||||
ZSTD_inBuffer in = {compressedBuffer, cSize, 0};
|
||||
ZSTD_outBuffer out = {decodedBuffer, CNBuffSize, 0};
|
||||
size_t const dSize = ZSTD_decompressStream(dctx, &out, &in);
|
||||
CHECK_Z(dSize);
|
||||
if (dSize != 0) goto _output_error;
|
||||
}
|
||||
}
|
||||
|
||||
ZSTD_freeCCtx(cctx);
|
||||
ZSTD_freeDCtx(dctx);
|
||||
free(dict);
|
||||
}
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
|
Loading…
x
Reference in New Issue
Block a user