1
0
mirror of https://github.com/facebook/zstd.git synced 2025-03-05 16:36:02 +02:00

zstdmt : fix : loading prefix from previous segments

There used to be a (very small) chance that
loading prefix from previous segment
would be confused with a real zstd dictionary.
For that to happen, the prefix needs to start
with the same value as dictionary magic.
That's 1 chance in 4 billions if all values have equal probability.
But in fact, since some values are more common (0x00000000 for example)
others are less common, and dictionary magic was selected to be one of them,
so probabilities are likely even lower.

Anyway, this risk is no down to zero
by adding a new CCtx parameter : ZSTD_p_forceRawDict

Current parameter policy : the parameter "stick" to its CCtx,
so any dictionary loading after ZSTD_p_forceRawDict is set
will be loaded in "raw" ("content only") mode,
even if CCtx is re-used multiple times with multiple different dictionary.
It's up to the user to reset this value differently if it needs so.
This commit is contained in:
Yann Collet 2017-02-23 23:42:12 -08:00
parent 831b4890ce
commit 14312d833e
4 changed files with 11 additions and 5 deletions

View File

@ -365,7 +365,8 @@ typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; v
</p></pre><BR>
<pre><b>typedef enum {
ZSTD_p_forceWindow </b>/* Force back-references to remain < windowSize, even when referencing Dictionary content (default:0) */<b>
ZSTD_p_forceWindow, </b>/* Force back-references to remain < windowSize, even when referencing Dictionary content (default:0) */<b>
ZSTD_p_forceRawDict </b>/* Force loading dictionary in "content-only" mode (no header analysis) */<b>
} ZSTD_CCtxParameter;
</b></pre><BR>
<pre><b>size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned value);

View File

@ -62,6 +62,7 @@ struct ZSTD_CCtx_s {
U32 hashLog3; /* dispatch table : larger == faster, more memory */
U32 loadedDictEnd; /* index of end of dictionary */
U32 forceWindow; /* force back-references to respect limit of 1<<wLog, even for dictionary */
U32 forceRawDict; /* Force loading dictionary in "content-only" mode (no header analysis) */
ZSTD_compressionStage_e stage;
U32 rep[ZSTD_REP_NUM];
U32 repToConfirm[ZSTD_REP_NUM];
@ -124,6 +125,7 @@ size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned
switch(param)
{
case ZSTD_p_forceWindow : cctx->forceWindow = value>0; cctx->loadedDictEnd = 0; return 0;
case ZSTD_p_forceRawDict : cctx->forceRawDict = value>0; return 0;
default: return ERROR(parameter_unknown);
}
}
@ -2613,8 +2615,9 @@ static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, si
{
if ((dict==NULL) || (dictSize<=8)) return 0;
/* default : dict is pure content */
if (MEM_readLE32(dict) != ZSTD_DICT_MAGIC) return ZSTD_loadDictionaryContent(zc, dict, dictSize);
/* dict as pure content */
if ((MEM_readLE32(dict) != ZSTD_DICT_MAGIC) || (zc->forceRawDict))
return ZSTD_loadDictionaryContent(zc, dict, dictSize);
zc->dictID = zc->params.fParams.noDictIDFlag ? 0 : MEM_readLE32((const char*)dict+4);
/* known magic number : dict is parsed for entropy stats and content */

View File

@ -236,8 +236,9 @@ void ZSTDMT_compressChunk(void* jobDescription)
if (job->cdict) DEBUGLOG(3, "using CDict ");
if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
} else { /* srcStart points at reloaded section */
size_t const dictModeError = ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceRawDict, 1); /* Force loading dictionary in "content-only" mode (no header analysis) */
size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->srcStart, job->dictSize, job->params, 0);
if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
if (ZSTD_isError(initError) || ZSTD_isError(dictModeError)) { job->cSize = initError; goto _endJob; }
ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceWindow, 1);
}
if (!job->firstChunk) { /* flush and overwrite frame header when it's not first segment */

View File

@ -462,7 +462,8 @@ ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
typedef enum {
ZSTD_p_forceWindow /* Force back-references to remain < windowSize, even when referencing Dictionary content (default:0) */
ZSTD_p_forceWindow, /* Force back-references to remain < windowSize, even when referencing Dictionary content (default:0) */
ZSTD_p_forceRawDict /* Force loading dictionary in "content-only" mode (no header analysis) */
} ZSTD_CCtxParameter;
/*! ZSTD_setCCtxParameter() :
* Set advanced parameters, selected through enum ZSTD_CCtxParameter