mirror of
https://github.com/facebook/zstd.git
synced 2025-03-07 01:10:04 +02:00
opt: init statistics from dictionary
instead of starting from fake "default" statistics.
This commit is contained in:
parent
74b1c75d64
commit
1a26ec6e8d
@@ -143,6 +143,11 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
|
||||
} } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
|
||||
if (remaining != 1) return ERROR(corruption_detected);
|
||||
if (bitCount > 32) return ERROR(corruption_detected);
|
||||
/* zeroise the rest */
|
||||
{ unsigned symbNb = charnum;
|
||||
for (symbNb=charnum; symbNb <= *maxSVPtr; symbNb++)
|
||||
normalizedCounter[symbNb] = 0;
|
||||
}
|
||||
*maxSVPtr = charnum-1;
|
||||
|
||||
ip += (bitCount+7)>>3;
|
||||
|
@@ -143,7 +143,10 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi
|
||||
for (s=0; s<=maxSymbolValue; s++) {
|
||||
switch (normalizedCounter[s])
|
||||
{
|
||||
case 0: break;
|
||||
case 0:
|
||||
/* filling nonetheless, for compatibility with FSE_getMaxNbBits() */
|
||||
symbolTT[s].deltaNbBits = (tableLog+1) << 16;
|
||||
break;
|
||||
|
||||
case -1:
|
||||
case 1:
|
||||
|
@@ -2396,7 +2396,8 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
|
||||
if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
|
||||
if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
|
||||
/* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
|
||||
CHECK_E( FSE_buildCTable_wksp(bs->entropy.offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, workspace, HUF_WORKSPACE_SIZE),
|
||||
/* fill all offset symbols to avoid garbage at end of table */
|
||||
CHECK_E( FSE_buildCTable_wksp(bs->entropy.offcodeCTable, offcodeNCount, MaxOff, offcodeLog, workspace, HUF_WORKSPACE_SIZE),
|
||||
dictionary_corrupted);
|
||||
dictPtr += offcodeHeaderSize;
|
||||
}
|
||||
|
@@ -35,7 +35,6 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr,
|
||||
optPtr->priceType = zop_dynamic;
|
||||
|
||||
if (optPtr->litLengthSum == 0) { /* first block : init */
|
||||
unsigned u;
|
||||
if (srcSize <= 1024) /* heuristic */
|
||||
optPtr->priceType = zop_predef;
|
||||
|
||||
@@ -47,29 +46,85 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr,
|
||||
assert(optPtr->priceType == zop_dynamic);
|
||||
}
|
||||
|
||||
assert(optPtr->litFreq != NULL);
|
||||
assert(optPtr->symbolCosts != NULL);
|
||||
optPtr->litSum = 0;
|
||||
{ unsigned lit;
|
||||
for (lit=0; lit<=MaxLit; lit++) {
|
||||
U32 const scaleLog = 12; /* scale to 4K */
|
||||
U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->hufCTable, lit);
|
||||
assert(bitCost < scaleLog);
|
||||
optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
|
||||
optPtr->litSum += optPtr->litFreq[lit];
|
||||
} }
|
||||
|
||||
{ unsigned ll;
|
||||
FSE_CState_t llstate;
|
||||
FSE_initCState(&llstate, optPtr->symbolCosts->litlengthCTable);
|
||||
optPtr->litLengthSum = 0;
|
||||
for (ll=0; ll<=MaxLL; ll++) {
|
||||
U32 const scaleLog = 11; /* scale to 2K */
|
||||
U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
|
||||
assert(bitCost < scaleLog);
|
||||
optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
|
||||
optPtr->litLengthSum += optPtr->litLengthFreq[ll];
|
||||
} }
|
||||
|
||||
{ unsigned ml;
|
||||
FSE_CState_t mlstate;
|
||||
FSE_initCState(&mlstate, optPtr->symbolCosts->matchlengthCTable);
|
||||
optPtr->matchLengthSum = 0;
|
||||
for (ml=0; ml<=MaxML; ml++) {
|
||||
U32 const scaleLog = 11; /* scale to 2K */
|
||||
U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
|
||||
assert(bitCost < scaleLog);
|
||||
optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
|
||||
optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
|
||||
} }
|
||||
|
||||
{ unsigned of;
|
||||
FSE_CState_t ofstate;
|
||||
FSE_initCState(&ofstate, optPtr->symbolCosts->offcodeCTable);
|
||||
optPtr->offCodeSum = 0;
|
||||
for (of=0; of<=MaxOff; of++) {
|
||||
U32 const scaleLog = 11; /* scale to 2K */
|
||||
U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
|
||||
assert(bitCost < scaleLog);
|
||||
optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
|
||||
optPtr->offCodeSum += optPtr->offCodeFreq[of];
|
||||
} }
|
||||
|
||||
} else { /* not a dictionary */
|
||||
|
||||
assert(optPtr->litFreq != NULL);
|
||||
optPtr->litSum = 0;
|
||||
{ unsigned lit = MaxLit;
|
||||
FSE_count(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
|
||||
for (lit=0; lit<=MaxLit; lit++) {
|
||||
optPtr->litFreq[lit] = 1 + (optPtr->litFreq[lit] >> (ZSTD_FREQ_DIV+1));
|
||||
optPtr->litSum += optPtr->litFreq[lit];
|
||||
} }
|
||||
|
||||
{ unsigned ll;
|
||||
for (ll=0; ll<=MaxLL; ll++)
|
||||
optPtr->litLengthFreq[ll] = 1;
|
||||
optPtr->litLengthSum = MaxLL+1;
|
||||
}
|
||||
|
||||
{ unsigned ml;
|
||||
for (ml=0; ml<=MaxML; ml++)
|
||||
optPtr->matchLengthFreq[ml] = 1;
|
||||
optPtr->matchLengthSum = MaxML+1;
|
||||
}
|
||||
|
||||
{ unsigned of;
|
||||
for (of=0; of<=MaxOff; of++)
|
||||
optPtr->offCodeFreq[of] = 1;
|
||||
optPtr->offCodeSum = MaxOff+1;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
assert(optPtr->litFreq != NULL);
|
||||
{ unsigned max = MaxLit;
|
||||
FSE_count(optPtr->litFreq, &max, src, srcSize); /* use raw first block to init statistics */
|
||||
}
|
||||
optPtr->litSum = 0;
|
||||
for (u=0; u<=MaxLit; u++) {
|
||||
optPtr->litFreq[u] = 1 + (optPtr->litFreq[u] >> (ZSTD_FREQ_DIV+1));
|
||||
optPtr->litSum += optPtr->litFreq[u];
|
||||
}
|
||||
|
||||
for (u=0; u<=MaxLL; u++)
|
||||
optPtr->litLengthFreq[u] = 1;
|
||||
optPtr->litLengthSum = MaxLL+1;
|
||||
for (u=0; u<=MaxML; u++)
|
||||
optPtr->matchLengthFreq[u] = 1;
|
||||
optPtr->matchLengthSum = MaxML+1;
|
||||
for (u=0; u<=MaxOff; u++)
|
||||
optPtr->offCodeFreq[u] = 1;
|
||||
optPtr->offCodeSum = (MaxOff+1);
|
||||
|
||||
} else { /* new block : re-use previous statistics, scaled down */
|
||||
unsigned u;
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user