mirror of
https://github.com/facebook/zstd.git
synced 2025-03-07 09:26:03 +02:00
new statistics update policy
small general compression ratio improvement for btopt+ strategies.
This commit is contained in:
parent
23a9368c45
commit
08ceda3dfc
@ -79,21 +79,6 @@ static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* ZSTD_downscaleStat() :
|
|
||||||
* reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus)
|
|
||||||
* return the resulting sum of elements */
|
|
||||||
static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus)
|
|
||||||
{
|
|
||||||
U32 s, sum=0;
|
|
||||||
DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1);
|
|
||||||
assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
|
|
||||||
for (s=0; s<lastEltIndex+1; s++) {
|
|
||||||
table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
|
|
||||||
sum += table[s];
|
|
||||||
}
|
|
||||||
return sum;
|
|
||||||
}
|
|
||||||
|
|
||||||
static U32 sum_u32(const unsigned table[], size_t nbElts)
|
static U32 sum_u32(const unsigned table[], size_t nbElts)
|
||||||
{
|
{
|
||||||
size_t n;
|
size_t n;
|
||||||
@ -104,6 +89,31 @@ static U32 sum_u32(const unsigned table[], size_t nbElts)
|
|||||||
return total;
|
return total;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ZSTD_downscaleStats() :
 * replace every element of table by 1 + (element >> shift),
 * so that no element ends up at zero.
 * table : array of counters, updated in place
 * lastEltIndex : index of the last element to process (lastEltIndex+1 elements are visited)
 * shift : number of bits each element is shifted right by; must be < 30
 * @return : sum of all elements after the update */
static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift)
{
    U32 const nbElts = lastEltIndex + 1;
    U32 total = 0;
    U32 n;
    DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift);
    assert(shift < 30);
    for (n = 0; n < nbElts; n++) {
        unsigned const reduced = (table[n] >> shift) + 1;
        table[n] = reduced;
        total += reduced;
    }
    return total;
}
|
||||||
|
|
||||||
|
/* ZSTD_scaleStats() :
|
||||||
|
* reduce all elements in table is sum too large
|
||||||
|
* return the resulting sum of elements */
|
||||||
|
static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
|
||||||
|
{
|
||||||
|
U32 const prevsum = sum_u32(table, lastEltIndex+1);
|
||||||
|
U32 const factor = prevsum >> logTarget;
|
||||||
|
DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
|
||||||
|
assert(logTarget < 30);
|
||||||
|
if (factor <= 1) return prevsum;
|
||||||
|
return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor));
|
||||||
|
}
|
||||||
|
|
||||||
/* ZSTD_rescaleFreqs() :
|
/* ZSTD_rescaleFreqs() :
|
||||||
* if first block (detected by optPtr->litLengthSum == 0) : init statistics
|
* if first block (detected by optPtr->litLengthSum == 0) : init statistics
|
||||||
* take hints from dictionary if there is one
|
* take hints from dictionary if there is one
|
||||||
@ -185,7 +195,7 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
|
|||||||
if (compressedLiterals) {
|
if (compressedLiterals) {
|
||||||
unsigned lit = MaxLit;
|
unsigned lit = MaxLit;
|
||||||
HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
|
HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
|
||||||
optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
|
optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8);
|
||||||
}
|
}
|
||||||
|
|
||||||
{ unsigned const baseLLfreqs[MaxLL+1] = {
|
{ unsigned const baseLLfreqs[MaxLL+1] = {
|
||||||
@ -219,10 +229,10 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
|
|||||||
} else { /* new block : re-use previous statistics, scaled down */
|
} else { /* new block : re-use previous statistics, scaled down */
|
||||||
|
|
||||||
if (compressedLiterals)
|
if (compressedLiterals)
|
||||||
optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
|
optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 10);
|
||||||
optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0);
|
optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 9);
|
||||||
optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0);
|
optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 9);
|
||||||
optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0);
|
optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 9);
|
||||||
}
|
}
|
||||||
|
|
||||||
ZSTD_setBasePrices(optPtr, optLevel);
|
ZSTD_setBasePrices(optPtr, optLevel);
|
||||||
|
@ -70,6 +70,8 @@ static const size_t maxMemory = (sizeof(size_t)==4) ?
|
|||||||
#define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush(NULL); }
|
#define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush(NULL); }
|
||||||
#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
|
#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
|
||||||
/* 0 : no display; 1: errors; 2 : + result + interaction + warnings; 3 : + progression; 4 : + information */
|
/* 0 : no display; 1: errors; 2 : + result + interaction + warnings; 3 : + progression; 4 : + information */
|
||||||
|
/* OUTPUT() : printf-style message written to stdout, followed by fflush(NULL) (flushes all open output streams) */
#define OUTPUT(...) { fprintf(stdout, __VA_ARGS__); fflush(NULL); }
|
||||||
|
/* OUTPUTLEVEL() : same as OUTPUT(), but only emitted when displayLevel >= l.
 * NOTE(review): expands to a bare `if (...) { ... }`, so an `else` following a call site would bind to it — confirm call sites. */
#define OUTPUTLEVEL(l, ...) if (displayLevel>=l) { OUTPUT(__VA_ARGS__); }
|
||||||
|
|
||||||
|
|
||||||
/* *************************************
|
/* *************************************
|
||||||
@ -429,9 +431,9 @@ BMK_benchMemAdvancedNoAlloc(
|
|||||||
dctxprep.dictBuffer = dictBuffer;
|
dctxprep.dictBuffer = dictBuffer;
|
||||||
dctxprep.dictBufferSize = dictBufferSize;
|
dctxprep.dictBufferSize = dictBufferSize;
|
||||||
|
|
||||||
DISPLAYLEVEL(2, "\r%70s\r", ""); /* blank line */
|
OUTPUTLEVEL(2, "\r%70s\r", ""); /* blank line */
|
||||||
assert(srcSize < UINT_MAX);
|
assert(srcSize < UINT_MAX);
|
||||||
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u -> \r", marks[markNb], displayName, (unsigned)srcSize);
|
OUTPUTLEVEL(2, "%2s-%-17.17s :%10u -> \r", marks[markNb], displayName, (unsigned)srcSize);
|
||||||
|
|
||||||
while (!(compressionCompleted && decompressionCompleted)) {
|
while (!(compressionCompleted && decompressionCompleted)) {
|
||||||
if (!compressionCompleted) {
|
if (!compressionCompleted) {
|
||||||
@ -453,7 +455,7 @@ BMK_benchMemAdvancedNoAlloc(
|
|||||||
|
|
||||||
{ int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
|
{ int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
|
||||||
assert(cSize < UINT_MAX);
|
assert(cSize < UINT_MAX);
|
||||||
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f), %6.*f MB/s\r",
|
OUTPUTLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f), %6.*f MB/s\r",
|
||||||
marks[markNb], displayName,
|
marks[markNb], displayName,
|
||||||
(unsigned)srcSize, (unsigned)cSize,
|
(unsigned)srcSize, (unsigned)cSize,
|
||||||
ratioAccuracy, ratio,
|
ratioAccuracy, ratio,
|
||||||
@ -476,7 +478,7 @@ BMK_benchMemAdvancedNoAlloc(
|
|||||||
}
|
}
|
||||||
|
|
||||||
{ int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
|
{ int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
|
||||||
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f), %6.*f MB/s, %6.1f MB/s \r",
|
OUTPUTLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f), %6.*f MB/s, %6.1f MB/s \r",
|
||||||
marks[markNb], displayName,
|
marks[markNb], displayName,
|
||||||
(unsigned)srcSize, (unsigned)cSize,
|
(unsigned)srcSize, (unsigned)cSize,
|
||||||
ratioAccuracy, ratio,
|
ratioAccuracy, ratio,
|
||||||
@ -537,13 +539,13 @@ BMK_benchMemAdvancedNoAlloc(
|
|||||||
double const cSpeed = (double)benchResult.cSpeed / MB_UNIT;
|
double const cSpeed = (double)benchResult.cSpeed / MB_UNIT;
|
||||||
double const dSpeed = (double)benchResult.dSpeed / MB_UNIT;
|
double const dSpeed = (double)benchResult.dSpeed / MB_UNIT;
|
||||||
if (adv->additionalParam) {
|
if (adv->additionalParam) {
|
||||||
DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, adv->additionalParam);
|
OUTPUT("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, adv->additionalParam);
|
||||||
} else {
|
} else {
|
||||||
DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName);
|
OUTPUT("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
DISPLAYLEVEL(2, "%2i#\n", cLevel);
|
OUTPUTLEVEL(2, "%2i#\n", cLevel);
|
||||||
} /* Bench */
|
} /* Bench */
|
||||||
|
|
||||||
benchResult.cMem = (1ULL << (comprParams->windowLog)) + ZSTD_sizeof_CCtx(cctx);
|
benchResult.cMem = (1ULL << (comprParams->windowLog)) + ZSTD_sizeof_CCtx(cctx);
|
||||||
@ -672,7 +674,7 @@ static BMK_benchOutcome_t BMK_benchCLevel(const void* srcBuffer, size_t benchedS
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (displayLevel == 1 && !adv->additionalParam) /* --quiet mode */
|
if (displayLevel == 1 && !adv->additionalParam) /* --quiet mode */
|
||||||
DISPLAY("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n",
|
OUTPUT("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n",
|
||||||
ZSTD_VERSION_STRING, ZSTD_GIT_COMMIT_STRING,
|
ZSTD_VERSION_STRING, ZSTD_GIT_COMMIT_STRING,
|
||||||
(unsigned)benchedSize, adv->nbSeconds, (unsigned)(adv->blockSize>>10));
|
(unsigned)benchedSize, adv->nbSeconds, (unsigned)(adv->blockSize>>10));
|
||||||
|
|
||||||
@ -762,7 +764,7 @@ static int BMK_loadFiles(void* buffer, size_t bufferSize,
|
|||||||
}
|
}
|
||||||
{ FILE* const f = fopen(fileNamesTable[n], "rb");
|
{ FILE* const f = fopen(fileNamesTable[n], "rb");
|
||||||
if (f==NULL) RETURN_ERROR_INT(10, "impossible to open file %s", fileNamesTable[n]);
|
if (f==NULL) RETURN_ERROR_INT(10, "impossible to open file %s", fileNamesTable[n]);
|
||||||
DISPLAYLEVEL(2, "Loading %s... \r", fileNamesTable[n]);
|
OUTPUTLEVEL(2, "Loading %s... \r", fileNamesTable[n]);
|
||||||
if (fileSize > bufferSize-pos) fileSize = bufferSize-pos, nbFiles=n; /* buffer too small - stop after this file */
|
if (fileSize > bufferSize-pos) fileSize = bufferSize-pos, nbFiles=n; /* buffer too small - stop after this file */
|
||||||
{ size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
|
{ size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
|
||||||
if (readSize != (size_t)fileSize) RETURN_ERROR_INT(11, "could not read %s", fileNamesTable[n]);
|
if (readSize != (size_t)fileSize) RETURN_ERROR_INT(11, "could not read %s", fileNamesTable[n]);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user