1
0
mirror of https://github.com/facebook/zstd.git synced 2025-03-07 01:10:04 +02:00

level tuning

This commit is contained in:
Yann Collet 2015-11-03 09:49:30 +01:00
parent 3564487614
commit 050efba81b
3 changed files with 79 additions and 68 deletions

View File

@ -184,7 +184,7 @@ static size_t ZSTD_HC_hashPtr(const void* p, U32 h, U32 mls)
}
}
#define NEXT_IN_CHAIN(d) chainTable[(d) & chainMask] /* flexible, CHAINSIZE dependent */
#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask]
/* *************************************
@ -204,7 +204,7 @@ static U32 ZSTD_HC_insertAndFindFirstIndex (ZSTD_HC_CCtx* zc, const BYTE* ip, U
while(idx < target)
{
size_t h = ZSTD_HC_hashPtr(base+idx, hashLog, mls);
NEXT_IN_CHAIN(idx) = hashTable[h];
NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
hashTable[h] = idx;
idx++;
}
@ -217,8 +217,8 @@ static U32 ZSTD_HC_insertAndFindFirstIndex (ZSTD_HC_CCtx* zc, const BYTE* ip, U
FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
size_t ZSTD_HC_insertAndFindBestMatch (
ZSTD_HC_CCtx* zc, /* Index table will be updated */
const BYTE* ip, const BYTE* const iLimit,
const BYTE** matchpos,
const BYTE* const ip, const BYTE* const iLimit,
size_t* offsetPtr,
const U32 maxNbAttempts, const U32 matchLengthSearch)
{
U32* const chainTable = zc->chainTable;
@ -237,17 +237,22 @@ size_t ZSTD_HC_insertAndFindBestMatch (
/* HC4 match finder */
matchIndex = ZSTD_HC_insertAndFindFirstIndex (zc, ip, matchLengthSearch);
while ((matchIndex>=lowLimit) && (nbAttempts))
while ((matchIndex>lowLimit) && (nbAttempts))
{
nbAttempts--;
if (matchIndex >= dictLimit)
{
match = base + matchIndex;
if (*(match+ml) == *(ip+ml)
&& (MEM_read32(match) == MEM_read32(ip)))
if ( (match[ml] == ip[ml])
&& (MEM_read32(match) == MEM_read32(ip)) ) /* ensures minimum match of 4 */
{
const size_t mlt = ZSTD_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH;
if (mlt > ml) { ml = mlt; *matchpos = match; if (ip+ml >= iLimit) break; }
if (mlt > ml)
//if (((int)(4*mlt) - (int)ZSTD_highbit((U32)(ip-match)+1)) > ((int)(4*ml) - (int)ZSTD_highbit((U32)((*offsetPtr)+1))))
{
ml = mlt; *offsetPtr = ip-match;
if (ip+ml >= iLimit) break;
}
}
}
else
@ -261,12 +266,12 @@ size_t ZSTD_HC_insertAndFindBestMatch (
mlt = ZSTD_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH;
if ((ip+mlt == vLimit) && (vLimit < iLimit))
mlt += ZSTD_count(ip+mlt, base+dictLimit, iLimit);
if (mlt > ml) { ml = mlt; *matchpos = base + matchIndex; } /* virtual matchpos */
if (mlt > ml) { ml = mlt; *offsetPtr = (ip-base) - matchIndex; }
}
}
if (base + matchIndex <= ip - chainSize) break;
matchIndex = NEXT_IN_CHAIN(matchIndex);
matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
}
return ml;
@ -276,15 +281,15 @@ size_t ZSTD_HC_insertAndFindBestMatch (
FORCE_INLINE size_t ZSTD_HC_insertAndFindBestMatch_selectMLS (
ZSTD_HC_CCtx* zc, /* Index table will be updated */
const BYTE* ip, const BYTE* const iLimit,
const BYTE** matchpos,
size_t* offsetPtr,
const U32 maxNbAttempts, const U32 matchLengthSearch)
{
switch(matchLengthSearch)
{
default :
case 4 : return ZSTD_HC_insertAndFindBestMatch(zc, ip, iLimit, matchpos, maxNbAttempts, 4);
case 5 : return ZSTD_HC_insertAndFindBestMatch(zc, ip, iLimit, matchpos, maxNbAttempts, 5);
case 6 : return ZSTD_HC_insertAndFindBestMatch(zc, ip, iLimit, matchpos, maxNbAttempts, 6);
case 4 : return ZSTD_HC_insertAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
case 5 : return ZSTD_HC_insertAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
case 6 : return ZSTD_HC_insertAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
}
}
@ -297,7 +302,6 @@ size_t ZSTD_HC_compressBlock_lazydeep(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDs
const BYTE* anchor = istart;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
const BYTE* match = istart;
size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE;
const U32 maxSearches = 1 << ctx->params.searchLog;
@ -311,7 +315,7 @@ size_t ZSTD_HC_compressBlock_lazydeep(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDs
while (ip <= ilimit)
{
size_t matchLength;
size_t offset;
size_t offset=999999;
const BYTE* start;
/* try to find a first match */
@ -330,11 +334,10 @@ size_t ZSTD_HC_compressBlock_lazydeep(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDs
}
offset_2 = offset_1;
matchLength = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &match, maxSearches, mls);
matchLength = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &offset, maxSearches, mls);
if (!matchLength) { ip++; continue; }
/* let's try to find a better solution */
offset = ip - match;
start = ip;
while (ip<ilimit)
@ -349,8 +352,8 @@ size_t ZSTD_HC_compressBlock_lazydeep(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDs
matchLength = ml2, offset = 0, start = ip;
}
{
size_t ml2 = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &match, maxSearches, mls);
size_t offset2 = ip - match;
size_t offset2=999999;
size_t ml2 = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &offset2, maxSearches, mls);
int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */
int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 4);
if (gain2 > gain1)
@ -360,6 +363,7 @@ size_t ZSTD_HC_compressBlock_lazydeep(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDs
}
}
/* let's find an even better one */
if (ip<ilimit)
{
ip ++;
@ -367,15 +371,15 @@ size_t ZSTD_HC_compressBlock_lazydeep(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDs
{
size_t ml2 = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_1, iend) + MINMATCH;
int gain2 = (int)(ml2 * 4);
int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 2);
int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 1);
if (gain2 > gain1)
matchLength = ml2, offset = 0, start = ip;
}
{
size_t ml2 = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &match, maxSearches, mls);
size_t offset2 = ip - match;
size_t offset2=999999;
size_t ml2 = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &offset2, maxSearches, mls);
int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1)); /* raw approx */
int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 8);
int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7);
if (gain2 > gain1)
{
matchLength = ml2, offset = offset2, start = ip;
@ -383,7 +387,7 @@ size_t ZSTD_HC_compressBlock_lazydeep(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDs
}
}
}
break; /* nothing found : store previous one */
break; /* nothing found : store previous solution */
}
/* store sequence */
@ -418,7 +422,6 @@ size_t ZSTD_HC_compressBlock_lazy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSiz
const BYTE* anchor = istart;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
const BYTE* match = istart;
size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE;
const U32 maxSearches = 1 << ctx->params.searchLog;
@ -432,7 +435,7 @@ size_t ZSTD_HC_compressBlock_lazy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSiz
while (ip <= ilimit)
{
size_t matchLength;
size_t offset;
size_t offset=0;
const BYTE* start;
/* try to find a first match */
@ -451,11 +454,10 @@ size_t ZSTD_HC_compressBlock_lazy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSiz
}
offset_2 = offset_1;
matchLength = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &match, maxSearches, mls);
matchLength = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &offset, maxSearches, mls);
if (!matchLength) { ip++; continue; }
/* let's try to find a better solution */
offset = ip - match;
start = ip;
while (ip<ilimit)
@ -473,8 +475,8 @@ size_t ZSTD_HC_compressBlock_lazy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSiz
}
}
{
size_t ml2 = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &match, maxSearches, mls);
size_t offset2 = ip - match;
size_t offset2=999999;
size_t ml2 = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &offset2, maxSearches, mls);
int gain2 = (int)(ml2*3 - ZSTD_highbit((U32)offset2+1)); /* raw approx */
int gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 3);
if (gain2 > gain1)
@ -519,7 +521,6 @@ size_t ZSTD_HC_compressBlock_greedy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstS
const BYTE* anchor = istart;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
const BYTE* match = istart;
size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE;
const U32 maxSearches = 1 << ctx->params.searchLog;
@ -562,12 +563,13 @@ size_t ZSTD_HC_compressBlock_greedy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstS
/* search */
{
size_t matchLength = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &match, maxSearches, mls);
size_t offset=999999;
size_t matchLength = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &offset, maxSearches, mls);
if (!matchLength) { ip++; continue; }
/* store sequence */
{
size_t litLength = ip-anchor;
offset_1 = ip-match;
offset_1 = offset;
ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset_1, matchLength-MINMATCH);
ip += matchLength;
anchor = ip;
@ -591,9 +593,17 @@ size_t ZSTD_HC_compressBlock_greedy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstS
size_t ZSTD_HC_compressBlock(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
if (ctx->params.strategy == ZSTD_HC_greedy)
return ZSTD_HC_compressBlock_greedy(ctx, dst, maxDstSize, src, srcSize);
return ZSTD_HC_compressBlock_lazy(ctx, dst, maxDstSize, src, srcSize);
switch(ctx->params.strategy)
{
case ZSTD_HC_greedy:
return ZSTD_HC_compressBlock_greedy(ctx, dst, maxDstSize, src, srcSize);
case ZSTD_HC_lazy:
return ZSTD_HC_compressBlock_lazy(ctx, dst, maxDstSize, src, srcSize);
case ZSTD_HC_lazydeep:
return ZSTD_HC_compressBlock_lazydeep(ctx, dst, maxDstSize, src, srcSize);
default :
return ERROR(GENERIC); /* unknown block compressor */
}
}

View File

@ -53,7 +53,7 @@ typedef struct
U32 hashLog; /* dispatch table : larger == more memory, faster*/
U32 searchLog; /* nb of searches : larger == more compression, slower*/
U32 searchLength; /* size of matches : larger == faster decompression */
ZSTD_HC_strategy strategy; /* greedy, lazy (stronger, slower) */
ZSTD_HC_strategy strategy; /* greedy, lazy, lazydeep */
} ZSTD_HC_parameters;
/* parameters boundaries */
@ -73,7 +73,7 @@ typedef struct
* Advanced function
***************************************/
/** ZSTD_HC_compress_advanced
* Same as ZSTD_HC_compressCCtx(), but can fine-tune each compression parameter */
* Same as ZSTD_HC_compressCCtx(), with fine-tune control of each compression parameter */
size_t ZSTD_HC_compress_advanced (ZSTD_HC_CCtx* ctx,
void* dst, size_t maxDstSize,
const void* src, size_t srcSize,
@ -94,35 +94,36 @@ size_t ZSTD_HC_compressEnd(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize);
#define ZSTD_HC_MAX_CLEVEL 26
static const ZSTD_HC_parameters ZSTD_HC_defaultParameters[ZSTD_HC_MAX_CLEVEL+1] = {
/* W, C, H, S, L, strat */
{ 18, 12, 14, 1, 4, ZSTD_HC_greedy }, /* level 0 - never used */
{ 18, 12, 14, 1, 4, ZSTD_HC_greedy }, /* level 1 - in fact redirected towards zstd fast */
{ 18, 12, 15, 2, 4, ZSTD_HC_greedy }, /* level 2 */
{ 19, 13, 17, 3, 5, ZSTD_HC_greedy }, /* level 3 */
{ 20, 18, 19, 2, 5, ZSTD_HC_greedy }, /* level 4 */
{ 20, 19, 19, 3, 5, ZSTD_HC_greedy }, /* level 5 */
{ 20, 18, 20, 3, 5, ZSTD_HC_lazy }, /* level 6 */
{ 20, 18, 20, 4, 5, ZSTD_HC_lazy }, /* level 7 */
{ 21, 19, 20, 4, 5, ZSTD_HC_lazy }, /* level 8 */
{ 21, 19, 20, 5, 5, ZSTD_HC_lazy }, /* level 9 */
{ 21, 20, 20, 5, 5, ZSTD_HC_lazy }, /* level 10 */
{ 21, 21, 20, 5, 5, ZSTD_HC_lazy }, /* level 11 */
{ 22, 20, 22, 6, 5, ZSTD_HC_lazy }, /* level 12 */
{ 22, 21, 22, 6, 5, ZSTD_HC_lazy }, /* level 13 */
{ 23, 21, 22, 6, 5, ZSTD_HC_lazy }, /* level 14 */
{ 23, 21, 23, 7, 5, ZSTD_HC_lazy }, /* level 15 */
{ 23, 22, 22, 6, 5, ZSTD_HC_lazy }, /* level 16 */
{ 23, 22, 22, 7, 5, ZSTD_HC_lazy }, /* level 17 */
{ 23, 23, 22, 7, 5, ZSTD_HC_lazy }, /* level 18 */
{ 23, 22, 23, 8, 5, ZSTD_HC_lazy }, /* level 19 */
{ 23, 23, 23, 8, 5, ZSTD_HC_lazy }, /* level 20 */
{ 23, 23, 23, 8, 5, ZSTD_HC_lazy }, /* level 21 */
{ 24, 24, 24, 8, 5, ZSTD_HC_lazy }, /* level 22 */
{ 24, 23, 23, 9, 5, ZSTD_HC_lazy }, /* level 23 */
{ 24, 24, 24, 9, 5, ZSTD_HC_lazy }, /* level 24 */
{ 24, 24, 24, 9, 5, ZSTD_HC_lazy }, /* level 25 */
{ 24, 24, 24, 10, 5, ZSTD_HC_lazy }, /* level 26 */ /* ZSTD_HC_MAX_CLEVEL */
{ 18, 12, 14, 1, 4, ZSTD_HC_greedy }, /* level 0 - never used */
{ 18, 12, 14, 1, 4, ZSTD_HC_greedy }, /* level 1 - in fact redirected towards zstd fast */
{ 18, 12, 15, 2, 4, ZSTD_HC_greedy }, /* level 2 */
{ 19, 13, 17, 3, 5, ZSTD_HC_greedy }, /* level 3 */
{ 20, 18, 19, 2, 5, ZSTD_HC_greedy }, /* level 4 */
{ 20, 18, 19, 2, 5, ZSTD_HC_lazy }, /* level 5 */
{ 20, 18, 20, 3, 5, ZSTD_HC_lazy }, /* level 6 */
{ 20, 18, 20, 4, 5, ZSTD_HC_lazy }, /* level 7 */
{ 21, 19, 20, 4, 5, ZSTD_HC_lazy }, /* level 8 */
{ 21, 19, 20, 5, 5, ZSTD_HC_lazy }, /* level 9 */
{ 21, 20, 20, 5, 5, ZSTD_HC_lazy }, /* level 10 */
{ 21, 20, 20, 5, 5, ZSTD_HC_lazydeep }, /* level 11 */
{ 22, 20, 22, 5, 5, ZSTD_HC_lazydeep }, /* level 12 */
{ 22, 20, 22, 6, 5, ZSTD_HC_lazydeep }, /* level 13 */
{ 21, 21, 22, 6, 5, ZSTD_HC_lazydeep }, /* level 14 */
{ 22, 21, 22, 6, 5, ZSTD_HC_lazydeep }, /* level 15 */
{ 22, 21, 23, 7, 5, ZSTD_HC_lazydeep }, /* level 16 */
{ 23, 21, 23, 7, 5, ZSTD_HC_lazydeep }, /* level 17 */
{ 23, 22, 23, 7, 5, ZSTD_HC_lazydeep }, /* level 18 */
{ 23, 22, 23, 7, 5, ZSTD_HC_lazydeep }, /* level 19 */
{ 23, 22, 23, 8, 5, ZSTD_HC_lazydeep }, /* level 20 */
{ 23, 22, 23, 8, 5, ZSTD_HC_lazydeep }, /* level 21 */
{ 23, 23, 24, 8, 5, ZSTD_HC_lazydeep }, /* level 22 */
{ 24, 24, 24, 8, 5, ZSTD_HC_lazydeep }, /* level 23 */
{ 23, 23, 23, 9, 5, ZSTD_HC_lazydeep }, /* level 24 */
{ 24, 23, 23, 9, 5, ZSTD_HC_lazydeep }, /* level 25 */
{ 24, 24, 24, 9, 5, ZSTD_HC_lazydeep }, /* level 26 */
};
#if defined (__cplusplus)
}
#endif

View File

@ -83,7 +83,7 @@
#define MB *(1 <<20)
#define GB *(1U<<30)
#define MAX_MEM (2 GB - 64 MB)
static const size_t maxMemory = sizeof(size_t)==4 ? (2 GB - 64 MB) : 8ULL GB;
#define DEFAULT_CHUNKSIZE (4 MB)
static U32 g_compressibilityDefault = 50;
@ -401,7 +401,7 @@ static size_t BMK_findMaxMem(U64 requiredMem)
requiredMem = (((requiredMem >> 26) + 1) << 26);
requiredMem += 2 * step;
if (requiredMem > MAX_MEM) requiredMem = MAX_MEM;
if (requiredMem > maxMemory) requiredMem = maxMemory;
while (!testmem)
{