1
0
mirror of https://github.com/facebook/zstd.git synced 2025-03-06 16:56:49 +02:00

implemented first prefetch

based on dictID.
dictContent is prefetched up to 32 KB
(no contentSize adaptation)
This commit is contained in:
Yann Collet 2018-09-11 17:23:44 -07:00
parent 51a246da82
commit 63a519dbf6
3 changed files with 98 additions and 48 deletions

5
NEWS
View File

@ -1,3 +1,8 @@
v1.3.6
perf: much faster dictionary builder, by @jenniferliu
api : reduced DDict size by 2 KB
misc: tests/paramgrill, a parameter optimizer, by @GeorgeLu97
v1.3.5
perf: much faster dictionary compression, by @felixhandte
perf: small quality improvement for dictionary generation, by @terrelln

View File

@ -97,12 +97,19 @@
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
# define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0)
# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
# define PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0)
# define PREFETCH(ptr) __builtin_prefetch(ptr, 0 /* rw==read */, 0 /* locality */)
# else
# define PREFETCH(ptr) /* disabled */
# endif
#endif /* NO_PREFETCH */
#define PREFETCH_AREA(ptr, size) { \
size_t pos; \
for (pos=0; pos<size; pos++) { \
PREFETCH( (const char*)(const void*)ptr + pos); \
} \
}
/* disable warnings */
#ifdef _MSC_VER /* Visual Studio */
# include <intrin.h> /* For Visual 2005 */

View File

@ -40,7 +40,6 @@
# define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_DEFAULTMAX) + 1)
#endif
/*!
* NO_FORWARD_PROGRESS_MAX :
* maximum allowed nb of calls to ZSTD_decompressStream() and ZSTD_decompress_generic()
@ -52,11 +51,12 @@
# define ZSTD_NO_FORWARD_PROGRESS_MAX 16
#endif
/*-*******************************************************
* Dependencies
*********************************************************/
#include <string.h> /* memcpy, memmove, memset */
#include "cpu.h"
#include "cpu.h" /* prefetch */
#include "mem.h" /* low level memory routines */
#define FSE_STATIC_LINKING_ONLY
#include "fse.h"
@ -138,7 +138,6 @@ struct ZSTD_DCtx_s
U32 fseEntropy;
XXH64_state_t xxhState;
size_t headerSize;
U32 dictID;
ZSTD_format_e format;
const BYTE* litPtr;
ZSTD_customMem customMem;
@ -147,9 +146,13 @@ struct ZSTD_DCtx_s
size_t staticSize;
int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
/* streaming */
/* dictionary */
ZSTD_DDict* ddictLocal;
const ZSTD_DDict* ddict;
U32 dictID;
int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
/* streaming */
ZSTD_dStreamStage streamStage;
char* inBuff;
size_t inBuffSize;
@ -200,6 +203,7 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
dctx->ddict = NULL;
dctx->ddictLocal = NULL;
dctx->ddictIsCold = 0;
dctx->inBuff = NULL;
dctx->inBuffSize = 0;
dctx->outBuffSize = 0;
@ -574,6 +578,12 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
{
case set_repeat:
if (dctx->litEntropy==0) return ERROR(dictionary_corrupted);
/* prefetch huffman table if cold */
if (dctx->ddictIsCold) {
PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
}
/* fall-through */
case set_compressed:
if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */
@ -886,7 +896,8 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
symbolEncodingType_e type, U32 max, U32 maxLog,
const void* src, size_t srcSize,
const U32* baseValue, const U32* nbAdditionalBits,
const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable)
const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
int ddictIsCold)
{
switch(type)
{
@ -905,6 +916,12 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
return 0;
case set_repeat:
if (!flagRepeatTable) return ERROR(corruption_detected);
/* prefetch FSE table if used */
if (ddictIsCold) {
const void* const pStart = *DTablePtr;
size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
PREFETCH_AREA(pStart, pSize);
}
return 0;
case set_compressed :
{ U32 tableLog;
@ -989,7 +1006,8 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
LLtype, MaxLL, LLFSELog,
ip, iend-ip,
LL_base, LL_bits,
LL_defaultDTable, dctx->fseEntropy);
LL_defaultDTable, dctx->fseEntropy,
dctx->ddictIsCold);
if (ZSTD_isError(llhSize)) return ERROR(corruption_detected);
ip += llhSize;
}
@ -998,7 +1016,8 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
OFtype, MaxOff, OffFSELog,
ip, iend-ip,
OF_base, OF_bits,
OF_defaultDTable, dctx->fseEntropy);
OF_defaultDTable, dctx->fseEntropy,
dctx->ddictIsCold);
if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected);
ip += ofhSize;
}
@ -1007,11 +1026,13 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
MLtype, MaxML, MLFSELog,
ip, iend-ip,
ML_base, ML_bits,
ML_defaultDTable, dctx->fseEntropy);
ML_defaultDTable, dctx->fseEntropy,
dctx->ddictIsCold);
if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected);
ip += mlhSize;
}
}
dctx->ddictIsCold = 0;
return ip-istart;
}
@ -1679,7 +1700,8 @@ static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
/* isLongOffset must be true if there are long offsets.
* Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
* We don't expect that to be the case in 64-bit mode.
* In block mode, window size is not known, so we have to be conservative. (note: but it could be evaluated from current-lowLimit)
* In block mode, window size is not known, so we have to be conservative.
* (note: but it could be evaluated from current-lowLimit)
*/
ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)));
DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
@ -2193,8 +2215,8 @@ static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dict
return 0;
}
/* ZSTD_loadEntropy() :
* dict : must point at beginning of a valid zstd dictionary
/*! ZSTD_loadEntropy() :
* dict : must point at beginning of a valid zstd dictionary.
* @return : size of entropy tables read */
static size_t ZSTD_loadEntropy(ZSTD_entropyDTables_t* entropy,
const void* const dict, size_t const dictSize)
@ -2206,13 +2228,11 @@ static size_t ZSTD_loadEntropy(ZSTD_entropyDTables_t* entropy,
assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY); /* dict must be valid */
dictPtr += 8; /* skip header = magic + dictID */
ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, LLTable) == 0);
ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, OFTable) == sizeof(entropy->LLTable));
ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, MLTable) == sizeof(entropy->LLTable) + sizeof(entropy->OFTable));
ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, hufTable) == sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable));
ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, hufTable) >= HUF_DECOMPRESS_WORKSPACE_SIZE);
{ void* const workspace = entropy; /* use fse tables as temporary workspace; implies fse table precede huffTable at beginning of entropy */
size_t const workspaceSize = offsetof(ZSTD_entropyDTables_t, hufTable);
ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, OFTable) == offsetof(ZSTD_entropyDTables_t, LLTable) + sizeof(entropy->LLTable));
ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, MLTable) == offsetof(ZSTD_entropyDTables_t, OFTable) + sizeof(entropy->OFTable));
ZSTD_STATIC_ASSERT(sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable) >= HUF_DECOMPRESS_WORKSPACE_SIZE);
{ void* const workspace = &entropy->LLTable; /* use fse tables as temporary workspace; implies fse tables are grouped together */
size_t const workspaceSize = sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable);
size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable,
dictPtr, dictEnd - dictPtr,
workspace, workspaceSize);
@ -2292,7 +2312,6 @@ static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict
return ZSTD_refDictContent(dctx, dict, dictSize);
}
/* Note : this function cannot fail */
size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
{
assert(dctx != NULL);
@ -2346,28 +2365,44 @@ static size_t ZSTD_DDictDictSize(const ZSTD_DDict* ddict)
return ddict->dictSize;
}
size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dstDCtx, const ZSTD_DDict* ddict)
size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
{
CHECK_F( ZSTD_decompressBegin(dstDCtx) );
if (ddict) { /* support begin on NULL */
dstDCtx->dictID = ddict->dictID;
dstDCtx->prefixStart = ddict->dictContent;
dstDCtx->virtualStart = ddict->dictContent;
dstDCtx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
dstDCtx->previousDstEnd = dstDCtx->dictEnd;
DEBUGLOG(4, "ZSTD_decompressBegin_usingDDict");
assert(dctx != NULL);
if (ddict) {
dctx->ddictIsCold = (dctx->dictID != ddict->dictID);
DEBUGLOG(4, "DDict is %s",
dctx->ddictIsCold ? "~cold~" : "hot!");
}
CHECK_F( ZSTD_decompressBegin(dctx) );
if (ddict) { /* NULL ddict is equivalent to no dictionary */
dctx->dictID = ddict->dictID;
dctx->prefixStart = ddict->dictContent;
dctx->virtualStart = ddict->dictContent;
dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
dctx->previousDstEnd = dctx->dictEnd;
if (ddict->entropyPresent) {
dstDCtx->litEntropy = 1;
dstDCtx->fseEntropy = 1;
dstDCtx->LLTptr = ddict->entropy.LLTable;
dstDCtx->MLTptr = ddict->entropy.MLTable;
dstDCtx->OFTptr = ddict->entropy.OFTable;
dstDCtx->HUFptr = ddict->entropy.hufTable;
dstDCtx->entropy.rep[0] = ddict->entropy.rep[0];
dstDCtx->entropy.rep[1] = ddict->entropy.rep[1];
dstDCtx->entropy.rep[2] = ddict->entropy.rep[2];
dctx->litEntropy = 1;
dctx->fseEntropy = 1;
dctx->LLTptr = ddict->entropy.LLTable;
dctx->MLTptr = ddict->entropy.MLTable;
dctx->OFTptr = ddict->entropy.OFTable;
dctx->HUFptr = ddict->entropy.hufTable;
dctx->entropy.rep[0] = ddict->entropy.rep[0];
dctx->entropy.rep[1] = ddict->entropy.rep[1];
dctx->entropy.rep[2] = ddict->entropy.rep[2];
/* prefetch dictionary content */
if (dctx->ddictIsCold) {
size_t const dictSize = ddict->dictSize;
size_t const pSize = MIN(dictSize, 32 KB); /* proposed heuristic : 8 x frameContentSize => need to know frameContentSize */
const void* const pStart = (const char*)ddict->dictContent + dictSize - pSize;
PREFETCH_AREA(pStart, pSize);
}
} else {
dstDCtx->litEntropy = 0;
dstDCtx->fseEntropy = 0;
dctx->litEntropy = 0;
dctx->fseEntropy = 0;
}
}
return 0;
@ -2604,12 +2639,15 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds)
}
/* *** Initialization *** */
/* *** Initialization *** */
size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize; }
size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; }
size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType)
size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx,
const void* dict, size_t dictSize,
ZSTD_dictLoadMethod_e dictLoadMethod,
ZSTD_dictContentType_e dictContentType)
{
if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
ZSTD_freeDDict(dctx->ddictLocal);
@ -2663,13 +2701,6 @@ size_t ZSTD_initDStream(ZSTD_DStream* zds)
return ZSTD_initDStream_usingDict(zds, NULL, 0);
}
size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
{
if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
dctx->ddict = ddict;
return 0;
}
/* ZSTD_initDStream_usingDDict() :
* ddict will just be referenced, and must outlive decompression session
* this function cannot fail */
@ -2708,6 +2739,13 @@ size_t ZSTD_setDStreamParameter(ZSTD_DStream* dctx,
return 0;
}
size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
{
if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
dctx->ddict = ddict;
return 0;
}
size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize)
{
if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);