1
0
mirror of https://github.com/facebook/zstd.git synced 2025-03-06 16:56:49 +02:00

Reduce RowHash's tag space size by x2 (#3543)

Allocate half the memory for tag space, which means that we get one less slot for an actual tag (needs to be used for next position index).
The results is a slight loss in compression ratio (up to 0.2%) and some regressions/improvements to speed depending on level and sample. In turn, we get to save 16% of the hash table's space (5 bytes per entry instead of 6 bytes per entry).
This commit is contained in:
Yonatan Komornik 2023-03-10 14:15:04 -08:00 committed by GitHub
parent 134d332b10
commit 33e39094e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 320 additions and 321 deletions

View File

@ -1616,7 +1616,7 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
+ ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
+ ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)
? ZSTD_cwksp_aligned_alloc_size(hSize*sizeof(U16))
? ZSTD_cwksp_aligned_alloc_size(hSize)
: 0;
size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
? optPotentialSpace
@ -1968,8 +1968,8 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) {
{ /* Row match finder needs an additional table of hashes ("tags") */
size_t const tagTableSize = hSize*sizeof(U16);
ms->tagTable = (U16*)ZSTD_cwksp_reserve_aligned(ws, tagTableSize);
size_t const tagTableSize = hSize;
ms->tagTable = (BYTE*)ZSTD_cwksp_reserve_aligned(ws, tagTableSize);
if (ms->tagTable) ZSTD_memset(ms->tagTable, 0, tagTableSize);
}
{ /* Switch to 32-entry rows if searchLog is 5 (or more) */
@ -2362,7 +2362,7 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
}
/* copy tag table */
if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) {
size_t const tagTableSize = hSize*sizeof(U16);
size_t const tagTableSize = hSize;
ZSTD_memcpy(cctx->blockState.matchState.tagTable,
cdict->matchState.tagTable,
tagTableSize);
@ -4713,7 +4713,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
} else {
assert(params->useRowMatchFinder != ZSTD_ps_auto);
if (params->useRowMatchFinder == ZSTD_ps_enable) {
size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16);
size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog);
ZSTD_memset(ms->tagTable, 0, tagTableSize);
ZSTD_row_update(ms, iend-HASH_READ_SIZE);
DEBUGLOG(4, "Using row-based hash table for lazy dict");

View File

@ -226,7 +226,7 @@ struct ZSTD_matchState_t {
U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */
U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
U16* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */
BYTE* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */
U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
U32* hashTable;

View File

@ -758,7 +758,6 @@ size_t ZSTD_HcFindBestMatch(
* (SIMD) Row-based matchfinder
***********************************/
/* Constants for row-based hash */
#define ZSTD_ROW_HASH_TAG_OFFSET 16 /* byte offset of hashes in the match state's tagTable from the beginning of a row */
#define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1)
#define ZSTD_ROW_HASH_MAX_ENTRIES 64 /* absolute maximum number of entries per row, for all configurations */
@ -801,12 +800,13 @@ U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
/* ZSTD_row_nextIndex():
* Returns the next index to insert at within a tagTable row, and updates the "head"
* value to reflect the update. Essentially cycles backwards from [0, {entries per row})
* value to reflect the update. Essentially cycles backwards from [1, {entries per row})
*/
FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextIndex(BYTE* const tagRow, U32 const rowMask) {
U32 const next = (*tagRow - 1) & rowMask;
*tagRow = (BYTE)next;
return next;
U32 next = (*tagRow-1) & rowMask;
next += (next == 0) ? rowMask : 0; /* skip first position */
*tagRow = (BYTE)next;
return next;
}
/* ZSTD_isAligned():
@ -820,7 +820,7 @@ MEM_STATIC int ZSTD_isAligned(void const* ptr, size_t align) {
/* ZSTD_row_prefetch():
* Performs prefetching for the hashTable and tagTable at a given row.
*/
FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, U16 const* tagTable, U32 const relRow, U32 const rowLog) {
FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, BYTE const* tagTable, U32 const relRow, U32 const rowLog) {
PREFETCH_L1(hashTable + relRow);
if (rowLog >= 5) {
PREFETCH_L1(hashTable + relRow + 16);
@ -844,7 +844,7 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const B
U32 idx, const BYTE* const iLimit)
{
U32 const* const hashTable = ms->hashTable;
U16 const* const tagTable = ms->tagTable;
BYTE const* const tagTable = ms->tagTable;
U32 const hashLog = ms->rowHashLog;
U32 const maxElemsToPrefetch = (base + idx) > iLimit ? 0 : (U32)(iLimit - (base + idx) + 1);
U32 const lim = idx + MIN(ZSTD_ROW_HASH_CACHE_SIZE, maxElemsToPrefetch);
@ -866,7 +866,7 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const B
* base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable.
*/
FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
U16 const* tagTable, BYTE const* base,
BYTE const* tagTable, BYTE const* base,
U32 idx, U32 const hashLog,
U32 const rowLog, U32 const mls)
{
@ -888,7 +888,7 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
U32 const rowMask, U32 const useCache)
{
U32* const hashTable = ms->hashTable;
U16* const tagTable = ms->tagTable;
BYTE* const tagTable = ms->tagTable;
U32 const hashLog = ms->rowHashLog;
const BYTE* const base = ms->window.base;
@ -898,12 +898,11 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
: (U32)ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
U32* const row = hashTable + relRow;
BYTE* tagRow = (BYTE*)(tagTable + relRow); /* Though tagTable is laid out as a table of U16, each tag is only 1 byte.
Explicit cast allows us to get exact desired position within each row */
BYTE* tagRow = tagTable + relRow;
U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
assert(hash == ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls));
((BYTE*)tagRow)[pos + ZSTD_ROW_HASH_TAG_OFFSET] = hash & ZSTD_ROW_HASH_TAG_MASK;
tagRow[pos] = hash & ZSTD_ROW_HASH_TAG_MASK;
row[pos] = updateStartIdx;
}
}
@ -1059,7 +1058,7 @@ ZSTD_row_getNEONMask(const U32 rowEntries, const BYTE* const src, const BYTE tag
FORCE_INLINE_TEMPLATE ZSTD_VecMask
ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 headGrouped, const U32 rowEntries)
{
const BYTE* const src = tagRow + ZSTD_ROW_HASH_TAG_OFFSET;
const BYTE* const src = tagRow;
assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES);
assert(ZSTD_row_matchMaskGroupWidth(rowEntries) * rowEntries <= sizeof(ZSTD_VecMask) * 8);
@ -1144,7 +1143,7 @@ size_t ZSTD_RowFindBestMatch(
const U32 rowLog)
{
U32* const hashTable = ms->hashTable;
U16* const tagTable = ms->tagTable;
BYTE* const tagTable = ms->tagTable;
U32* const hashCache = ms->hashCache;
const U32 hashLog = ms->rowHashLog;
const ZSTD_compressionParameters* const cParams = &ms->cParams;
@ -1188,7 +1187,7 @@ size_t ZSTD_RowFindBestMatch(
if (dictMode == ZSTD_dictMatchState) {
/* Prefetch DMS rows */
U32* const dmsHashTable = dms->hashTable;
U16* const dmsTagTable = dms->tagTable;
BYTE* const dmsTagTable = dms->tagTable;
U32 const dmsHash = (U32)ZSTD_hashPtr(ip, dms->rowHashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
U32 const dmsRelRow = (dmsHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
dmsTag = dmsHash & ZSTD_ROW_HASH_TAG_MASK;
@ -1230,7 +1229,7 @@ size_t ZSTD_RowFindBestMatch(
in ZSTD_row_update_internal() at the next search. */
{
U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
tagRow[pos + ZSTD_ROW_HASH_TAG_OFFSET] = (BYTE)tag;
tagRow[pos] = (BYTE)tag;
row[pos] = ms->nextToUpdate++;
}

View File

@ -2422,7 +2422,7 @@ static int basicUnitTests(U32 const seed, double compressibility)
3663, 3662, 3661, 3660, 3660,
3660, 3660, 3660 };
size_t const target_wdict_cSize[22+1] = { 2830, 2896, 2893, 2820, 2940,
2950, 2950, 2925, 2900, 2891,
2950, 2950, 2925, 2900, 2892,
2910, 2910, 2910, 2780, 2775,
2765, 2760, 2755, 2754, 2753,
2753, 2753, 2753 };

File diff suppressed because it is too large Load Diff