1
0
mirror of https://github.com/facebook/zstd.git synced 2025-03-07 01:10:04 +02:00

minor improvement to level 3 dictionary compression ratio

This commit is contained in:
Yann Collet 2024-10-15 17:58:33 -07:00
parent e63896eb58
commit c2abfc5ba4

View File

@ -140,7 +140,6 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
U32 idxl1; /* the long match index for ip1 */ U32 idxl1; /* the long match index for ip1 */
const BYTE* matchl0; /* the long match for ip */ const BYTE* matchl0; /* the long match for ip */
const BYTE* matchl0_safe; /* matchl0 or safe address */
const BYTE* matchs0; /* the short match for ip */ const BYTE* matchs0; /* the short match for ip */
const BYTE* matchl1; /* the long match for ip1 */ const BYTE* matchl1; /* the long match for ip1 */
const BYTE* matchs0_safe; /* matchs0 or safe address */ const BYTE* matchs0_safe; /* matchs0 or safe address */
@ -201,15 +200,15 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
* However expression below complies into conditional move. Since * However expression below complies into conditional move. Since
* match is unlikely and we only *branch* on idxl0 > prefixLowestIndex * match is unlikely and we only *branch* on idxl0 > prefixLowestIndex
* if there is a match, all branches become predictable. */ * if there is a match, all branches become predictable. */
matchl0_safe = ZSTD_selectAddr(idxl0, prefixLowestIndex, matchl0, &dummy[0]); { const BYTE* const matchl0_safe = ZSTD_selectAddr(idxl0, prefixLowestIndex, matchl0, &dummy[0]);
/* check prefix long match */ /* check prefix long match */
if (MEM_read64(matchl0_safe) == MEM_read64(ip) && matchl0_safe == matchl0) { if (MEM_read64(matchl0_safe) == MEM_read64(ip) && matchl0_safe == matchl0) {
mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8; mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8;
offset = (U32)(ip-matchl0); offset = (U32)(ip-matchl0);
while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */ while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
goto _match_found; goto _match_found;
} } }
idxl1 = hashLong[hl1]; idxl1 = hashLong[hl1];
matchl1 = base + idxl1; matchl1 = base + idxl1;
@ -412,14 +411,12 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
goto _match_stored; goto _match_stored;
} }
if (matchIndexL > prefixLowestIndex) { if ((matchIndexL >= prefixLowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
/* check prefix long match */ /* check prefix long match */
if (MEM_read64(matchLong) == MEM_read64(ip)) { mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8; offset = (U32)(ip-matchLong);
offset = (U32)(ip-matchLong); while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ goto _match_found;
goto _match_found;
}
} else if (dictTagsMatchL) { } else if (dictTagsMatchL) {
/* check dictMatchState long match */ /* check dictMatchState long match */
U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS; U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS;
@ -434,7 +431,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
} } } }
if (matchIndexS > prefixLowestIndex) { if (matchIndexS > prefixLowestIndex) {
/* check prefix short match */ /* short match candidate */
if (MEM_read32(match) == MEM_read32(ip)) { if (MEM_read32(match) == MEM_read32(ip)) {
goto _search_next_long; goto _search_next_long;
} }
@ -464,14 +461,12 @@ _search_next_long:
hashLong[hl3] = curr + 1; hashLong[hl3] = curr + 1;
/* check prefix long +1 match */ /* check prefix long +1 match */
if (matchIndexL3 > prefixLowestIndex) { if ((matchIndexL3 >= prefixLowestIndex) && (MEM_read64(matchL3) == MEM_read64(ip+1))) {
if (MEM_read64(matchL3) == MEM_read64(ip+1)) { mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8; ip++;
ip++; offset = (U32)(ip-matchL3);
offset = (U32)(ip-matchL3); while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ goto _match_found;
goto _match_found;
}
} else if (dictTagsMatchL3) { } else if (dictTagsMatchL3) {
/* check dict long +1 match */ /* check dict long +1 match */
U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS; U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS;