diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c index 42b38c0d0..070551cad 100644 --- a/lib/compress/zstd_ldm.c +++ b/lib/compress/zstd_ldm.c @@ -16,7 +16,7 @@ #include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */ #include "zstd_ldm_geartab.h" -#define LDM_BUCKET_SIZE_LOG 3 +#define LDM_BUCKET_SIZE_LOG 4 #define LDM_MIN_MATCH_LENGTH 64 #define LDM_HASH_RLOG 7 @@ -133,21 +133,35 @@ done: } void ZSTD_ldm_adjustParameters(ldmParams_t* params, - ZSTD_compressionParameters const* cParams) + const ZSTD_compressionParameters* cParams) { params->windowLog = cParams->windowLog; ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX); DEBUGLOG(4, "ZSTD_ldm_adjustParameters"); - if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG; - if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH; - if (params->hashLog == 0) { - params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG); - assert(params->hashLog <= ZSTD_HASHLOG_MAX); - } if (params->hashRateLog == 0) { - params->hashRateLog = params->windowLog < params->hashLog - ? 0 - : params->windowLog - params->hashLog; + if (params->hashLog > 0) { + /* if params->hashLog is set, derive hashRateLog from it */ + assert(params->hashLog <= ZSTD_HASHLOG_MAX); + if (params->windowLog > params->hashLog) { + params->hashRateLog = params->windowLog - params->hashLog; + } + } else { + assert(1 <= (int)cParams->strategy && (int)cParams->strategy <= 9); + /* mapping from [fast, rate7] to [btultra2, rate4] */ + params->hashRateLog = 7 - (cParams->strategy/3); + } + } + if (params->hashLog == 0) { + params->hashLog = BOUNDED(ZSTD_HASHLOG_MIN, params->windowLog - params->hashRateLog, ZSTD_HASHLOG_MAX); + } + if (params->minMatchLength == 0) { + params->minMatchLength = LDM_MIN_MATCH_LENGTH; + if (cParams->strategy >= ZSTD_btultra) + params->minMatchLength /= 2; + } + if (params->bucketSizeLog==0) { + assert(1 <= (int)cParams->strategy && (int)cParams->strategy <= 9); + params->bucketSizeLog = BOUNDED(LDM_BUCKET_SIZE_LOG, (U32)cParams->strategy, ZSTD_LDM_BUCKETSIZELOG_MAX); } params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog); } diff --git a/programs/fileio.c b/programs/fileio.c index 1b8aa8a99..0ecca40d2 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -1100,11 +1100,12 @@ static void FIO_adjustParamsForPatchFromMode(FIO_prefs_t* const prefs, FIO_setLdmFlag(prefs, 1); } if (cParams.strategy >= ZSTD_btopt) { - DISPLAYLEVEL(3, "[Optimal parser notes] Consider the following to improve patch size at the cost of speed:\n"); - DISPLAYLEVEL(3, "- Use --single-thread mode in the zstd cli\n"); - DISPLAYLEVEL(3, "- Set a larger targetLength (e.g. --zstd=targetLength=4096)\n"); - DISPLAYLEVEL(3, "- Set a larger chainLog (e.g. --zstd=chainLog=%u)\n", ZSTD_CHAINLOG_MAX); - DISPLAYLEVEL(3, "Also consider playing around with searchLog and hashLog\n"); + DISPLAYLEVEL(4, "[Optimal parser notes] Consider the following to improve patch size at the cost of speed:\n"); + DISPLAYLEVEL(4, "- Set a larger targetLength (e.g. --zstd=targetLength=4096)\n"); + DISPLAYLEVEL(4, "- Set a larger chainLog (e.g. --zstd=chainLog=%u)\n", ZSTD_CHAINLOG_MAX); + DISPLAYLEVEL(4, "- Set a larger LDM hashLog (e.g. --zstd=ldmHashLog=%u)\n", ZSTD_LDM_HASHLOG_MAX); + DISPLAYLEVEL(4, "- Set a smaller LDM rateLog (e.g. --zstd=ldmHashRateLog=%u)\n", ZSTD_LDM_HASHRATELOG_MIN); + DISPLAYLEVEL(4, "Also consider playing around with searchLog and hashLog\n"); } } diff --git a/programs/zstd.1.md b/programs/zstd.1.md index 29b7a5bb7..e5c1b7fd2 100644 --- a/programs/zstd.1.md +++ b/programs/zstd.1.md @@ -455,6 +455,17 @@ The list of available _options_: Value 0 is special and means "default": _ovlog_ is automatically determined by `zstd`. In which case, _ovlog_ will range from 6 to 9, depending on selected _strat_. +- `ldmHashRateLog`=_lhrlog_, `lhrlog`=_lhrlog_: + Specify the frequency of inserting entries into the long distance matching + hash table. + + This option is ignored unless long distance matching is enabled. + + Larger values will improve compression speed. Deviating far from the + default value will likely result in a decrease in compression ratio. + + The default value varies between 4 and 7, depending on `strategy`. + - `ldmHashLog`=_lhlog_, `lhlog`=_lhlog_: Specify the maximum size for a hash table used for long distance matching. @@ -463,7 +474,7 @@ The list of available _options_: Bigger hash tables usually improve compression ratio at the expense of more memory during compression and a decrease in compression speed. - The minimum _lhlog_ is 6 and the maximum is 30 (default: 20). + The minimum _lhlog_ is 6 and the maximum is 30 (default: `windowLog - ldmHashRateLog`). - `ldmMinMatch`=_lmml_, `lmml`=_lmml_: Specify the minimum searched length of a match for long distance matching. @@ -472,7 +483,7 @@ The list of available _options_: Larger/very small values usually decrease compression ratio. - The minimum _lmml_ is 4 and the maximum is 4096 (default: 64). + The minimum _lmml_ is 4 and the maximum is 4096 (default: 32 to 64, depending on `strategy`). - `ldmBucketSizeLog`=_lblog_, `lblog`=_lblog_: Specify the size of each bucket for the hash table used for long distance @@ -483,18 +494,8 @@ The list of available _options_: Larger bucket sizes improve collision resolution but decrease compression speed. - The minimum _lblog_ is 1 and the maximum is 8 (default: 3). + The minimum _lblog_ is 1 and the maximum is 8 (default: 4 to 8, depending on `strategy`). -- `ldmHashRateLog`=_lhrlog_, `lhrlog`=_lhrlog_: - Specify the frequency of inserting entries into the long distance matching - hash table. - - This option is ignored unless long distance matching is enabled. - - Larger values will improve compression speed. Deviating far from the - default value will likely result in a decrease in compression ratio. - - The default value is `wlog - lhlog`. ### Example The following parameters sets advanced compression options to something diff --git a/tests/regression/results.csv b/tests/regression/results.csv index 505f4755b..c0d4f4ae7 100644 --- a/tests/regression/results.csv +++ b/tests/regression/results.csv @@ -111,9 +111,9 @@ silesia, level 9, zstdcli, silesia, level 13, zstdcli, 4488438 silesia, level 16, zstdcli, 4358150 silesia, level 19, zstdcli, 4265929 -silesia, long distance mode, zstdcli, 4824875 +silesia, long distance mode, zstdcli, 4824341 silesia, multithreaded, zstdcli, 4833113 -silesia, multithreaded long distance mode, zstdcli, 4824875 +silesia, multithreaded long distance mode, zstdcli, 4824341 silesia, small window log, zstdcli, 7094528 silesia, small hash log, zstdcli, 6527214 silesia, small chain log, zstdcli, 4911647 @@ -137,9 +137,9 @@ silesia.tar, level 13, zstdcli, silesia.tar, level 16, zstdcli, 4357018 silesia.tar, level 19, zstdcli, 4259593 silesia.tar, no source size, zstdcli, 4836000 -silesia.tar, long distance mode, zstdcli, 4828171 +silesia.tar, long distance mode, zstdcli, 4827830 silesia.tar, multithreaded, zstdcli, 4836004 -silesia.tar, multithreaded long distance mode, zstdcli, 4828171 +silesia.tar, multithreaded long distance mode, zstdcli, 4827830 silesia.tar, small window log, zstdcli, 7100110 silesia.tar, small hash log, zstdcli, 6530127 silesia.tar, small chain log, zstdcli, 4915865 @@ -217,9 +217,9 @@ github.tar, level 19, zstdcli, github.tar, level 19 with dict, zstdcli, 32705 github.tar, no source size, zstdcli, 38885 github.tar, no source size with dict, zstdcli, 38115 -github.tar, long distance mode, zstdcli, 40227 +github.tar, long distance mode, zstdcli, 40143 github.tar, multithreaded, zstdcli, 38888 -github.tar, multithreaded long distance mode, zstdcli, 40227 +github.tar, multithreaded long distance mode, zstdcli, 40143 github.tar, small window log, zstdcli, 198539 github.tar, small hash log, zstdcli, 129874 github.tar, small chain log, zstdcli, 41673 @@ -251,9 +251,9 @@ silesia, level 13, advanced silesia, level 16, advanced one pass, 4356799 silesia, level 19, advanced one pass, 4265851 silesia, no source size, advanced one pass, 4832054 -silesia, long distance mode, advanced one pass, 4823833 +silesia, long distance mode, advanced one pass, 4823264 silesia, multithreaded, advanced one pass, 4833065 -silesia, multithreaded long distance mode, advanced one pass, 4824827 +silesia, multithreaded long distance mode, advanced one pass, 4824293 silesia, small window log, advanced one pass, 7094480 silesia, small hash log, advanced one pass, 6525510 silesia, small chain log, advanced one pass, 4912248 @@ -285,9 +285,9 @@ silesia.tar, level 13, advanced silesia.tar, level 16, advanced one pass, 4355572 silesia.tar, level 19, advanced one pass, 4257629 silesia.tar, no source size, advanced one pass, 4829268 -silesia.tar, long distance mode, advanced one pass, 4816169 +silesia.tar, long distance mode, advanced one pass, 4815868 silesia.tar, multithreaded, advanced one pass, 4836000 -silesia.tar, multithreaded long distance mode, advanced one pass, 4828167 +silesia.tar, multithreaded long distance mode, advanced one pass, 4827826 silesia.tar, small window log, advanced one pass, 7100064 silesia.tar, small hash log, advanced one pass, 6530222 silesia.tar, small chain log, advanced one pass, 4915689 @@ -535,9 +535,9 @@ github.tar, level 19 with dict copy, advanced github.tar, level 19 with dict load, advanced one pass, 32428 github.tar, no source size, advanced one pass, 38884 github.tar, no source size with dict, advanced one pass, 37995 -github.tar, long distance mode, advanced one pass, 40242 +github.tar, long distance mode, advanced one pass, 40156 github.tar, multithreaded, advanced one pass, 38884 -github.tar, multithreaded long distance mode, advanced one pass, 40223 +github.tar, multithreaded long distance mode, advanced one pass, 40139 github.tar, small window log, advanced one pass, 198535 github.tar, small hash log, advanced one pass, 129870 github.tar, small chain log, advanced one pass, 41669 @@ -569,9 +569,9 @@ silesia, level 13, advanced silesia, level 16, advanced one pass small out, 4356799 silesia, level 19, advanced one pass small out, 4265851 silesia, no source size, advanced one pass small out, 4832054 -silesia, long distance mode, advanced one pass small out, 4823833 +silesia, long distance mode, advanced one pass small out, 4823264 silesia, multithreaded, advanced one pass small out, 4833065 -silesia, multithreaded long distance mode, advanced one pass small out, 4824827 +silesia, multithreaded long distance mode, advanced one pass small out, 4824293 silesia, small window log, advanced one pass small out, 7094480 silesia, small hash log, advanced one pass small out, 6525510 silesia, small chain log, advanced one pass small out, 4912248 @@ -603,9 +603,9 @@ silesia.tar, level 13, advanced silesia.tar, level 16, advanced one pass small out, 4355572 silesia.tar, level 19, advanced one pass small out, 4257629 silesia.tar, no source size, advanced one pass small out, 4829268 -silesia.tar, long distance mode, advanced one pass small out, 4816169 +silesia.tar, long distance mode, advanced one pass small out, 4815868 silesia.tar, multithreaded, advanced one pass small out, 4836000 -silesia.tar, multithreaded long distance mode, advanced one pass small out, 4828167 +silesia.tar, multithreaded long distance mode, advanced one pass small out, 4827826 silesia.tar, small window log, advanced one pass small out, 7100064 silesia.tar, small hash log, advanced one pass small out, 6530222 silesia.tar, small chain log, advanced one pass small out, 4915689 @@ -853,9 +853,9 @@ github.tar, level 19 with dict copy, advanced github.tar, level 19 with dict load, advanced one pass small out, 32428 github.tar, no source size, advanced one pass small out, 38884 github.tar, no source size with dict, advanced one pass small out, 37995 -github.tar, long distance mode, advanced one pass small out, 40242 +github.tar, long distance mode, advanced one pass small out, 40156 github.tar, multithreaded, advanced one pass small out, 38884 -github.tar, multithreaded long distance mode, advanced one pass small out, 40223 +github.tar, multithreaded long distance mode, advanced one pass small out, 40139 github.tar, small window log, advanced one pass small out, 198535 github.tar, small hash log, advanced one pass small out, 129870 github.tar, small chain log, advanced one pass small out, 41669 @@ -887,9 +887,9 @@ silesia, level 13, advanced silesia, level 16, advanced streaming, 4358094 silesia, level 19, advanced streaming, 4265908 silesia, no source size, advanced streaming, 4835768 -silesia, long distance mode, advanced streaming, 4827592 +silesia, long distance mode, advanced streaming, 4827032 silesia, multithreaded, advanced streaming, 4833065 -silesia, multithreaded long distance mode, advanced streaming, 4824827 +silesia, multithreaded long distance mode, advanced streaming, 4824293 silesia, small window log, advanced streaming, 7110591 silesia, small hash log, advanced streaming, 6525259 silesia, small chain log, advanced streaming, 4911577 @@ -921,9 +921,9 @@ silesia.tar, level 13, advanced silesia.tar, level 16, advanced streaming, 4358029 silesia.tar, level 19, advanced streaming, 4258228 silesia.tar, no source size, advanced streaming, 4846779 -silesia.tar, long distance mode, advanced streaming, 4826177 +silesia.tar, long distance mode, advanced streaming, 4825842 silesia.tar, multithreaded, advanced streaming, 4836000 -silesia.tar, multithreaded long distance mode, advanced streaming, 4828167 +silesia.tar, multithreaded long distance mode, advanced streaming, 4827826 silesia.tar, small window log, advanced streaming, 7117024 silesia.tar, small hash log, advanced streaming, 6529503 silesia.tar, small chain log, advanced streaming, 4915956 @@ -1171,9 +1171,9 @@ github.tar, level 19 with dict copy, advanced github.tar, level 19 with dict load, advanced streaming, 32428 github.tar, no source size, advanced streaming, 38881 github.tar, no source size with dict, advanced streaming, 38111 -github.tar, long distance mode, advanced streaming, 40242 +github.tar, long distance mode, advanced streaming, 40156 github.tar, multithreaded, advanced streaming, 38884 -github.tar, multithreaded long distance mode, advanced streaming, 40223 +github.tar, multithreaded long distance mode, advanced streaming, 40139 github.tar, small window log, advanced streaming, 199553 github.tar, small hash log, advanced streaming, 129870 github.tar, small chain log, advanced streaming, 41669