mirror of
https://github.com/facebook/zstd.git
synced 2025-03-06 16:56:49 +02:00
Merge pull request #4288 from facebook/stronger_patchfrom
Improve compression ratio of the --patch-from mode
This commit is contained in:
commit
d84d70bd04
@ -16,7 +16,7 @@
|
||||
#include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */
|
||||
#include "zstd_ldm_geartab.h"
|
||||
|
||||
#define LDM_BUCKET_SIZE_LOG 3
|
||||
#define LDM_BUCKET_SIZE_LOG 4
|
||||
#define LDM_MIN_MATCH_LENGTH 64
|
||||
#define LDM_HASH_RLOG 7
|
||||
|
||||
@ -133,21 +133,35 @@ done:
|
||||
}
|
||||
|
||||
void ZSTD_ldm_adjustParameters(ldmParams_t* params,
|
||||
ZSTD_compressionParameters const* cParams)
|
||||
const ZSTD_compressionParameters* cParams)
|
||||
{
|
||||
params->windowLog = cParams->windowLog;
|
||||
ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
|
||||
DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
|
||||
if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
|
||||
if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
|
||||
if (params->hashLog == 0) {
|
||||
params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
|
||||
assert(params->hashLog <= ZSTD_HASHLOG_MAX);
|
||||
}
|
||||
if (params->hashRateLog == 0) {
|
||||
params->hashRateLog = params->windowLog < params->hashLog
|
||||
? 0
|
||||
: params->windowLog - params->hashLog;
|
||||
if (params->hashLog > 0) {
|
||||
/* if params->hashLog is set, derive hashRateLog from it */
|
||||
assert(params->hashLog <= ZSTD_HASHLOG_MAX);
|
||||
if (params->windowLog > params->hashLog) {
|
||||
params->hashRateLog = params->windowLog - params->hashLog;
|
||||
}
|
||||
} else {
|
||||
assert(1 <= (int)cParams->strategy && (int)cParams->strategy <= 9);
|
||||
/* mapping from [fast, rate7] to [btultra2, rate4] */
|
||||
params->hashRateLog = 7 - (cParams->strategy/3);
|
||||
}
|
||||
}
|
||||
if (params->hashLog == 0) {
|
||||
params->hashLog = BOUNDED(ZSTD_HASHLOG_MIN, params->windowLog - params->hashRateLog, ZSTD_HASHLOG_MAX);
|
||||
}
|
||||
if (params->minMatchLength == 0) {
|
||||
params->minMatchLength = LDM_MIN_MATCH_LENGTH;
|
||||
if (cParams->strategy >= ZSTD_btultra)
|
||||
params->minMatchLength /= 2;
|
||||
}
|
||||
if (params->bucketSizeLog==0) {
|
||||
assert(1 <= (int)cParams->strategy && (int)cParams->strategy <= 9);
|
||||
params->bucketSizeLog = BOUNDED(LDM_BUCKET_SIZE_LOG, (U32)cParams->strategy, ZSTD_LDM_BUCKETSIZELOG_MAX);
|
||||
}
|
||||
params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
|
||||
}
|
||||
|
@ -1100,11 +1100,12 @@ static void FIO_adjustParamsForPatchFromMode(FIO_prefs_t* const prefs,
|
||||
FIO_setLdmFlag(prefs, 1);
|
||||
}
|
||||
if (cParams.strategy >= ZSTD_btopt) {
|
||||
DISPLAYLEVEL(3, "[Optimal parser notes] Consider the following to improve patch size at the cost of speed:\n");
|
||||
DISPLAYLEVEL(3, "- Use --single-thread mode in the zstd cli\n");
|
||||
DISPLAYLEVEL(3, "- Set a larger targetLength (e.g. --zstd=targetLength=4096)\n");
|
||||
DISPLAYLEVEL(3, "- Set a larger chainLog (e.g. --zstd=chainLog=%u)\n", ZSTD_CHAINLOG_MAX);
|
||||
DISPLAYLEVEL(3, "Also consider playing around with searchLog and hashLog\n");
|
||||
DISPLAYLEVEL(4, "[Optimal parser notes] Consider the following to improve patch size at the cost of speed:\n");
|
||||
DISPLAYLEVEL(4, "- Set a larger targetLength (e.g. --zstd=targetLength=4096)\n");
|
||||
DISPLAYLEVEL(4, "- Set a larger chainLog (e.g. --zstd=chainLog=%u)\n", ZSTD_CHAINLOG_MAX);
|
||||
DISPLAYLEVEL(4, "- Set a larger LDM hashLog (e.g. --zstd=ldmHashLog=%u)\n", ZSTD_LDM_HASHLOG_MAX);
|
||||
DISPLAYLEVEL(4, "- Set a smaller LDM rateLog (e.g. --zstd=ldmHashRateLog=%u)\n", ZSTD_LDM_HASHRATELOG_MIN);
|
||||
DISPLAYLEVEL(4, "Also consider playing around with searchLog and hashLog\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -455,6 +455,17 @@ The list of available _options_:
|
||||
Value 0 is special and means "default": _ovlog_ is automatically determined by `zstd`.
|
||||
In that case, _ovlog_ will range from 6 to 9, depending on the selected _strat_.
|
||||
|
||||
- `ldmHashRateLog`=_lhrlog_, `lhrlog`=_lhrlog_:
|
||||
Specify the frequency of inserting entries into the long distance matching
|
||||
hash table.
|
||||
|
||||
This option is ignored unless long distance matching is enabled.
|
||||
|
||||
Larger values will improve compression speed. Deviating far from the
|
||||
default value will likely result in a decrease in compression ratio.
|
||||
|
||||
The default value varies between 4 and 7, depending on `strategy`.
|
||||
|
||||
- `ldmHashLog`=_lhlog_, `lhlog`=_lhlog_:
|
||||
Specify the maximum size for a hash table used for long distance matching.
|
||||
|
||||
@ -463,7 +474,7 @@ The list of available _options_:
|
||||
Bigger hash tables usually improve compression ratio at the expense of more
|
||||
memory during compression and a decrease in compression speed.
|
||||
|
||||
The minimum _lhlog_ is 6 and the maximum is 30 (default: 20).
|
||||
The minimum _lhlog_ is 6 and the maximum is 30 (default: `windowLog - ldmHashRateLog`).
|
||||
|
||||
- `ldmMinMatch`=_lmml_, `lmml`=_lmml_:
|
||||
Specify the minimum searched length of a match for long distance matching.
|
||||
@ -472,7 +483,7 @@ The list of available _options_:
|
||||
|
||||
Larger/very small values usually decrease compression ratio.
|
||||
|
||||
The minimum _lmml_ is 4 and the maximum is 4096 (default: 64).
|
||||
The minimum _lmml_ is 4 and the maximum is 4096 (default: 32 to 64, depending on `strategy`).
|
||||
|
||||
- `ldmBucketSizeLog`=_lblog_, `lblog`=_lblog_:
|
||||
Specify the size of each bucket for the hash table used for long distance
|
||||
@ -483,18 +494,8 @@ The list of available _options_:
|
||||
Larger bucket sizes improve collision resolution but decrease compression
|
||||
speed.
|
||||
|
||||
The minimum _lblog_ is 1 and the maximum is 8 (default: 3).
|
||||
The minimum _lblog_ is 1 and the maximum is 8 (default: 4 to 8, depending on `strategy`).
|
||||
|
||||
- `ldmHashRateLog`=_lhrlog_, `lhrlog`=_lhrlog_:
|
||||
Specify the frequency of inserting entries into the long distance matching
|
||||
hash table.
|
||||
|
||||
This option is ignored unless long distance matching is enabled.
|
||||
|
||||
Larger values will improve compression speed. Deviating far from the
|
||||
default value will likely result in a decrease in compression ratio.
|
||||
|
||||
The default value is `wlog - lhlog`.
|
||||
|
||||
### Example
|
||||
The following parameters set advanced compression options to something
|
||||
|
@ -111,9 +111,9 @@ silesia, level 9, zstdcli,
|
||||
silesia, level 13, zstdcli, 4488438
|
||||
silesia, level 16, zstdcli, 4358150
|
||||
silesia, level 19, zstdcli, 4265929
|
||||
silesia, long distance mode, zstdcli, 4824875
|
||||
silesia, long distance mode, zstdcli, 4824341
|
||||
silesia, multithreaded, zstdcli, 4833113
|
||||
silesia, multithreaded long distance mode, zstdcli, 4824875
|
||||
silesia, multithreaded long distance mode, zstdcli, 4824341
|
||||
silesia, small window log, zstdcli, 7094528
|
||||
silesia, small hash log, zstdcli, 6527214
|
||||
silesia, small chain log, zstdcli, 4911647
|
||||
@ -137,9 +137,9 @@ silesia.tar, level 13, zstdcli,
|
||||
silesia.tar, level 16, zstdcli, 4357018
|
||||
silesia.tar, level 19, zstdcli, 4259593
|
||||
silesia.tar, no source size, zstdcli, 4836000
|
||||
silesia.tar, long distance mode, zstdcli, 4828171
|
||||
silesia.tar, long distance mode, zstdcli, 4827830
|
||||
silesia.tar, multithreaded, zstdcli, 4836004
|
||||
silesia.tar, multithreaded long distance mode, zstdcli, 4828171
|
||||
silesia.tar, multithreaded long distance mode, zstdcli, 4827830
|
||||
silesia.tar, small window log, zstdcli, 7100110
|
||||
silesia.tar, small hash log, zstdcli, 6530127
|
||||
silesia.tar, small chain log, zstdcli, 4915865
|
||||
@ -217,9 +217,9 @@ github.tar, level 19, zstdcli,
|
||||
github.tar, level 19 with dict, zstdcli, 32705
|
||||
github.tar, no source size, zstdcli, 38885
|
||||
github.tar, no source size with dict, zstdcli, 38115
|
||||
github.tar, long distance mode, zstdcli, 40227
|
||||
github.tar, long distance mode, zstdcli, 40143
|
||||
github.tar, multithreaded, zstdcli, 38888
|
||||
github.tar, multithreaded long distance mode, zstdcli, 40227
|
||||
github.tar, multithreaded long distance mode, zstdcli, 40143
|
||||
github.tar, small window log, zstdcli, 198539
|
||||
github.tar, small hash log, zstdcli, 129874
|
||||
github.tar, small chain log, zstdcli, 41673
|
||||
@ -251,9 +251,9 @@ silesia, level 13, advanced
|
||||
silesia, level 16, advanced one pass, 4356799
|
||||
silesia, level 19, advanced one pass, 4265851
|
||||
silesia, no source size, advanced one pass, 4832054
|
||||
silesia, long distance mode, advanced one pass, 4823833
|
||||
silesia, long distance mode, advanced one pass, 4823264
|
||||
silesia, multithreaded, advanced one pass, 4833065
|
||||
silesia, multithreaded long distance mode, advanced one pass, 4824827
|
||||
silesia, multithreaded long distance mode, advanced one pass, 4824293
|
||||
silesia, small window log, advanced one pass, 7094480
|
||||
silesia, small hash log, advanced one pass, 6525510
|
||||
silesia, small chain log, advanced one pass, 4912248
|
||||
@ -285,9 +285,9 @@ silesia.tar, level 13, advanced
|
||||
silesia.tar, level 16, advanced one pass, 4355572
|
||||
silesia.tar, level 19, advanced one pass, 4257629
|
||||
silesia.tar, no source size, advanced one pass, 4829268
|
||||
silesia.tar, long distance mode, advanced one pass, 4816169
|
||||
silesia.tar, long distance mode, advanced one pass, 4815868
|
||||
silesia.tar, multithreaded, advanced one pass, 4836000
|
||||
silesia.tar, multithreaded long distance mode, advanced one pass, 4828167
|
||||
silesia.tar, multithreaded long distance mode, advanced one pass, 4827826
|
||||
silesia.tar, small window log, advanced one pass, 7100064
|
||||
silesia.tar, small hash log, advanced one pass, 6530222
|
||||
silesia.tar, small chain log, advanced one pass, 4915689
|
||||
@ -535,9 +535,9 @@ github.tar, level 19 with dict copy, advanced
|
||||
github.tar, level 19 with dict load, advanced one pass, 32428
|
||||
github.tar, no source size, advanced one pass, 38884
|
||||
github.tar, no source size with dict, advanced one pass, 37995
|
||||
github.tar, long distance mode, advanced one pass, 40242
|
||||
github.tar, long distance mode, advanced one pass, 40156
|
||||
github.tar, multithreaded, advanced one pass, 38884
|
||||
github.tar, multithreaded long distance mode, advanced one pass, 40223
|
||||
github.tar, multithreaded long distance mode, advanced one pass, 40139
|
||||
github.tar, small window log, advanced one pass, 198535
|
||||
github.tar, small hash log, advanced one pass, 129870
|
||||
github.tar, small chain log, advanced one pass, 41669
|
||||
@ -569,9 +569,9 @@ silesia, level 13, advanced
|
||||
silesia, level 16, advanced one pass small out, 4356799
|
||||
silesia, level 19, advanced one pass small out, 4265851
|
||||
silesia, no source size, advanced one pass small out, 4832054
|
||||
silesia, long distance mode, advanced one pass small out, 4823833
|
||||
silesia, long distance mode, advanced one pass small out, 4823264
|
||||
silesia, multithreaded, advanced one pass small out, 4833065
|
||||
silesia, multithreaded long distance mode, advanced one pass small out, 4824827
|
||||
silesia, multithreaded long distance mode, advanced one pass small out, 4824293
|
||||
silesia, small window log, advanced one pass small out, 7094480
|
||||
silesia, small hash log, advanced one pass small out, 6525510
|
||||
silesia, small chain log, advanced one pass small out, 4912248
|
||||
@ -603,9 +603,9 @@ silesia.tar, level 13, advanced
|
||||
silesia.tar, level 16, advanced one pass small out, 4355572
|
||||
silesia.tar, level 19, advanced one pass small out, 4257629
|
||||
silesia.tar, no source size, advanced one pass small out, 4829268
|
||||
silesia.tar, long distance mode, advanced one pass small out, 4816169
|
||||
silesia.tar, long distance mode, advanced one pass small out, 4815868
|
||||
silesia.tar, multithreaded, advanced one pass small out, 4836000
|
||||
silesia.tar, multithreaded long distance mode, advanced one pass small out, 4828167
|
||||
silesia.tar, multithreaded long distance mode, advanced one pass small out, 4827826
|
||||
silesia.tar, small window log, advanced one pass small out, 7100064
|
||||
silesia.tar, small hash log, advanced one pass small out, 6530222
|
||||
silesia.tar, small chain log, advanced one pass small out, 4915689
|
||||
@ -853,9 +853,9 @@ github.tar, level 19 with dict copy, advanced
|
||||
github.tar, level 19 with dict load, advanced one pass small out, 32428
|
||||
github.tar, no source size, advanced one pass small out, 38884
|
||||
github.tar, no source size with dict, advanced one pass small out, 37995
|
||||
github.tar, long distance mode, advanced one pass small out, 40242
|
||||
github.tar, long distance mode, advanced one pass small out, 40156
|
||||
github.tar, multithreaded, advanced one pass small out, 38884
|
||||
github.tar, multithreaded long distance mode, advanced one pass small out, 40223
|
||||
github.tar, multithreaded long distance mode, advanced one pass small out, 40139
|
||||
github.tar, small window log, advanced one pass small out, 198535
|
||||
github.tar, small hash log, advanced one pass small out, 129870
|
||||
github.tar, small chain log, advanced one pass small out, 41669
|
||||
@ -887,9 +887,9 @@ silesia, level 13, advanced
|
||||
silesia, level 16, advanced streaming, 4358094
|
||||
silesia, level 19, advanced streaming, 4265908
|
||||
silesia, no source size, advanced streaming, 4835768
|
||||
silesia, long distance mode, advanced streaming, 4827592
|
||||
silesia, long distance mode, advanced streaming, 4827032
|
||||
silesia, multithreaded, advanced streaming, 4833065
|
||||
silesia, multithreaded long distance mode, advanced streaming, 4824827
|
||||
silesia, multithreaded long distance mode, advanced streaming, 4824293
|
||||
silesia, small window log, advanced streaming, 7110591
|
||||
silesia, small hash log, advanced streaming, 6525259
|
||||
silesia, small chain log, advanced streaming, 4911577
|
||||
@ -921,9 +921,9 @@ silesia.tar, level 13, advanced
|
||||
silesia.tar, level 16, advanced streaming, 4358029
|
||||
silesia.tar, level 19, advanced streaming, 4258228
|
||||
silesia.tar, no source size, advanced streaming, 4846779
|
||||
silesia.tar, long distance mode, advanced streaming, 4826177
|
||||
silesia.tar, long distance mode, advanced streaming, 4825842
|
||||
silesia.tar, multithreaded, advanced streaming, 4836000
|
||||
silesia.tar, multithreaded long distance mode, advanced streaming, 4828167
|
||||
silesia.tar, multithreaded long distance mode, advanced streaming, 4827826
|
||||
silesia.tar, small window log, advanced streaming, 7117024
|
||||
silesia.tar, small hash log, advanced streaming, 6529503
|
||||
silesia.tar, small chain log, advanced streaming, 4915956
|
||||
@ -1171,9 +1171,9 @@ github.tar, level 19 with dict copy, advanced
|
||||
github.tar, level 19 with dict load, advanced streaming, 32428
|
||||
github.tar, no source size, advanced streaming, 38881
|
||||
github.tar, no source size with dict, advanced streaming, 38111
|
||||
github.tar, long distance mode, advanced streaming, 40242
|
||||
github.tar, long distance mode, advanced streaming, 40156
|
||||
github.tar, multithreaded, advanced streaming, 38884
|
||||
github.tar, multithreaded long distance mode, advanced streaming, 40223
|
||||
github.tar, multithreaded long distance mode, advanced streaming, 40139
|
||||
github.tar, small window log, advanced streaming, 199553
|
||||
github.tar, small hash log, advanced streaming, 129870
|
||||
github.tar, small chain log, advanced streaming, 41669
|
||||
|
|
Loading…
x
Reference in New Issue
Block a user