1
0
mirror of https://github.com/pgbackrest/pgbackrest.git synced 2024-12-12 10:04:14 +02:00

Limit files that can be bundled.

Limit which files can be added to bundles, which allows resume to work reasonably well. On resume, the bundles are removed and any remaining file is eligible to be resumed.

Also reduce the bundle-size default to 20MiB. This is pretty arbitrary, but a smaller default seems better.
This commit is contained in:
David Steele 2022-02-17 07:25:12 -06:00 committed by GitHub
parent 34d649579e
commit efc09db7b9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 120 additions and 65 deletions

View File

@ -128,7 +128,14 @@
<commit subject="Optimization for jsonFromStrInternal()."/>
<commit subject="Simplify manifest file defaults."/>
<commit subject="Simplify filename construction in command/verify module."/>
<commit subject="Bundle files in the repository during backup."/>
<commit subject="Bundle files in the repository during backup.">
<github-issue id="1149"/>
<github-pull-request id="1655"/>
</commit>
<commit subject="Limit files that can be bundled.">
<github-issue id="1149"/>
<github-pull-request id="1662"/>
</commit>
<release-item-contributor-list>
<release-item-contributor id="david.steele"/>

View File

@ -1120,7 +1120,7 @@ option:
internal: true
section: global
type: size
default: 100MiB
default: 20MiB
allow-range: [1MiB, 1PiB]
command:
backup: {}
@ -1131,6 +1131,11 @@ option:
list:
- true
bundle-limit:
inherit: bundle-size
default: 2MiB
allow-range: [8KiB, 1PiB]
checksum-page:
section: global
type: boolean
@ -1176,10 +1181,6 @@ option:
backup: {}
command-role:
main: {}
depend:
option: bundle
list:
- false
start-fast:
section: global

View File

@ -1133,6 +1133,17 @@
<example>10MiB</example>
</config-key>
<!-- ======================================================================================================= -->
<config-key id="bundle-limit" name="Repository Bundle Limit">
<summary>Limit for file bundles.</summary>
<text>
<p>Size limit for files that will be included in bundles. Files larger than this size will be stored separately.</p>
</text>
<example>10MiB</example>
</config-key>
<!-- CONFIG - BACKUP SECTION - CHECKSUM-PAGE KEY -->
<config-key id="checksum-page" name="Page Checksums">
<summary>Validate data page checksums.</summary>

View File

@ -674,7 +674,7 @@ backupResumeFind(const Manifest *manifest, const String *cipherPassBackup)
// Resumable backups do not have backup.manifest
if (!storageExistsP(storageRepo(), manifestFile))
{
const bool resume = cfgOptionTest(cfgOptResume) && cfgOptionBool(cfgOptResume);
const bool resume = cfgOptionBool(cfgOptResume);
bool usable = false;
const String *reason = STRDEF("partially deleted by prior resume or invalid");
Manifest *manifestResume = NULL;
@ -1176,7 +1176,8 @@ backupJobResult(
MEM_CONTEXT_TEMP_BEGIN()
{
const unsigned int processId = protocolParallelJobProcessId(job);
const uint64_t bundleId = bundle ? varUInt64(protocolParallelJobKey(job)) : 0;
const uint64_t bundleId = varType(protocolParallelJobKey(job)) == varTypeUInt64 ?
varUInt64(protocolParallelJobKey(job)) : 0;
PackRead *const jobResult = protocolParallelJobResult(job);
while (!pckReadNullP(jobResult))
@ -1407,7 +1408,8 @@ typedef struct BackupJobData
const bool delta; // Is this a checksum delta backup?
const uint64_t lsnStart; // Starting lsn for the backup
const bool bundle; // Bundle files?
const uint64_t bundleSize; // Target bundle size
uint64_t bundleSize; // Target bundle size
uint64_t bundleLimit; // Limit on files to bundle
uint64_t bundleId; // Bundle id
List *queueList; // List of processing queues
@ -1432,7 +1434,7 @@ backupProcessFilePrimary(RegExp *const standbyExp, const String *const name)
// Comparator to order ManifestFile objects by size, date, and name
static const Manifest *backupProcessQueueComparatorManifest = NULL;
static bool backupProcessQueueComparatorBundle;
static uint64_t backupProcessQueueComparatorBundleSize;
static uint64_t backupProcessQueueComparatorBundleLimit;
static int
backupProcessQueueComparator(const void *item1, const void *item2)
@ -1450,8 +1452,8 @@ backupProcessQueueComparator(const void *item1, const void *item2)
ManifestFile file2 = manifestFileUnpack(backupProcessQueueComparatorManifest, *(const ManifestFilePack **)item2);
// If the size differs then that's enough to determine order
if (!backupProcessQueueComparatorBundle || file1.size >= backupProcessQueueComparatorBundleSize ||
file2.size >= backupProcessQueueComparatorBundleSize)
if (!backupProcessQueueComparatorBundle || file1.size > backupProcessQueueComparatorBundleLimit ||
file2.size > backupProcessQueueComparatorBundleLimit)
{
if (file1.size < file2.size)
FUNCTION_TEST_RETURN(-1);
@ -1594,7 +1596,7 @@ backupProcessQueue(const BackupData *const backupData, Manifest *const manifest,
// Sort the queues
backupProcessQueueComparatorManifest = manifest;
backupProcessQueueComparatorBundle = jobData->bundle;
backupProcessQueueComparatorBundleSize = jobData->bundleSize;
backupProcessQueueComparatorBundleLimit = jobData->bundleLimit;
for (unsigned int queueIdx = 0; queueIdx < lstSize(jobData->queueList); queueIdx++)
lstSort(*(List **)lstGet(jobData->queueList, queueIdx), sortOrderDesc);
@ -1663,6 +1665,8 @@ static ProtocolParallelJob *backupJobCallback(void *data, unsigned int clientIdx
{
List *queue = *(List **)lstGet(jobData->queueList, (unsigned int)queueIdx + queueOffset);
unsigned int fileIdx = 0;
bool bundle = jobData->bundle;
const String *fileName = NULL;
while (fileIdx < lstSize(queue))
{
@ -1682,10 +1686,16 @@ static ProtocolParallelJob *backupJobCallback(void *data, unsigned int clientIdx
String *const repoFile = strCatFmt(strNew(), STORAGE_REPO_BACKUP "/%s/", strZ(jobData->backupLabel));
if (jobData->bundle)
if (bundle && file.size <= jobData->bundleLimit)
strCatFmt(repoFile, MANIFEST_PATH_BUNDLE "/%" PRIu64, jobData->bundleId);
else
{
CHECK(AssertError, fileTotal == 0, "cannot bundle file");
strCatFmt(repoFile, "%s%s", strZ(file.name), strZ(compressExtStr(jobData->compressType)));
fileName = file.name;
bundle = false;
}
pckWriteStrP(param, repoFile);
pckWriteU32P(param, jobData->compressType);
@ -1712,7 +1722,7 @@ static ProtocolParallelJob *backupJobCallback(void *data, unsigned int clientIdx
lstRemoveIdx(queue, fileIdx);
// Break if not bundling or bundle size has been reached
if (!jobData->bundle || fileSize >= jobData->bundleSize)
if (!bundle || fileSize >= jobData->bundleSize)
break;
}
@ -1721,8 +1731,10 @@ static ProtocolParallelJob *backupJobCallback(void *data, unsigned int clientIdx
// Assign job to result
MEM_CONTEXT_PRIOR_BEGIN()
{
result = protocolParallelJobNew(VARUINT64(jobData->bundleId), command);
jobData->bundleId++;
result = protocolParallelJobNew(bundle ? VARUINT64(jobData->bundleId) : VARSTR(fileName), command);
if (bundle)
jobData->bundleId++;
}
MEM_CONTEXT_PRIOR_END();
@ -1775,7 +1787,6 @@ backupProcess(BackupData *backupData, Manifest *manifest, const String *lsnStart
.delta = cfgOptionBool(cfgOptDelta),
.lsnStart = cfgOptionBool(cfgOptOnline) ? pgLsnFromStr(lsnStart) : 0xFFFFFFFFFFFFFFFF,
.bundle = cfgOptionBool(cfgOptBundle),
.bundleSize = cfgOptionTest(cfgOptBundleSize) ? cfgOptionUInt64(cfgOptBundleSize) : 0,
.bundleId = 1,
// Build expression to identify files that can be copied from the standby when standby backup is supported
@ -1786,6 +1797,12 @@ backupProcess(BackupData *backupData, Manifest *manifest, const String *lsnStart
strZ(pgXactPath(backupData->version)))),
};
if (jobData.bundle)
{
jobData.bundleSize = cfgOptionUInt64(cfgOptBundleSize);
jobData.bundleLimit = cfgOptionUInt64(cfgOptBundleLimit);
}
// If this is a full backup or hard-linked and paths are supported then create all paths explicitly so that empty paths will
// exist in to repo. Also create tablespace symlinks when symlinks are available. This makes it possible for the user to
// make a copy of the backup path and get a valid cluster.

View File

@ -54,6 +54,7 @@ Option constants
#define CFGOPT_BACKUP_STANDBY "backup-standby"
#define CFGOPT_BUFFER_SIZE "buffer-size"
#define CFGOPT_BUNDLE "bundle"
#define CFGOPT_BUNDLE_LIMIT "bundle-limit"
#define CFGOPT_BUNDLE_SIZE "bundle-size"
#define CFGOPT_CHECKSUM_PAGE "checksum-page"
#define CFGOPT_CIPHER_PASS "cipher-pass"
@ -128,7 +129,7 @@ Option constants
#define CFGOPT_TLS_SERVER_PORT "tls-server-port"
#define CFGOPT_TYPE "type"
#define CFG_OPTION_TOTAL 152
#define CFG_OPTION_TOTAL 153
/***********************************************************************************************************************************
Option value constants
@ -366,6 +367,7 @@ typedef enum
cfgOptBackupStandby,
cfgOptBufferSize,
cfgOptBundle,
cfgOptBundleLimit,
cfgOptBundleSize,
cfgOptChecksumPage,
cfgOptCipherPass,

View File

@ -16,7 +16,6 @@ static const StringPub parseRuleValueStr[] =
PARSE_RULE_STRPUB("/var/log/pgbackrest"),
PARSE_RULE_STRPUB("/var/spool/pgbackrest"),
PARSE_RULE_STRPUB("1"),
PARSE_RULE_STRPUB("100MiB"),
PARSE_RULE_STRPUB("128MiB"),
PARSE_RULE_STRPUB("15"),
PARSE_RULE_STRPUB("1800"),
@ -24,6 +23,8 @@ static const StringPub parseRuleValueStr[] =
PARSE_RULE_STRPUB("1GiB"),
PARSE_RULE_STRPUB("1MiB"),
PARSE_RULE_STRPUB("2"),
PARSE_RULE_STRPUB("20MiB"),
PARSE_RULE_STRPUB("2MiB"),
PARSE_RULE_STRPUB("3"),
PARSE_RULE_STRPUB("443"),
PARSE_RULE_STRPUB("5432"),
@ -64,7 +65,6 @@ typedef enum
parseRuleValStrQT_FS_var_FS_log_FS_pgbackrest_QT,
parseRuleValStrQT_FS_var_FS_spool_FS_pgbackrest_QT,
parseRuleValStrQT_1_QT,
parseRuleValStrQT_100MiB_QT,
parseRuleValStrQT_128MiB_QT,
parseRuleValStrQT_15_QT,
parseRuleValStrQT_1800_QT,
@ -72,6 +72,8 @@ typedef enum
parseRuleValStrQT_1GiB_QT,
parseRuleValStrQT_1MiB_QT,
parseRuleValStrQT_2_QT,
parseRuleValStrQT_20MiB_QT,
parseRuleValStrQT_2MiB_QT,
parseRuleValStrQT_3_QT,
parseRuleValStrQT_443_QT,
parseRuleValStrQT_5432_QT,
@ -239,6 +241,7 @@ static const int64_t parseRuleValueInt[] =
1024,
3600,
5432,
8192,
8432,
15000,
16384,
@ -259,8 +262,8 @@ static const int64_t parseRuleValueInt[] =
8388608,
9999999,
16777216,
20971520,
86400000,
104857600,
134217728,
604800000,
1073741824,
@ -287,6 +290,7 @@ typedef enum
parseRuleValInt1024,
parseRuleValInt3600,
parseRuleValInt5432,
parseRuleValInt8192,
parseRuleValInt8432,
parseRuleValInt15000,
parseRuleValInt16384,
@ -307,8 +311,8 @@ typedef enum
parseRuleValInt8388608,
parseRuleValInt9999999,
parseRuleValInt16777216,
parseRuleValInt20971520,
parseRuleValInt86400000,
parseRuleValInt104857600,
parseRuleValInt134217728,
parseRuleValInt604800000,
parseRuleValInt1073741824,
@ -1175,6 +1179,45 @@ static const ParseRuleOption parseRuleOption[CFG_OPTION_TOTAL] =
),
),
// -----------------------------------------------------------------------------------------------------------------------------
PARSE_RULE_OPTION
(
PARSE_RULE_OPTION_NAME("bundle-limit"),
PARSE_RULE_OPTION_TYPE(cfgOptTypeSize),
PARSE_RULE_OPTION_RESET(true),
PARSE_RULE_OPTION_REQUIRED(true),
PARSE_RULE_OPTION_SECTION(cfgSectionGlobal),
PARSE_RULE_OPTION_COMMAND_ROLE_MAIN_VALID_LIST
(
PARSE_RULE_OPTION_COMMAND(cfgCmdBackup)
),
PARSE_RULE_OPTIONAL
(
PARSE_RULE_OPTIONAL_GROUP
(
PARSE_RULE_OPTIONAL_DEPEND
(
PARSE_RULE_VAL_OPT(cfgOptBundle),
PARSE_RULE_VAL_BOOL_TRUE,
),
PARSE_RULE_OPTIONAL_ALLOW_RANGE
(
PARSE_RULE_VAL_INT(parseRuleValInt8192),
PARSE_RULE_VAL_INT(parseRuleValInt1125899906842624),
),
PARSE_RULE_OPTIONAL_DEFAULT
(
PARSE_RULE_VAL_INT(parseRuleValInt2097152),
PARSE_RULE_VAL_STR(parseRuleValStrQT_2MiB_QT),
),
),
),
),
// -----------------------------------------------------------------------------------------------------------------------------
PARSE_RULE_OPTION
(
@ -1207,8 +1250,8 @@ static const ParseRuleOption parseRuleOption[CFG_OPTION_TOTAL] =
PARSE_RULE_OPTIONAL_DEFAULT
(
PARSE_RULE_VAL_INT(parseRuleValInt104857600),
PARSE_RULE_VAL_STR(parseRuleValStrQT_100MiB_QT),
PARSE_RULE_VAL_INT(parseRuleValInt20971520),
PARSE_RULE_VAL_STR(parseRuleValStrQT_20MiB_QT),
),
),
),
@ -7781,12 +7824,6 @@ static const ParseRuleOption parseRuleOption[CFG_OPTION_TOTAL] =
(
PARSE_RULE_OPTIONAL_GROUP
(
PARSE_RULE_OPTIONAL_DEPEND
(
PARSE_RULE_VAL_OPT(cfgOptBundle),
PARSE_RULE_VAL_BOOL_FALSE,
),
PARSE_RULE_OPTIONAL_DEFAULT
(
PARSE_RULE_VAL_BOOL_TRUE,
@ -9166,6 +9203,7 @@ static const ConfigOption optionResolveOrder[] =
cfgOptBackupStandby,
cfgOptBufferSize,
cfgOptBundle,
cfgOptBundleLimit,
cfgOptBundleSize,
cfgOptChecksumPage,
cfgOptCipherPass,

View File

@ -1696,30 +1696,6 @@ testRun(void)
TEST_STORAGE_LIST_EMPTY(storageRepo(), STORAGE_REPO_BACKUP, .comment = "check backup path removed");
manifestResume->pub.data.backupOptionCompressType = compressTypeNone;
// -------------------------------------------------------------------------------------------------------------------------
TEST_TITLE("cannot resume when bundling");
argList = strLstNew();
hrnCfgArgRawZ(argList, cfgOptStanza, "test1");
hrnCfgArgRawZ(argList, cfgOptRepoPath, TEST_PATH "/repo");
hrnCfgArgRawZ(argList, cfgOptPgPath, "/pg");
hrnCfgArgRawZ(argList, cfgOptRepoRetentionFull, "1");
hrnCfgArgRawStrId(argList, cfgOptType, backupTypeFull);
hrnCfgArgRawBool(argList, cfgOptBundle, true);
HRN_CFG_LOAD(cfgCmdBackup, argList);
manifestSave(
manifestResume,
storageWriteIo(
storageNewWriteP(
storageRepoWrite(), STRDEF(STORAGE_REPO_BACKUP "/20191003-105320F/" BACKUP_MANIFEST_FILE INFO_COPY_EXT))));
TEST_RESULT_PTR(backupResumeFind(manifest, NULL), NULL, "find resumable backup");
TEST_RESULT_LOG("P00 INFO: backup '20191003-105320F' cannot be resumed: resume is disabled");
TEST_STORAGE_LIST_EMPTY(storageRepo(), STORAGE_REPO_BACKUP, .comment = "check backup path removed");
}
// *****************************************************************************************************************************
@ -2992,6 +2968,7 @@ testRun(void)
// Set to a smaller values than the defaults allow
cfgOptionSet(cfgOptBundleSize, cfgSourceParam, VARINT64(PG_PAGE_SIZE_DEFAULT));
cfgOptionSet(cfgOptBundleLimit, cfgSourceParam, VARINT64(PG_PAGE_SIZE_DEFAULT));
// Zeroed file which passes page checksums
Buffer *relation = bufNew(PG_PAGE_SIZE_DEFAULT * 3);
@ -3021,13 +2998,13 @@ testRun(void)
"P00 INFO: check archive for segment 0000000105DB8EB000000000\n"
"P00 DETAIL: store zero-length file " TEST_PATH "/pg1/pg_tblspc/32768/PG_11_201809051/1/5\n"
"P01 DETAIL: backup file " TEST_PATH "/pg1/base/1/2 (24KB, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: backup file " TEST_PATH "/pg1/base/1/1 (8KB, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: backup file " TEST_PATH "/pg1/global/pg_control (8KB, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: backup file " TEST_PATH "/pg1/stuff.conf (12B, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: backup file " TEST_PATH "/pg1/postgresql.auto.conf (12B, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: backup file " TEST_PATH "/pg1/postgresql.conf (11B, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: backup file " TEST_PATH "/pg1/PG_VERSION (2B, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: backup file " TEST_PATH "/pg1/bigish.dat (8.0KB, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: backup file " TEST_PATH "/pg1/base/1/1 (8KB, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: backup file " TEST_PATH "/pg1/global/pg_control (8KB, [PCT]) checksum [SHA1]\n"
"P00 INFO: execute non-exclusive pg_stop_backup() and wait for all WAL segments to archive\n"
"P00 INFO: backup stop archive = 0000000105DB8EB000000001, lsn = 5db8eb0/180000\n"
"P00 DETAIL: wrote 'backup_label' file returned from pg_stop_backup()\n"
@ -3042,16 +3019,18 @@ testRun(void)
testBackupValidate(storageRepo(), STRDEF(STORAGE_REPO_BACKUP "/latest")),
". {link, d=20191030-014640F}\n"
"bundle {path}\n"
"bundle/1/pg_data/base/1/2 {file, s=24576}\n"
"bundle/2/pg_data/base/1/1 {file, s=8192}\n"
"bundle/3/pg_data/global/pg_control {file, s=8192}\n"
"bundle/4/pg_data/PG_VERSION {file, s=2}\n"
"bundle/4/pg_data/postgresql.auto.conf {file, s=12}\n"
"bundle/4/pg_data/postgresql.conf {file, s=11}\n"
"bundle/4/pg_data/stuff.conf {file, s=12}\n"
"bundle/5/pg_data/bigish.dat {file, s=8191}\n"
"bundle/1/pg_data/PG_VERSION {file, s=2}\n"
"bundle/1/pg_data/postgresql.auto.conf {file, s=12}\n"
"bundle/1/pg_data/postgresql.conf {file, s=11}\n"
"bundle/1/pg_data/stuff.conf {file, s=12}\n"
"bundle/2/pg_data/bigish.dat {file, s=8191}\n"
"bundle/3/pg_data/base/1/1 {file, s=8192}\n"
"bundle/4/pg_data/global/pg_control {file, s=8192}\n"
"pg_data {path}\n"
"pg_data/backup_label.gz {file, s=17}\n"
"pg_data/base {path}\n"
"pg_data/base/1 {path}\n"
"pg_data/base/1/2.gz {file, s=24576}\n"
"pg_data/pg_wal {path}\n"
"pg_data/pg_wal/0000000105DB8EB000000000.gz {file, s=1048576}\n"
"pg_data/pg_wal/0000000105DB8EB000000001.gz {file, s=1048576}\n"