1
0
mirror of https://github.com/pgbackrest/pgbackrest.git synced 2025-03-03 14:52:21 +02:00

Fix resume after partial delete of backup by prior resume.

If files other than backup.manifest.copy were left in a backup path by a prior resume then the next resume would skip the backup rather than removing it. Since the backup path still existed, it would be found during backup label generation and cause an error if it appeared to be later than the new backup label. This occurred if the skipped backup was full.

The error was only likely on object stores such as S3 because of the order of file deletion. Posix file systems delete from the bottom up because directories containing files cannot be deleted. Object stores do not have directories so files are deleted in whatever order they are provided by the list command. However, the issue can be reproduced on a Posix file system by manually deleting backup.manifest.copy from a resumable backup path.

Fix the issue by removing the resumable backup if it has no manifest files. Also add a new warning message for this condition.

Note that this issue could be resolved by running expire or a new full backup.
This commit is contained in:
David Steele 2021-01-12 12:38:32 -05:00 committed by GitHub
parent 96fd678662
commit aeee83044d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 75 additions and 47 deletions

View File

@ -14,6 +14,17 @@
<release-list>
<release date="XXXX-XX-XX" version="2.32dev" title="UNDER DEVELOPMENT">
<release-core-list>
<release-bug-list>
<release-item>
<release-item-contributor-list>
<release-item-ideator id="tom.swartz"/>
<release-item-reviewer id="cynthia.shang"/>
</release-item-contributor-list>
<p>Fix resume after partial delete of backup by prior resume.</p>
</release-item>
</release-bug-list>
<release-development-list>
<release-item>
<release-item-contributor-list>
@ -9492,6 +9503,11 @@
<contributor-id type="github">gintoddic</contributor-id>
</contributor>
<contributor id="tom.swartz">
<contributor-name-display>Tom Swartz</contributor-name-display>
<contributor-id type="github">tomswartz07</contributor-id>
</contributor>
<contributor id="tomasz.kontusz">
<contributor-name-display>Tomasz Kontusz</contributor-name-display>
<contributor-id type="github">ktosiek</contributor-id>

View File

@ -692,65 +692,72 @@ backupResumeFind(const Manifest *manifest, const String *cipherPassBackup)
const String *backupLabel = strLstGet(backupList, 0);
const String *manifestFile = strNewFmt(STORAGE_REPO_BACKUP "/%s/" BACKUP_MANIFEST_FILE, strZ(backupLabel));
// Resumable backups have a copy of the manifest but no main
if (storageExistsP(storageRepo(), strNewFmt("%s" INFO_COPY_EXT, strZ(manifestFile))) &&
!storageExistsP(storageRepo(), manifestFile))
// Resumable backups do not have backup.manifest
if (!storageExistsP(storageRepo(), manifestFile))
{
bool usable = false;
const String *reason = STRDEF("resume is disabled");
const String *reason = STRDEF("partially deleted by prior resume or invalid");
Manifest *manifestResume = NULL;
// Attempt to read the manifest file in the resumable backup to see if it can be used. If any error at all occurs
// then the backup will be considered unusable and a resume will not be attempted.
if (cfgOptionBool(cfgOptResume))
// Resumable backups must have backup.manifest.copy
if (storageExistsP(storageRepo(), strNewFmt("%s" INFO_COPY_EXT, strZ(manifestFile))))
{
reason = strNewFmt("unable to read %s" INFO_COPY_EXT, strZ(manifestFile));
reason = STRDEF("resume is disabled");
TRY_BEGIN()
// Attempt to read the manifest file in the resumable backup to see if it can be used. If any error at all
// occurs then the backup will be considered unusable and a resume will not be attempted.
if (cfgOptionBool(cfgOptResume))
{
manifestResume = manifestLoadFile(
storageRepo(), manifestFile, cipherType(cfgOptionStr(cfgOptRepoCipherType)), cipherPassBackup);
const ManifestData *manifestResumeData = manifestData(manifestResume);
reason = strNewFmt("unable to read %s" INFO_COPY_EXT, strZ(manifestFile));
// Check pgBackRest version. This allows the resume implementation to be changed with each version of
// pgBackRest at the expense of users losing a resumable back after an upgrade, which seems worth the cost.
if (!strEq(manifestResumeData->backrestVersion, manifestData(manifest)->backrestVersion))
TRY_BEGIN()
{
reason = strNewFmt(
"new " PROJECT_NAME " version '%s' does not match resumable " PROJECT_NAME " version '%s'",
strZ(manifestData(manifest)->backrestVersion), strZ(manifestResumeData->backrestVersion));
manifestResume = manifestLoadFile(
storageRepo(), manifestFile, cipherType(cfgOptionStr(cfgOptRepoCipherType)), cipherPassBackup);
const ManifestData *manifestResumeData = manifestData(manifestResume);
// Check pgBackRest version. This allows the resume implementation to be changed with each version of
// pgBackRest at the expense of users losing a resumable back after an upgrade, which seems worth the
// cost.
if (!strEq(manifestResumeData->backrestVersion, manifestData(manifest)->backrestVersion))
{
reason = strNewFmt(
"new " PROJECT_NAME " version '%s' does not match resumable " PROJECT_NAME " version '%s'",
strZ(manifestData(manifest)->backrestVersion), strZ(manifestResumeData->backrestVersion));
}
// Check backup type because new backup label must be the same type as resume backup label
else if (manifestResumeData->backupType != backupType(cfgOptionStr(cfgOptType)))
{
reason = strNewFmt(
"new backup type '%s' does not match resumable backup type '%s'",
strZ(cfgOptionStr(cfgOptType)), strZ(backupTypeStr(manifestResumeData->backupType)));
}
// Check prior backup label ??? Do we really care about the prior backup label?
else if (!strEq(manifestResumeData->backupLabelPrior, manifestData(manifest)->backupLabelPrior))
{
reason = strNewFmt(
"new prior backup label '%s' does not match resumable prior backup label '%s'",
manifestResumeData->backupLabelPrior ? strZ(manifestResumeData->backupLabelPrior) : "<undef>",
manifestData(manifest)->backupLabelPrior ?
strZ(manifestData(manifest)->backupLabelPrior) : "<undef>");
}
// Check compression. Compression can't be changed between backups so resume won't work either.
else if (
manifestResumeData->backupOptionCompressType != compressTypeEnum(cfgOptionStr(cfgOptCompressType)))
{
reason = strNewFmt(
"new compression '%s' does not match resumable compression '%s'",
strZ(compressTypeStr(compressTypeEnum(cfgOptionStr(cfgOptCompressType)))),
strZ(compressTypeStr(manifestResumeData->backupOptionCompressType)));
}
else
usable = true;
}
// Check backup type because new backup label must be the same type as resume backup label
else if (manifestResumeData->backupType != backupType(cfgOptionStr(cfgOptType)))
CATCH_ANY()
{
reason = strNewFmt(
"new backup type '%s' does not match resumable backup type '%s'", strZ(cfgOptionStr(cfgOptType)),
strZ(backupTypeStr(manifestResumeData->backupType)));
}
// Check prior backup label ??? Do we really care about the prior backup label?
else if (!strEq(manifestResumeData->backupLabelPrior, manifestData(manifest)->backupLabelPrior))
{
reason = strNewFmt(
"new prior backup label '%s' does not match resumable prior backup label '%s'",
manifestResumeData->backupLabelPrior ? strZ(manifestResumeData->backupLabelPrior) : "<undef>",
manifestData(manifest)->backupLabelPrior ?
strZ(manifestData(manifest)->backupLabelPrior) : "<undef>");
}
// Check compression. Compression can't be changed between backups so resume won't work either.
else if (manifestResumeData->backupOptionCompressType != compressTypeEnum(cfgOptionStr(cfgOptCompressType)))
{
reason = strNewFmt(
"new compression '%s' does not match resumable compression '%s'",
strZ(compressTypeStr(compressTypeEnum(cfgOptionStr(cfgOptCompressType)))),
strZ(compressTypeStr(manifestResumeData->backupOptionCompressType)));
}
else
usable = true;
TRY_END();
}
CATCH_ANY()
{
}
TRY_END();
}
// If the backup is usable then return the manifest

View File

@ -1263,15 +1263,20 @@ testRun(void)
harnessCfgLoad(cfgCmdBackup, argList);
// -------------------------------------------------------------------------------------------------------------------------
TEST_TITLE("cannot resume empty directory");
TEST_TITLE("cannot resume when manifest and copy are missing");
storagePathCreateP(storageRepoWrite(), STRDEF(STORAGE_REPO_BACKUP "/20191003-105320F"));
TEST_RESULT_PTR(backupResumeFind((Manifest *)1, NULL), NULL, "find resumable backup");
TEST_RESULT_LOG(
"P00 WARN: backup '20191003-105320F' cannot be resumed: partially deleted by prior resume or invalid");
// -------------------------------------------------------------------------------------------------------------------------
TEST_TITLE("cannot resume when resume is disabled");
storagePathCreateP(storageRepoWrite(), STRDEF(STORAGE_REPO_BACKUP "/20191003-105320F"));
cfgOptionSet(cfgOptResume, cfgSourceParam, BOOL_FALSE_VAR);
storagePutP(