1
0
mirror of https://github.com/pgbackrest/pgbackrest.git synced 2025-04-21 11:57:01 +02:00

Add backup verification to internal verify command.

This is phase 2 of verify command development (phase 1 was processing the archives and phase 3 will be reconciling the archives and backups). In this phase the backups are verified by verifying each file listed in the manifest for the backup and creating a result set with the list of invalid files, if any. A summary is then rendered.

Unit tests have been added and duplicate tests have been removed.
This commit is contained in:
Cynthia Shang 2021-01-26 11:21:36 -05:00 committed by GitHub
parent 5d48dd2fb3
commit 2e60b93709
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 1457 additions and 224 deletions

View File

@ -48,6 +48,15 @@
<p>Partial multi-repository implementation.</p> <p>Partial multi-repository implementation.</p>
</release-item> </release-item>
<release-item>
<release-item-contributor-list>
<release-item-contributor id="cynthia.shang"/>
<release-item-reviewer id="david.steele"/>
</release-item-contributor-list>
<p>Add backup verification to internal verify command.</p>
</release-item>
<release-item> <release-item>
<release-item-contributor-list> <release-item-contributor-list>
<release-item-reviewer id="cynthia.shang"/> <release-item-reviewer id="cynthia.shang"/>

View File

@ -5,8 +5,9 @@ Verify the contents of the repository.
***********************************************************************************************************************************/ ***********************************************************************************************************************************/
#include "build.auto.h" #include "build.auto.h"
#include <unistd.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "command/archive/common.h" #include "command/archive/common.h"
#include "command/check/common.h" #include "command/check/common.h"
@ -36,11 +37,17 @@ Data Types and Structures
#define FUNCTION_LOG_VERIFY_ARCHIVE_RESULT_FORMAT(value, buffer, bufferSize) \ #define FUNCTION_LOG_VERIFY_ARCHIVE_RESULT_FORMAT(value, buffer, bufferSize) \
objToLog(&value, "VerifyArchiveResult", buffer, bufferSize) objToLog(&value, "VerifyArchiveResult", buffer, bufferSize)
// Function-log type and formatter for VerifyBackupResult. The formatter passes the address of the value to objToLog() together with
// the type name "VerifyBackupResult" and the destination buffer/size.
#define FUNCTION_LOG_VERIFY_BACKUP_RESULT_TYPE \
VerifyBackupResult
#define FUNCTION_LOG_VERIFY_BACKUP_RESULT_FORMAT(value, buffer, bufferSize) \
objToLog(&value, "VerifyBackupResult", buffer, bufferSize)
// Structure for verifying repository info files // Structure for verifying repository info files
typedef struct VerifyInfoFile typedef struct VerifyInfoFile
{ {
InfoBackup *backup; // Backup.info file contents InfoBackup *backup; // Backup.info file contents
InfoArchive *archive; // Archive.info file contents InfoArchive *archive; // Archive.info file contents
Manifest *manifest; // Manifest file contents
const String *checksum; // File checksum const String *checksum; // File checksum
int errorCode; // Error code else 0 for no error int errorCode; // Error code else 0 for no error
} VerifyInfoFile; } VerifyInfoFile;
@ -70,6 +77,31 @@ typedef struct VerifyInvalidFile
VerifyResult reason; // Reason file is invalid (e.g. incorrect checksum) VerifyResult reason; // Reason file is invalid (e.g. incorrect checksum)
} VerifyInvalidFile; } VerifyInvalidFile;
// Status result of a backup. backupValid is the first enumerator (value 0), so a VerifyBackupResult whose status is not explicitly
// initialized starts out as backupValid.
typedef enum
{
backupValid, // Default: All files in backup label repo passed verification
backupInvalid, // One or more files in backup label repo failed verification
backupMissingManifest, // Backup manifest missing (backup may have expired)
backupInProgress, // Backup appeared to be in progress (so was skipped)
} VerifyBackupResultStatus;
// Verification result for a single backup label, holding the final status, file counters, selected manifest data, and the list of
// files found to be invalid.
// NOTE(review): lstFindIdx() is called on the backup result list with a pointer to a String * as the key, so backupLabel presumably
// needs to remain the first member - confirm against the comparator used when the list is created.
typedef struct VerifyBackupResult
{
String *backupLabel; // Label assigned to the backup
VerifyBackupResultStatus status; // Final status of the backup
bool fileVerifyComplete; // Have all the files of the backup completed verification?
unsigned int totalFileManifest; // Total number of backup files in the manifest
unsigned int totalFileVerify; // Total number of backup files being verified
unsigned int totalFileValid; // Total number of backup files that were verified and valid
String *backupPrior; // Prior backup that this backup depends on, if any
unsigned int pgId; // PG id will be used to find WAL for the backup in the repo
unsigned int pgVersion; // PG version will be used with PG id to find WAL in the repo
String *archiveStart; // First WAL segment in the backup
String *archiveStop; // Last WAL segment in the backup
List *invalidFileList; // List of invalid files found in the backup
} VerifyBackupResult;
// Job data structure for processing and results collection // Job data structure for processing and results collection
typedef struct VerifyJobData typedef struct VerifyJobData
{ {
@ -78,14 +110,49 @@ typedef struct VerifyJobData
StringList *walPathList; // WAL path list for a single archive id StringList *walPathList; // WAL path list for a single archive id
StringList *walFileList; // WAL file list for a single WAL path StringList *walFileList; // WAL file list for a single WAL path
StringList *backupList; // List of backups to verify StringList *backupList; // List of backups to verify
Manifest *manifest; // Manifest contents with list of files to verify
unsigned int manifestFileIdx; // Index of the file within the manifest file list to process
String *currentBackup; // In progress backup, if any String *currentBackup; // In progress backup, if any
const InfoPg *pgHistory; // Database history list const InfoPg *pgHistory; // Database history list
bool backupProcessing; // Are we processing WAL or are we processing backup bool backupProcessing; // Are we processing WAL or are we processing backups
const String *manifestCipherPass; // Cipher pass for reading backup manifests
const String *walCipherPass; // Cipher pass for reading WAL files const String *walCipherPass; // Cipher pass for reading WAL files
const String *backupCipherPass; // Cipher pass for reading backup files referenced in a manifest
unsigned int jobErrorTotal; // Total errors that occurred during the job execution unsigned int jobErrorTotal; // Total errors that occurred during the job execution
List *archiveIdResultList; // Archive results List *archiveIdResultList; // Archive results
List *backupResultList; // Backup results
} VerifyJobData; } VerifyJobData;
/***********************************************************************************************************************************
Append an entry (file name plus the reason it failed verification) to an invalid file list

The file name is duplicated while the list's memory context is current so the stored copy is allocated in that context.
***********************************************************************************************************************************/
static void
verifyInvalidFileAdd(List *invalidFileList, VerifyResult reason, const String *fileName)
{
    FUNCTION_TEST_BEGIN();
        FUNCTION_TEST_PARAM(LIST, invalidFileList);                 // List receiving the new entry
        FUNCTION_TEST_PARAM(ENUM, reason);                          // Why the file is considered invalid
        FUNCTION_TEST_PARAM(STRING, fileName);                      // Name of the file being recorded
    FUNCTION_TEST_END();

    ASSERT(invalidFileList != NULL);
    ASSERT(fileName != NULL);

    MEM_CONTEXT_BEGIN(lstMemContext(invalidFileList))
    {
        // Build the entry with its own copy of the name, then hand it to the list
        VerifyInvalidFile entry = {.reason = reason, .fileName = strDup(fileName)};

        lstAdd(invalidFileList, &entry);
    }
    MEM_CONTEXT_END();

    FUNCTION_TEST_RETURN_VOID();
}
/*********************************************************************************************************************************** /***********************************************************************************************************************************
Load a file into memory Load a file into memory
***********************************************************************************************************************************/ ***********************************************************************************************************************************/
@ -142,8 +209,10 @@ verifyInfoFile(const String *pathFileName, bool keepFile, const String *cipherPa
{ {
if (strBeginsWith(pathFileName, INFO_BACKUP_PATH_FILE_STR)) if (strBeginsWith(pathFileName, INFO_BACKUP_PATH_FILE_STR))
result.backup = infoBackupMove(infoBackupNewLoad(infoRead), memContextPrior()); result.backup = infoBackupMove(infoBackupNewLoad(infoRead), memContextPrior());
else else if (strBeginsWith(pathFileName, INFO_ARCHIVE_PATH_FILE_STR))
result.archive = infoArchiveMove(infoArchiveNewLoad(infoRead), memContextPrior()); result.archive = infoArchiveMove(infoArchiveNewLoad(infoRead), memContextPrior());
else
result.manifest = manifestMove(manifestNewLoad(infoRead), memContextPrior());
} }
else else
ioReadDrain(infoRead); ioReadDrain(infoRead);
@ -277,6 +346,136 @@ verifyBackupInfoFile(void)
FUNCTION_LOG_RETURN(INFO_BACKUP, result); FUNCTION_LOG_RETURN(INFO_BACKUP, result);
} }
/***********************************************************************************************************************************
Get the manifest file

Loads the main backup.manifest for the backup named in backupResult. If the main file cannot be loaded then the copy is tried,
unless currentBackup is set and the main file is simply missing, in which case the backup is treated as in progress and skipped.
A loaded manifest is only accepted when its PG id, system-id, and version are found in pgHistory.

Returns the usable manifest (moved out of the temporary memory context to the prior one) or NULL. When NULL is returned,
backupResult->status is set to backupInProgress, backupMissingManifest, or backupInvalid; *jobErrorTotal is incremented only in the
backupInvalid case.
***********************************************************************************************************************************/
static Manifest *
verifyManifestFile(
VerifyBackupResult *backupResult, const String *cipherPass, bool currentBackup, const InfoPg *pgHistory,
unsigned int *jobErrorTotal)
{
FUNCTION_LOG_BEGIN(logLevelDebug);
FUNCTION_TEST_PARAM_P(VERIFY_BACKUP_RESULT, backupResult); // The result set for the backup being processed
FUNCTION_TEST_PARAM(STRING, cipherPass); // Passphrase to access the manifest file
FUNCTION_LOG_PARAM(BOOL, currentBackup); // Is this possibly a backup currently in progress?
FUNCTION_TEST_PARAM(INFO_PG, pgHistory); // Database history
FUNCTION_TEST_PARAM_P(UINT, jobErrorTotal); // Pointer to the overall job error total
FUNCTION_LOG_END();
Manifest *result = NULL;
MEM_CONTEXT_TEMP_BEGIN()
{
String *fileName = strNewFmt(STORAGE_REPO_BACKUP "/%s/" BACKUP_MANIFEST_FILE, strZ(backupResult->backupLabel));
// Get the main manifest file
VerifyInfoFile verifyManifestInfo = verifyInfoFile(fileName, true, cipherPass);
// If the main file did not error, then report on the copy's status and check checksums
if (verifyManifestInfo.errorCode == 0)
{
result = verifyManifestInfo.manifest;
// The current in-progress backup is only notional until the main file is checked because the backup may have
// completed by the time the main manifest is checked here. So having a main manifest file means this backup is not
// (or is no longer) the currentBackup.
// NOTE(review): currentBackup is passed by value and is not read again on this path, so this assignment only documents
// intent - it is not visible to the caller.
currentBackup = false;
// Attempt to load the copy and report on its status but don't keep it in memory
VerifyInfoFile verifyManifestInfoCopy = verifyInfoFile(
strNewFmt("%s%s", strZ(fileName), INFO_COPY_EXT), false, cipherPass);
// If the copy loaded successfully, then check the checksums
if (verifyManifestInfoCopy.errorCode == 0)
{
// If the manifest and manifest.copy checksums don't match each other then one (or both) of the files could be
// corrupt so log a warning but trust main
if (!strEq(verifyManifestInfo.checksum, verifyManifestInfoCopy.checksum))
LOG_WARN_FMT("backup '%s' manifest.copy does not match manifest", strZ(backupResult->backupLabel));
}
}
else
{
// If this might be an in-progress backup and the main manifest is simply missing, it is assumed the backup is an
// actual in-progress backup and verification is skipped, otherwise, if the main is not simply missing, or this is not
// an in-progress backup then attempt to load the copy.
if (!(currentBackup && verifyManifestInfo.errorCode == errorTypeCode(&FileMissingError)))
{
// NOTE(review): as above, this only changes the local by-value copy, which is not read again
currentBackup = false;
VerifyInfoFile verifyManifestInfoCopy = verifyInfoFile(
strNewFmt("%s%s", strZ(fileName), INFO_COPY_EXT), true, cipherPass);
// If loaded successfully, then return the copy as usable
if (verifyManifestInfoCopy.errorCode == 0)
{
LOG_WARN_FMT("%s/backup.manifest is missing or unusable, using copy", strZ(backupResult->backupLabel));
result = verifyManifestInfoCopy.manifest;
}
// Else if both the main and the copy are missing (rather than failing for some other reason) then flag the manifest as
// missing; this status is not counted as a job error below
else if (verifyManifestInfo.errorCode == errorTypeCode(&FileMissingError) &&
verifyManifestInfoCopy.errorCode == errorTypeCode(&FileMissingError))
{
backupResult->status = backupMissingManifest;
LOG_WARN_FMT("manifest missing for '%s' - backup may have expired", strZ(backupResult->backupLabel));
}
}
else
{
backupResult->status = backupInProgress;
LOG_INFO_FMT("backup '%s' appears to be in progress, skipping", strZ(backupResult->backupLabel));
}
}
// If found a usable manifest then check that the database it was based on is in the history
if (result != NULL)
{
bool found = false;
const ManifestData *manData = manifestData(result);
// Confirm the PG database information from the manifest is in the history list
for (unsigned int infoPgIdx = 0; infoPgIdx < infoPgDataTotal(pgHistory); infoPgIdx++)
{
InfoPgData pgHistoryData = infoPgData(pgHistory, infoPgIdx);
if (pgHistoryData.id == manData->pgId && pgHistoryData.systemId == manData->pgSystemId &&
pgHistoryData.version == manData->pgVersion)
{
found = true;
break;
}
}
// If the PG data is not found in the backup.info history, then error and reset the result
if (!found)
{
LOG_ERROR_FMT(
errorTypeCode(&FileInvalidError),
"'%s' may not be recoverable - PG data (id %u, version %s, system-id %" PRIu64 ") is not in the backup.info"
" history, skipping",
strZ(backupResult->backupLabel), manData->pgId, strZ(pgVersionToStr(manData->pgVersion)), manData->pgSystemId);
manifestFree(result);
result = NULL;
}
// Else the manifest is usable so move it out of the temp context before the context is freed
else
manifestMove(result, memContextPrior());
}
// If the result is NULL and the backup status has not yet been set, then the backup is unusable (invalid)
if (result == NULL && backupResult->status == backupValid)
{
backupResult->status = backupInvalid;
(*jobErrorTotal)++;
}
}
MEM_CONTEXT_TEMP_END();
FUNCTION_LOG_RETURN(MANIFEST, result);
}
/*********************************************************************************************************************************** /***********************************************************************************************************************************
Check the history in the info files Check the history in the info files
***********************************************************************************************************************************/ ***********************************************************************************************************************************/
@ -411,7 +610,7 @@ verifyCreateArchiveIdRange(VerifyArchiveResult *archiveIdResult, StringList *wal
{ {
.start = strDup(walSegment), .start = strDup(walSegment),
.stop = strDup(walSegment), .stop = strDup(walSegment),
.invalidFileList = lstNewP(sizeof(VerifyInvalidFile), .comparator = lstComparatorStr), .invalidFileList = lstNewP(sizeof(VerifyInvalidFile), .comparator = lstComparatorStr),
}; };
lstAdd(archiveIdResult->walRangeList, &walRangeNew); lstAdd(archiveIdResult->walRangeList, &walRangeNew);
@ -470,7 +669,7 @@ verifyArchive(void *data)
VerifyArchiveResult archiveIdResult = VerifyArchiveResult archiveIdResult =
{ {
.archiveId = strDup(archiveId), .archiveId = strDup(archiveId),
.walRangeList = lstNewP(sizeof(VerifyWalRange), .comparator = lstComparatorStr), .walRangeList = lstNewP(sizeof(VerifyWalRange), .comparator = lstComparatorStr),
}; };
lstAdd(jobData->archiveIdResultList, &archiveIdResult); lstAdd(jobData->archiveIdResultList, &archiveIdResult);
@ -561,8 +760,9 @@ verifyArchive(void *data)
protocolCommandParamAdd(command, VARUINT64(archiveResult->pgWalInfo.size)); protocolCommandParamAdd(command, VARUINT64(archiveResult->pgWalInfo.size));
protocolCommandParamAdd(command, VARSTR(jobData->walCipherPass)); protocolCommandParamAdd(command, VARSTR(jobData->walCipherPass));
// Assign job to result // Assign job to result, prepending the archiveId to the key for consistency with backup processing
result = protocolParallelJobNew(VARSTR(filePathName), command); result = protocolParallelJobNew(
VARSTR(strNewFmt("%s/%s", strZ(archiveResult->archiveId), strZ(filePathName))), command);
// Remove the file to process from the list // Remove the file to process from the list
strLstRemoveIdx(jobData->walFileList, 0); strLstRemoveIdx(jobData->walFileList, 0);
@ -610,6 +810,229 @@ verifyArchive(void *data)
FUNCTION_TEST_RETURN(result); FUNCTION_TEST_RETURN(result);
} }
/***********************************************************************************************************************************
Verify the job data backups

Works through jobData->backupList one backup at a time. When the backup at the head of the list does not yet have an entry at the
end of the result list, a new result is added and the manifest is loaded with verifyManifestFile(); backups without a usable
manifest are removed from the list and skipped. For each manifest file a verify-file job is created unless the file is referenced
from a prior backup whose verification is already complete, in which case the prior result for that file is reused.

Returns the next job to run, or NULL when no backup in the list has any further file to verify.
***********************************************************************************************************************************/
static ProtocolParallelJob *
verifyBackup(void *data)
{
FUNCTION_TEST_BEGIN();
FUNCTION_TEST_PARAM_P(VOID, data);
FUNCTION_TEST_END();
ProtocolParallelJob *result = NULL;
VerifyJobData *jobData = data;
// Process backup files, if any
while (strLstSize(jobData->backupList) > 0)
{
result = NULL;
// If result list is empty or the last processed is not equal to the backup being processed, then initialize the backup
// data and results
if (lstSize(jobData->backupResultList) == 0 ||
!strEq(((VerifyBackupResult *)lstGetLast(jobData->backupResultList))->backupLabel, strLstGet(jobData->backupList, 0)))
{
MEM_CONTEXT_BEGIN(lstMemContext(jobData->backupResultList))
{
// Members not listed here are zero-initialized, so status starts as backupValid and the counters start at 0
VerifyBackupResult backupResultNew =
{
.backupLabel = strDup(strLstGet(jobData->backupList, 0)),
.invalidFileList = lstNewP(sizeof(VerifyInvalidFile), .comparator = lstComparatorStr),
};
// Add the backup to the result list
lstAdd(jobData->backupResultList, &backupResultNew);
}
MEM_CONTEXT_END();
// Get the result just added so it can be updated directly
VerifyBackupResult *backupResult = lstGetLast(jobData->backupResultList);
// If currentBackup is set (meaning the newest backup label on disk was not in the db:current section when the
// backup.info file was read) and this is the same label, then set inProgressBackup to true, else false.
// NOTE(review): inProgressBackup is passed to verifyManifestFile by value, so any change made there (e.g. because a main
// backup.manifest exists, indicating the backup completed during the verify process) is not reflected in this variable.
bool inProgressBackup = strEq(jobData->currentBackup, backupResult->backupLabel);
// Get a usable backup manifest file
Manifest *manifest = verifyManifestFile(
backupResult, jobData->manifestCipherPass, inProgressBackup, jobData->pgHistory, &jobData->jobErrorTotal);
// If a usable backup.manifest file is not found
if (manifest == NULL)
{
// Remove this backup from the processing list
strLstRemoveIdx(jobData->backupList, 0);
// No files to process so continue to the next backup in the list
continue;
}
// Initialize the backup results and manifest for processing
else
{
// Move the manifest to the jobData for processing
jobData->manifest = manifestMove(manifest, jobData->memContext);
// Initialize the jobData
MEM_CONTEXT_BEGIN(jobData->memContext)
{
// Get the cipher subpass used to decrypt files in the backup and initialize the file list index
jobData->backupCipherPass = strDup(manifestCipherSubPass(jobData->manifest));
jobData->manifestFileIdx = 0;
}
MEM_CONTEXT_END();
const ManifestData *manData = manifestData(jobData->manifest);
// Copy the manifest data needed later into the result, allocating the strings in the result list's memory context
MEM_CONTEXT_BEGIN(lstMemContext(jobData->backupResultList))
{
backupResult->totalFileManifest = manifestFileTotal(jobData->manifest);
backupResult->backupPrior = strDup(manData->backupLabelPrior);
backupResult->pgId = manData->pgId;
backupResult->pgVersion = manData->pgVersion;
backupResult->archiveStart = strDup(manData->archiveStart);
backupResult->archiveStop = strDup(manData->archiveStop);
}
MEM_CONTEXT_END();
}
}
// The backup being processed is always the last entry in the result list
VerifyBackupResult *backupResult = lstGetLast(jobData->backupResultList);
// Process any files in the manifest
if (jobData->manifestFileIdx < manifestFileTotal(jobData->manifest))
{
do
{
const ManifestFile *fileData = manifestFile(jobData->manifest, jobData->manifestFileIdx);
String *filePathName = NULL;
// Track the files verified in order to determine when the processing of the backup is complete
backupResult->totalFileVerify++;
// Check if the file is referenced in a prior backup
if (fileData->reference != NULL)
{
// If the prior backup is not in the result list, then that backup was never processed (likely due to the --set
// option) so verify the file
unsigned int backupPriorIdx = lstFindIdx(jobData->backupResultList, &fileData->reference);
if (backupPriorIdx == LIST_NOT_FOUND)
{
filePathName = strNewFmt(
STORAGE_REPO_BACKUP "/%s/%s%s", strZ(fileData->reference), strZ(fileData->name),
strZ(compressExtStr((manifestData(jobData->manifest))->backupOptionCompressType)));
}
// Else the backup this file references has a result so check the processing state for the referenced backup
else
{
VerifyBackupResult *backupResultPrior = lstGet(jobData->backupResultList, backupPriorIdx);
// If the verify-state of the backup is not complete then verify the file
if (!backupResultPrior->fileVerifyComplete)
{
filePathName = strNewFmt(
STORAGE_REPO_BACKUP "/%s/%s%s", strZ(fileData->reference), strZ(fileData->name),
strZ(compressExtStr((manifestData(jobData->manifest))->backupOptionCompressType)));
}
// Else skip verification
else
{
// Name used to look the file up in the prior backup's invalid file list (no repo path prefix)
String *priorFile = strNewFmt(
"%s/%s%s", strZ(fileData->reference), strZ(fileData->name),
strZ(compressExtStr((manifestData(jobData->manifest))->backupOptionCompressType)));
unsigned int backupPriorInvalidIdx = lstFindIdx(backupResultPrior->invalidFileList, &priorFile);
// If the file is in the invalid file list of the prior backup where it is referenced then add the file
// as invalid to this backup result and set the backup result status; since already logged an error on
// this file, don't log again
if (backupPriorInvalidIdx != LIST_NOT_FOUND)
{
VerifyInvalidFile *invalidFile = lstGet(
backupResultPrior->invalidFileList, backupPriorInvalidIdx);
verifyInvalidFileAdd(backupResult->invalidFileList, invalidFile->reason, invalidFile->fileName);
backupResult->status = backupInvalid;
}
// Else the file in the prior backup was valid so increment the total valid files for this backup
else
{
backupResult->totalFileValid++;
}
}
}
}
// Else file is not referenced in a prior backup
else
{
filePathName = strNewFmt(
STORAGE_REPO_BACKUP "/%s/%s%s", strZ(backupResult->backupLabel), strZ(fileData->name),
strZ(compressExtStr((manifestData(jobData->manifest))->backupOptionCompressType)));
}
// If constructed file name is not null then send it off for processing
if (filePathName != NULL)
{
// Set up the job
ProtocolCommand *command = protocolCommandNew(PROTOCOL_COMMAND_VERIFY_FILE_STR);
protocolCommandParamAdd(command, VARSTR(filePathName));
// If the checksum is not present in the manifest, it will be calculated by manifest load
protocolCommandParamAdd(command, VARSTRZ(fileData->checksumSha1));
protocolCommandParamAdd(command, VARUINT64(fileData->size));
protocolCommandParamAdd(command, VARSTR(jobData->backupCipherPass));
// Assign job to result (prepend backup label being processed to the key since some files are in a prior backup)
result = protocolParallelJobNew(
VARSTR(strNewFmt("%s/%s", strZ(backupResult->backupLabel), strZ(filePathName))), command);
}
// Increment the index to point to the next file
jobData->manifestFileIdx++;
// If this was the last file to process for this backup, then free the manifest and remove this backup from the
// processing list
if (jobData->manifestFileIdx == backupResult->totalFileManifest)
{
manifestFree(jobData->manifest);
jobData->manifest = NULL;
strLstRemoveIdx(jobData->backupList, 0);
}
// If a job was found to be processed then break out to process it
if (result != NULL)
break;
}
while (jobData->manifestFileIdx < backupResult->totalFileManifest);
}
else
{
// Nothing to process so report an error, free the manifest, set the status, and remove the backup from processing list
LOG_ERROR_FMT(
errorTypeCode(&FileInvalidError), "backup '%s' manifest does not contain any target files to verify",
strZ(backupResult->backupLabel));
jobData->jobErrorTotal++;
manifestFree(jobData->manifest);
jobData->manifest = NULL;
backupResult->status = backupInvalid;
strLstRemoveIdx(jobData->backupList, 0);
}
// If a job was found to be processed then break out to process it
if (result != NULL)
break;
}
FUNCTION_TEST_RETURN(result);
}
/*********************************************************************************************************************************** /***********************************************************************************************************************************
Process the job data Process the job data
***********************************************************************************************************************************/ ***********************************************************************************************************************************/
@ -626,7 +1049,6 @@ verifyJobCallback(void *data, unsigned int clientIdx)
// Initialize the result // Initialize the result
ProtocolParallelJob *result = NULL; ProtocolParallelJob *result = NULL;
// Get a new job if there are any left
MEM_CONTEXT_TEMP_BEGIN() MEM_CONTEXT_TEMP_BEGIN()
{ {
VerifyJobData *jobData = data; VerifyJobData *jobData = data;
@ -634,8 +1056,17 @@ verifyJobCallback(void *data, unsigned int clientIdx)
if (!jobData->backupProcessing) if (!jobData->backupProcessing)
{ {
result = protocolParallelJobMove(verifyArchive(data), memContextPrior()); result = protocolParallelJobMove(verifyArchive(data), memContextPrior());
// Set the backupProcessing flag if the archive processing is finished so backup processing can begin immediately after
jobData->backupProcessing = strLstSize(jobData->archiveIdList) == 0; jobData->backupProcessing = strLstSize(jobData->archiveIdList) == 0;
} }
if (jobData->backupProcessing)
{
// Only begin backup verification if the last archive result was processed
if (result == NULL)
result = protocolParallelJobMove(verifyBackup(data), memContextPrior());
}
} }
MEM_CONTEXT_TEMP_END(); MEM_CONTEXT_TEMP_END();
@ -661,7 +1092,7 @@ verifyErrorMsg(VerifyResult verifyResult)
else if (verifyResult == verifySizeInvalid) else if (verifyResult == verifySizeInvalid)
result = strCatZ(result, "invalid size"); result = strCatZ(result, "invalid size");
else else
result = strCatZ(result, "invalid verify"); result = strCatZ(result, "invalid result");
FUNCTION_TEST_RETURN(result); FUNCTION_TEST_RETURN(result);
} }
@ -670,19 +1101,21 @@ verifyErrorMsg(VerifyResult verifyResult)
Helper function to output a log message based on job result that is not verifyOk and return an error count Helper function to output a log message based on job result that is not verifyOk and return an error count
***********************************************************************************************************************************/ ***********************************************************************************************************************************/
static unsigned int static unsigned int
verifyLogInvalidResult(VerifyResult verifyResult, unsigned int processId, String *filePathName) verifyLogInvalidResult(const String *fileType, VerifyResult verifyResult, unsigned int processId, const String *filePathName)
{ {
FUNCTION_TEST_BEGIN(); FUNCTION_TEST_BEGIN();
FUNCTION_TEST_PARAM(STRING, fileType); // Indicates archive or backup file
FUNCTION_TEST_PARAM(ENUM, verifyResult); // Result code from the verifyFile() function FUNCTION_TEST_PARAM(ENUM, verifyResult); // Result code from the verifyFile() function
FUNCTION_TEST_PARAM(UINT, processId); // Process Id reporting the result FUNCTION_TEST_PARAM(UINT, processId); // Process Id reporting the result
FUNCTION_TEST_PARAM(STRING, filePathName); // File for which results are being reported FUNCTION_TEST_PARAM(STRING, filePathName); // File for which results are being reported
FUNCTION_TEST_END(); FUNCTION_TEST_END();
ASSERT(fileType != NULL);
ASSERT(filePathName != NULL); ASSERT(filePathName != NULL);
// Log a warning because the WAL may have gone missing if expire came through and removed it // Log a warning because the WAL may have gone missing if expire came through and removed it
// legitimately so it is not necessarily an error so the jobErrorTotal should not be incremented // legitimately so it is not necessarily an error so the jobErrorTotal should not be incremented
if (verifyResult == verifyFileMissing) if (strEq(fileType, STORAGE_REPO_ARCHIVE_STR) && verifyResult == verifyFileMissing)
{ {
LOG_WARN_PID_FMT(processId, "%s '%s'", strZ(verifyErrorMsg(verifyResult)), strZ(filePathName)); LOG_WARN_PID_FMT(processId, "%s '%s'", strZ(verifyErrorMsg(verifyResult)), strZ(filePathName));
FUNCTION_TEST_RETURN(0); FUNCTION_TEST_RETURN(0);
@ -787,25 +1220,23 @@ verifyAddInvalidWalFile(List *walRangeList, VerifyResult fileResult, String *fil
ASSERT(fileName != NULL); ASSERT(fileName != NULL);
ASSERT(walSegment != NULL); ASSERT(walSegment != NULL);
for (unsigned int walIdx = 0; walIdx < lstSize(walRangeList); walIdx++) MEM_CONTEXT_TEMP_BEGIN()
{ {
VerifyWalRange *walRange = lstGet(walRangeList, walIdx); for (unsigned int walIdx = 0; walIdx < lstSize(walRangeList); walIdx++)
// If the WAL segment is less/equal to the stop file then it falls in this range since ranges are sorted by stop file in
// ascending order, therefore first one found is the range
if (strCmp(walRange->stop, walSegment) >= 0)
{ {
VerifyInvalidFile invalidFile = VerifyWalRange *walRange = lstGet(walRangeList, walIdx);
{
.fileName = strDup(fileName),
.reason = fileResult,
};
// Add the file to the range where it was found and exit the loop // If the WAL segment is less/equal to the stop file then it falls in this range since ranges are sorted by stop file in
lstAdd(walRange->invalidFileList, &invalidFile); // ascending order, therefore first one found is the range
break; if (strCmp(walRange->stop, walSegment) >= 0)
{
// Add the file to the range where it was found and exit the loop
verifyInvalidFileAdd(walRange->invalidFileList, fileResult, fileName);
break;
}
} }
} }
MEM_CONTEXT_TEMP_END();
FUNCTION_TEST_RETURN_VOID(); FUNCTION_TEST_RETURN_VOID();
} }
@ -814,43 +1245,122 @@ verifyAddInvalidWalFile(List *walRangeList, VerifyResult fileResult, String *fil
Render the results of the verify command Render the results of the verify command
***********************************************************************************************************************************/ ***********************************************************************************************************************************/
static String * static String *
verifyRender(List *archiveIdResultList) verifyRender(List *archiveIdResultList, List *backupResultList)
{ {
FUNCTION_TEST_BEGIN(); FUNCTION_TEST_BEGIN();
FUNCTION_TEST_PARAM(LIST, archiveIdResultList); // Result list for all archive Ids in the repo FUNCTION_TEST_PARAM(LIST, archiveIdResultList); // Result list for all archive Ids in the repo
FUNCTION_TEST_PARAM(LIST, backupResultList); // Result list for all backups in the repo
FUNCTION_TEST_END(); FUNCTION_TEST_END();
ASSERT(archiveIdResultList != NULL); ASSERT(archiveIdResultList != NULL);
ASSERT(backupResultList != NULL);
String *result = strNew("Results:\n"); String *result = strNew("Results:");
for (unsigned int archiveIdx = 0; archiveIdx < lstSize(archiveIdResultList); archiveIdx++) // Render archive results
if (lstSize(archiveIdResultList) == 0)
strCatZ(result, "\n archiveId: none found");
else
{ {
VerifyArchiveResult *archiveIdResult = lstGet(archiveIdResultList, archiveIdx); for (unsigned int archiveIdx = 0; archiveIdx < lstSize(archiveIdResultList); archiveIdx++)
strCatFmt(
result, "%s archiveId: %s, total WAL checked: %u, total valid WAL: %u", (archiveIdx > 0 ? "\n" : ""),
strZ(archiveIdResult->archiveId), archiveIdResult->totalWalFile, archiveIdResult->totalValidWal);
if (archiveIdResult->totalWalFile > 0)
{ {
unsigned int errMissing = 0; VerifyArchiveResult *archiveIdResult = lstGet(archiveIdResultList, archiveIdx);
unsigned int errChecksum = 0; strCatFmt(
unsigned int errSize = 0; result, "\n archiveId: %s, total WAL checked: %u, total valid WAL: %u", strZ(archiveIdResult->archiveId),
unsigned int errOther = 0; archiveIdResult->totalWalFile, archiveIdResult->totalValidWal);
for (unsigned int walIdx = 0; walIdx < lstSize(archiveIdResult->walRangeList); walIdx++) if (archiveIdResult->totalWalFile > 0)
{ {
VerifyWalRange *walRange = lstGet(archiveIdResult->walRangeList, walIdx); unsigned int errMissing = 0;
unsigned int errChecksum = 0;
unsigned int errSize = 0;
unsigned int errOther = 0;
LOG_DETAIL_FMT( for (unsigned int walIdx = 0; walIdx < lstSize(archiveIdResult->walRangeList); walIdx++)
"archiveId: %s, wal start: %s, wal stop: %s", strZ(archiveIdResult->archiveId), strZ(walRange->start),
strZ(walRange->stop));
unsigned int invalidIdx = 0;
while (invalidIdx < lstSize(walRange->invalidFileList))
{ {
VerifyInvalidFile *invalidFile = lstGet(walRange->invalidFileList, invalidIdx); VerifyWalRange *walRange = lstGet(archiveIdResult->walRangeList, walIdx);
LOG_DETAIL_FMT(
"archiveId: %s, wal start: %s, wal stop: %s", strZ(archiveIdResult->archiveId), strZ(walRange->start),
strZ(walRange->stop));
unsigned int invalidIdx = 0;
while (invalidIdx < lstSize(walRange->invalidFileList))
{
VerifyInvalidFile *invalidFile = lstGet(walRange->invalidFileList, invalidIdx);
if (invalidFile->reason == verifyFileMissing)
errMissing++;
else if (invalidFile->reason == verifyChecksumMismatch)
errChecksum++;
else if (invalidFile->reason == verifySizeInvalid)
errSize++;
else
errOther++;
invalidIdx++;
}
}
strCatFmt(
result, "\n missing: %u, checksum invalid: %u, size invalid: %u, other: %u", errMissing, errChecksum,
errSize, errOther);
}
}
}
// Render backup results
if (lstSize(backupResultList) == 0)
strCatZ(result, "\n backup: none found");
else
{
for (unsigned int backupIdx = 0; backupIdx < lstSize(backupResultList); backupIdx++)
{
VerifyBackupResult *backupResult = lstGet(backupResultList, backupIdx);
String *status = NULL;
switch (backupResult->status)
{
case backupValid:
{
status = strNew("valid");
break;
}
case backupInvalid:
{
status = strNew("invalid");
break;
}
case backupMissingManifest:
{
status = strNew("manifest missing");
break;
}
case backupInProgress:
{
status = strNew("in-progress");
break;
}
}
strCatFmt(
result, "\n backup: %s, status: %s, total files checked: %u, total valid files: %u",
strZ(backupResult->backupLabel), strZ(status), backupResult->totalFileVerify, backupResult->totalFileValid);
if (backupResult->totalFileVerify > 0)
{
unsigned int errMissing = 0;
unsigned int errChecksum = 0;
unsigned int errSize = 0;
unsigned int errOther = 0;
for (unsigned int invalidIdx = 0; invalidIdx < lstSize(backupResult->invalidFileList); invalidIdx++)
{
VerifyInvalidFile *invalidFile = lstGet(backupResult->invalidFileList, invalidIdx);
if (invalidFile->reason == verifyFileMissing) if (invalidFile->reason == verifyFileMissing)
errMissing++; errMissing++;
@ -860,15 +1370,12 @@ verifyRender(List *archiveIdResultList)
errSize++; errSize++;
else else
errOther++; errOther++;
invalidIdx++;
} }
}
strCatFmt( strCatFmt(
result, result, "\n missing: %u, checksum invalid: %u, size invalid: %u, other: %u", errMissing, errChecksum,
"\n missing: %u, checksum invalid: %u, size invalid: %u, other: %u", errSize, errOther);
errMissing, errChecksum, errSize, errOther); }
} }
} }
@ -940,18 +1447,20 @@ verifyProcess(unsigned int *errorTotal)
.walPathList = NULL, .walPathList = NULL,
.walFileList = strLstNew(), .walFileList = strLstNew(),
.pgHistory = infoArchivePg(archiveInfo), .pgHistory = infoArchivePg(archiveInfo),
.manifestCipherPass = infoPgCipherPass(infoBackupPg(backupInfo)),
.walCipherPass = infoPgCipherPass(infoArchivePg(archiveInfo)), .walCipherPass = infoPgCipherPass(infoArchivePg(archiveInfo)),
.archiveIdResultList = lstNewP(sizeof(VerifyArchiveResult), .comparator = archiveIdComparator), .archiveIdResultList = lstNewP(sizeof(VerifyArchiveResult), .comparator = archiveIdComparator),
.backupResultList = lstNewP(sizeof(VerifyBackupResult), .comparator = lstComparatorStr),
}; };
// Get a list of backups in the repo // Get a list of backups in the repo sorted ascending
jobData.backupList = strLstSort( jobData.backupList = strLstSort(
storageListP( storageListP(
storage, STORAGE_REPO_BACKUP_STR, storage, STORAGE_REPO_BACKUP_STR,
.expression = backupRegExpP(.full = true, .differential = true, .incremental = true)), .expression = backupRegExpP(.full = true, .differential = true, .incremental = true)),
sortOrderAsc); sortOrderAsc);
// Get a list of archive Ids in the repo (e.g. 9.4-1, 10-2, etc) sorted by the db-id (number after the dash) // Get a list of archive Ids in the repo (e.g. 9.4-1, 10-2, etc) sorted ascending by the db-id (number after the dash)
jobData.archiveIdList = strLstSort( jobData.archiveIdList = strLstSort(
strLstComparatorSet( strLstComparatorSet(
storageListP(storage, STORAGE_REPO_ARCHIVE_STR, .expression = STRDEF(REGEX_ARCHIVE_DIR_DB_VERSION)), storageListP(storage, STORAGE_REPO_ARCHIVE_STR, .expression = STRDEF(REGEX_ARCHIVE_DIR_DB_VERSION)),
@ -966,6 +1475,10 @@ verifyProcess(unsigned int *errorTotal)
if (strLstSize(jobData.archiveIdList) == 0 || strLstSize(jobData.backupList) == 0) if (strLstSize(jobData.archiveIdList) == 0 || strLstSize(jobData.backupList) == 0)
LOG_WARN_FMT("no %s exist in the repo", strLstSize(jobData.archiveIdList) == 0 ? "archives" : "backups"); LOG_WARN_FMT("no %s exist in the repo", strLstSize(jobData.archiveIdList) == 0 ? "archives" : "backups");
// If there are no archives to process, then set the processing flag to skip to processing the backups
if (strLstSize(jobData.archiveIdList) == 0)
jobData.backupProcessing = true;
// Set current backup if there is one and verify the archive history on disk is in the database history // Set current backup if there is one and verify the archive history on disk is in the database history
jobData.currentBackup = verifySetBackupCheckArchive( jobData.currentBackup = verifySetBackupCheckArchive(
jobData.backupList, backupInfo, jobData.archiveIdList, jobData.pgHistory, &jobData.jobErrorTotal); jobData.backupList, backupInfo, jobData.archiveIdList, jobData.pgHistory, &jobData.jobErrorTotal);
@ -989,33 +1502,68 @@ verifyProcess(unsigned int *errorTotal)
ProtocolParallelJob *job = protocolParallelResult(parallelExec); ProtocolParallelJob *job = protocolParallelResult(parallelExec);
unsigned int processId = protocolParallelJobProcessId(job); unsigned int processId = protocolParallelJobProcessId(job);
StringList *filePathLst = strLstNewSplit(varStr(protocolParallelJobKey(job)), FSLASH_STR); StringList *filePathLst = strLstNewSplit(varStr(protocolParallelJobKey(job)), FSLASH_STR);
// Remove the result and file type identifier and recreate the path file name
const String *resultId = strLstGet(filePathLst, 0);
strLstRemoveIdx(filePathLst, 0);
const String *fileType = strLstGet(filePathLst, 0);
strLstRemoveIdx(filePathLst, 0); strLstRemoveIdx(filePathLst, 0);
String *filePathName = strLstJoin(filePathLst, "/"); String *filePathName = strLstJoin(filePathLst, "/");
// Initialize the result sets
VerifyArchiveResult *archiveIdResult = NULL; VerifyArchiveResult *archiveIdResult = NULL;
VerifyBackupResult *backupResult = NULL;
// Find the archiveId in the list - assert if not found since this should never happen // Get archiveId result data
String *archiveId = strLstGet(filePathLst, 0); if (strEq(fileType, STORAGE_REPO_ARCHIVE_STR))
unsigned int index = lstFindIdx(jobData.archiveIdResultList, &archiveId); {
ASSERT(index != LIST_NOT_FOUND); // Find the archiveId in the list - assert if not found since this should never happen
unsigned int index = lstFindIdx(jobData.archiveIdResultList, &resultId);
ASSERT(index != LIST_NOT_FOUND);
archiveIdResult = lstGet(jobData.archiveIdResultList, index); archiveIdResult = lstGet(jobData.archiveIdResultList, index);
}
// Else get the backup result data
else
{
unsigned int index = lstFindIdx(jobData.backupResultList, &resultId);
ASSERT(index != LIST_NOT_FOUND);
backupResult = lstGet(jobData.backupResultList, index);
}
// The job was successful // The job was successful
if (protocolParallelJobErrorCode(job) == 0) if (protocolParallelJobErrorCode(job) == 0)
{ {
const VerifyResult verifyResult = (VerifyResult)varUIntForce(protocolParallelJobResult(job)); const VerifyResult verifyResult = (VerifyResult)varUIntForce(protocolParallelJobResult(job));
if (verifyResult == verifyOk) // Update the result set for the type of file being processed
archiveIdResult->totalValidWal++; if (strEq(fileType, STORAGE_REPO_ARCHIVE_STR))
{
if (verifyResult == verifyOk)
archiveIdResult->totalValidWal++;
else
{
jobData.jobErrorTotal += verifyLogInvalidResult(
fileType, verifyResult, processId, filePathName);
// Add invalid file to the WAL range
verifyAddInvalidWalFile(
archiveIdResult->walRangeList, verifyResult, filePathName,
strSubN(strLstGet(filePathLst, strLstSize(filePathLst) - 1), 0, WAL_SEGMENT_NAME_SIZE));
}
}
else else
{ {
jobData.jobErrorTotal += verifyLogInvalidResult(verifyResult, processId, filePathName); if (verifyResult == verifyOk)
backupResult->totalFileValid++;
// Add invalid file with reason from result of verifyFile to range list else
verifyAddInvalidWalFile( {
archiveIdResult->walRangeList, verifyResult, filePathName, jobData.jobErrorTotal += verifyLogInvalidResult(
strSubN(strLstGet(filePathLst, strLstSize(filePathLst) - 1), 0, WAL_SEGMENT_NAME_SIZE)); fileType, verifyResult, processId, filePathName);
backupResult->status = backupInvalid;
verifyInvalidFileAdd(backupResult->invalidFileList, verifyResult, filePathName);
}
} }
} }
// Else the job errored // Else the job errored
@ -1029,10 +1577,26 @@ verifyProcess(unsigned int *errorTotal)
jobData.jobErrorTotal++; jobData.jobErrorTotal++;
// Add invalid file with "OtherError" reason to range list // Add invalid file with "OtherError" reason to invalid file list
verifyAddInvalidWalFile( if (strEq(fileType, STORAGE_REPO_ARCHIVE_STR))
archiveIdResult->walRangeList, verifyOtherError, filePathName, {
strSubN(strLstGet(filePathLst, strLstSize(filePathLst) - 1), 0, WAL_SEGMENT_NAME_SIZE)); // Add invalid file to the WAL range
verifyAddInvalidWalFile(
archiveIdResult->walRangeList, verifyOtherError, filePathName,
strSubN(strLstGet(filePathLst, strLstSize(filePathLst) - 1), 0, WAL_SEGMENT_NAME_SIZE));
}
else
{
backupResult->status = backupInvalid;
verifyInvalidFileAdd(backupResult->invalidFileList, verifyOtherError, filePathName);
}
}
// Set backup verification complete for a backup if all files have run through verification
if (strEq(fileType, STORAGE_REPO_BACKUP_STR) &&
backupResult->totalFileVerify == backupResult->totalFileManifest)
{
backupResult->fileVerifyComplete = true;
} }
// Free the job // Free the job
@ -1044,8 +1608,7 @@ verifyProcess(unsigned int *errorTotal)
// ??? Need to do the final reconciliation - checking backup required WAL against, valid WAL // ??? Need to do the final reconciliation - checking backup required WAL against, valid WAL
// Report results // Report results
if (lstSize(jobData.archiveIdResultList) > 0) resultStr = verifyRender(jobData.archiveIdResultList, jobData.backupResultList);
resultStr = verifyRender(jobData.archiveIdResultList);
} }
else else
LOG_WARN("no archives or backups exist in the repo"); LOG_WARN("no archives or backups exist in the repo");

View File

@ -712,7 +712,7 @@ unit:
# ---------------------------------------------------------------------------------------------------------------------------- # ----------------------------------------------------------------------------------------------------------------------------
- name: verify - name: verify
total: 6 total: 8
binReq: true binReq: true
coverage: coverage:

File diff suppressed because it is too large Load Diff