1
0
mirror of https://github.com/pgbackrest/pgbackrest.git synced 2025-01-18 04:58:51 +02:00

Skip files truncated during backup when bundling.

In bundle mode pgBackRest skips files of zero size, that is, it does not queue them for copying.

After splitting the files into bundles, pgBackRest launches one or more processes that directly perform the backup, namely, read the files and, if necessary, write them to the bundles.

If during the time between the distribution of all files among bundles and the direct copying of a file to a bundle, this file of non-zero size was truncated to zero size (for example, when the table was truncated), then pgBackRest still unconditionally places such a zero-size file in the bundle, taking up space in it equal to the size of the headings, and additionally writes the original file size to the manifest.

In debug build an assertion was added, that does not allow zero-size files to be written to bundles, which leads to an error.

To solve the problem, this patch, when reading the next file, loads one buffer from the file to detect if it is zero-size. If so it marks the file as truncated and continues on to the next file.

The advantages of the solution are that, firstly, the assert will not fire on debug builds, and secondly, we will not place zero-size files in bundles, which exactly corresponds to the specification.

The patch adds the backupCopyResultTruncate value to the BackupCopyResult enumeration to use it to indicate the result when a non-zero size file is truncated to zero size during the backup process.
This commit is contained in:
Georgy Shelkovy 2023-12-14 22:21:06 +05:00 committed by GitHub
parent 89d5278b74
commit 02eea555c7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 96 additions and 43 deletions

View File

@ -11,6 +11,17 @@
<p>Add support for alternate compile-time page sizes.</p>
</release-item>
<release-item>
<github-pull-request id="2227"/>
<release-item-contributor-list>
<release-item-contributor id="georgy.shelkovy"/>
<release-item-reviewer id="david.steele"/>
</release-item-contributor-list>
<p>Skip files truncated during backup when bundling.</p>
</release-item>
</release-improvement-list>
</release-core-list>
</release>

View File

@ -1439,7 +1439,7 @@ backupJobResult(
// Format log progress
String *const logProgress = strNew();
if (bundleId != 0 && copyResult != backupCopyResultNoOp)
if (bundleId != 0 && copyResult != backupCopyResultNoOp && copyResult != backupCopyResultTruncate)
strCatFmt(logProgress, "bundle %" PRIu64 "/%" PRIu64 ", ", bundleId, bundleOffset);
// Log original manifest size if copy size differs
@ -1492,7 +1492,9 @@ backupJobResult(
strZ(file.name), strZ(strNewEncode(encodingHex, BUF(file.checksumSha1, HASH_TYPE_SHA1_SIZE))));
}
LOG_DETAIL_PID_FMT(processId, "backup file %s (%s)%s", strZ(fileLog), strZ(logProgress), strZ(logChecksum));
LOG_DETAIL_PID_FMT(
processId, "%s file %s (%s)%s", copyResult != backupCopyResultTruncate ? "backup" : "store truncated",
strZ(fileLog), strZ(logProgress), strZ(logChecksum));
// If the file had page checksums calculated during the copy
ASSERT((!file.checksumPage && checksumPageResult == NULL) || (file.checksumPage && checksumPageResult != NULL));
@ -1576,7 +1578,8 @@ backupJobResult(
file.checksumPageError = checksumPageError;
file.checksumPageErrorList =
checksumPageErrorList != NULL ? jsonFromVar(varNewVarLst(checksumPageErrorList)) : NULL;
file.bundleId = bundleId;
// Truncated file is not put in bundle
file.bundleId = copyResult != backupCopyResultTruncate ? bundleId : 0;
file.bundleOffset = bundleOffset;
file.blockIncrMapSize = blockIncrMapSize;

View File

@ -295,61 +295,99 @@ backupFile(
// Add size filter last to calculate repo size
ioFilterGroupAdd(ioReadFilterGroup(readIo), ioSizeNew());
// Open the source and destination and copy the file
// Open the source
if (ioReadOpen(readIo))
{
// Setup the repo file for write. There is no need to write the file atomically (e.g. via a temp file on
// Posix) because checksums are tested on resume after a failed backup. The path does not need to be synced
// for each file because all paths are synced at the end of the backup. It needs to be created in the prior
// context because it will live longer than a single loop when more than one file is being written.
if (write == NULL)
Buffer *const buffer = bufNew(ioBufferSize());
bool readEof = false;
// Read the first buffer to determine if the file was truncated. Detecting truncation matters only when
// bundling is enabled as otherwise the file will be stored anyway.
ioRead(readIo, buffer);
if (ioReadEof(readIo) && bundleId != 0)
{
MEM_CONTEXT_PRIOR_BEGIN()
{
write = storageNewWriteP(
storageRepoWrite(), repoFile, .compressible = compressible, .noAtomic = true,
.noSyncPath = true);
ioWriteOpen(storageWriteIo(write));
}
MEM_CONTEXT_PRIOR_END();
// Close the source and set eof
ioReadClose(readIo);
readEof = true;
// If the file is zero-length then it was truncated during the backup
if (pckReadU64P(ioFilterGroupResultP(ioReadFilterGroup(readIo), SIZE_FILTER_TYPE, .idx = 0)) == 0)
fileResult->backupCopyResult = backupCopyResultTruncate;
}
// Copy data from source to destination
ioCopyP(readIo, storageWriteIo(write));
// Copy the file in non-bundling mode or if the file is not zero-length
if (fileResult->backupCopyResult != backupCopyResultTruncate)
{
// Setup the repo file for write. There is no need to write the file atomically (e.g. via a temp file on
// Posix) because checksums are tested on resume after a failed backup. The path does not need to be
// synced for each file because all paths are synced at the end of the backup. It needs to be created in
// the prior context because it will live longer than a single loop when more than one file is being
// written.
if (write == NULL)
{
MEM_CONTEXT_PRIOR_BEGIN()
{
write = storageNewWriteP(
storageRepoWrite(), repoFile, .compressible = compressible, .noAtomic = true,
.noSyncPath = true);
ioWriteOpen(storageWriteIo(write));
}
MEM_CONTEXT_PRIOR_END();
}
// Close the source
ioReadClose(readIo);
// Write the first buffer
ioWrite(storageWriteIo(write), buffer);
bufFree(buffer);
// Copy remainder of the file if not eof
if (!readEof)
{
ioCopyP(readIo, storageWriteIo(write));
// Close the source
ioReadClose(readIo);
}
}
MEM_CONTEXT_BEGIN(lstMemContext(result))
{
// Get sizes and checksum
// Get size and checksum
fileResult->copySize = pckReadU64P(
ioFilterGroupResultP(ioReadFilterGroup(readIo), SIZE_FILTER_TYPE, .idx = 0));
fileResult->bundleOffset = bundleOffset;
fileResult->copyChecksum = pckReadBinP(
ioFilterGroupResultP(ioReadFilterGroup(readIo), CRYPTO_HASH_FILTER_TYPE, .idx = 0));
fileResult->repoSize = pckReadU64P(
ioFilterGroupResultP(ioReadFilterGroup(readIo), SIZE_FILTER_TYPE, .idx = 1));
// Get results of page checksum validation
if (file->pgFileChecksumPage)
// If the file is not bundled or not zero-length then it was copied
if (fileResult->backupCopyResult != backupCopyResultTruncate)
{
fileResult->pageChecksumResult = pckDup(
ioFilterGroupResultPackP(ioReadFilterGroup(readIo), PAGE_CHECKSUM_FILTER_TYPE));
}
// Get bundle offset
fileResult->bundleOffset = bundleOffset;
// Get results of block incremental
if (file->blockIncrSize != 0)
{
fileResult->blockIncrMapSize = pckReadU64P(
ioFilterGroupResultP(ioReadFilterGroup(readIo), BLOCK_INCR_FILTER_TYPE));
}
// Get repo size
fileResult->repoSize = pckReadU64P(
ioFilterGroupResultP(ioReadFilterGroup(readIo), SIZE_FILTER_TYPE, .idx = 1));
// Get repo checksum
if (repoChecksum)
{
fileResult->repoChecksum = pckReadBinP(
ioFilterGroupResultP(ioReadFilterGroup(readIo), CRYPTO_HASH_FILTER_TYPE, .idx = 1));
// Get results of page checksum validation
if (file->pgFileChecksumPage)
{
fileResult->pageChecksumResult = pckDup(
ioFilterGroupResultPackP(ioReadFilterGroup(readIo), PAGE_CHECKSUM_FILTER_TYPE));
}
// Get results of block incremental
if (file->blockIncrSize != 0)
{
fileResult->blockIncrMapSize = pckReadU64P(
ioFilterGroupResultP(ioReadFilterGroup(readIo), BLOCK_INCR_FILTER_TYPE));
}
// Get repo checksum
if (repoChecksum)
{
fileResult->repoChecksum = pckReadBinP(
ioFilterGroupResultP(ioReadFilterGroup(readIo), CRYPTO_HASH_FILTER_TYPE, .idx = 1));
}
}
}
MEM_CONTEXT_END();

View File

@ -19,6 +19,7 @@ typedef enum
backupCopyResultReCopy,
backupCopyResultSkip,
backupCopyResultNoOp,
backupCopyResultTruncate,
} BackupCopyResult;
/***********************************************************************************************************************************

View File

@ -3050,6 +3050,7 @@ manifestFileUpdate(Manifest *const this, const ManifestFile *const file)
(!file->checksumPage && !file->checksumPageError && file->checksumPageErrorList == NULL) ||
(file->checksumPage && !file->checksumPageError && file->checksumPageErrorList == NULL) ||
(file->checksumPage && file->checksumPageError));
ASSERT(file->size != 0 || (file->bundleId == 0 && file->bundleOffset == 0));
ManifestFilePack **const filePack = manifestFilePackFindInternal(this, file->name);
manifestFilePackUpdate(this, filePack, file);

View File

@ -3608,7 +3608,7 @@ testRun(void)
"P00 INFO: check archive for segment 0000000105DC213000000000\n"
"P01 DETAIL: backup file " TEST_PATH "/pg1/block-incr-larger (1.4MB, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: backup file " TEST_PATH "/pg1/block-incr-grow (128KB, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: backup file " TEST_PATH "/pg1/truncate-to-zero (bundle 1/0, 4B->0B, [PCT])\n"
"P01 DETAIL: store truncated file " TEST_PATH "/pg1/truncate-to-zero (4B->0B, [PCT])\n"
"P01 DETAIL: backup file " TEST_PATH "/pg1/grow-to-block-incr (bundle 1/0, 16KB, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: backup file " TEST_PATH "/pg1/global/pg_control (bundle 1/16411, 8KB, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: backup file " TEST_PATH "/pg1/block-incr-shrink (bundle 1/24603, 16.0KB, [PCT]) checksum [SHA1]\n"
@ -3628,7 +3628,6 @@ testRun(void)
"bundle/1/pg_data/block-incr-shrink {file, s=16383}\n"
"bundle/1/pg_data/global/pg_control {file, s=8192}\n"
"bundle/1/pg_data/grow-to-block-incr {file, m=1:{0,1,2}, s=16385}\n"
"bundle/1/pg_data/truncate-to-zero {file, s=0}\n"
"pg_data {path}\n"
"pg_data/backup_label {file, s=17}\n"
"pg_data/block-incr-grow.pgbi {file, m=0:{0},1:{0},0:{2},1:{1,2,3,4,5,6,7,8,9,10,11,12,13}, s=131072}\n"