1
0
mirror of https://github.com/pgbackrest/pgbackrest.git synced 2026-05-22 10:15:16 +02:00

Detect files that have not changed during non-delta incremental backup.

02eea55 added code to load a buffer of data from a file being backed up to detect files that have been truncated to zero after manifest generation. This mechanism can also be used to detect files that have not changed since the prior backup.

If the result of the file copy fits into a single buffer, then the size and checksum can be compared to the prior file before anything gets stored. If the file matches, then it is referenced to the file in the prior backup.

The size that can be compared for normal copies is limited by the buffer size but for block incremental it works with any size file since there is no output from block incremental when the file is identical.
This commit is contained in:
David Steele
2024-03-08 15:07:43 +13:00
committed by GitHub
parent cf17515e40
commit 9d91d1b2f8
10 changed files with 113 additions and 17 deletions
+11
View File
@@ -30,6 +30,17 @@
</release-bug-list>
<release-improvement-list>
<release-item>
<github-pull-request id="2257"/>
<release-item-contributor-list>
<release-item-contributor id="david.steele"/>
<release-item-reviewer id="stephen.frost"/>
</release-item-contributor-list>
<p>Detect files that have not changed during non-delta incremental backup.</p>
</release-item>
<release-item>
<github-pull-request id="2277"/>
+1
View File
@@ -2006,6 +2006,7 @@ backupJobCallback(void *const data, const unsigned int clientIdx)
pckWriteBoolP(param, file.delta);
pckWriteBoolP(param, !strEq(file.name, STRDEF(MANIFEST_TARGET_PGDATA "/" PG_PATH_GLOBAL "/" PG_FILE_PGCONTROL)));
pckWriteU64P(param, file.size);
pckWriteU64P(param, file.sizePrior);
pckWriteBoolP(param, !backupProcessFilePrimary(jobData->standbyExp, file.name));
pckWriteBinP(param, file.checksumSha1 != NULL ? BUF(file.checksumSha1, HASH_TYPE_SHA1_SIZE) : NULL);
pckWriteBoolP(param, file.checksumPage);
+9 -3
View File
@@ -47,6 +47,7 @@ typedef struct BlockIncr
const BlockMap *blockMapPrior; // Prior block map
BlockMap *blockMapOut; // Output block map
uint64_t blockMapOutSize; // Output block map size (if any)
bool blockMapWrite; // Write block map (at least one new/changed block)
size_t inputOffset; // Input offset
bool inputSame; // Input the same data
@@ -175,7 +176,7 @@ blockIncrProcess(THIS_VOID, const Buffer *const input, Buffer *const output)
memcpy(blockMapItem.checksum, bufPtrConst(checksum), bufUsed(checksum));
unsigned int blockMapItemIdx = blockMapSize(this->blockMapOut);
const unsigned int blockMapItemIdx = blockMapSize(this->blockMapOut);
blockMapAdd(this->blockMapOut, &blockMapItem);
lstAdd(this->blockOutList, &blockMapItemIdx);
@@ -226,10 +227,15 @@ blockIncrProcess(THIS_VOID, const Buffer *const input, Buffer *const output)
// Reset block out size and super block no
this->blockOutSize = 0;
this->superBlockNo = 0;
// Block map must be written since there are new/changed blocks
this->blockMapWrite = true;
}
// Write the block map if done processing and at least one block was written
if (this->done && this->blockOutOffset == 0 && this->blockNo > 0)
// Write the block map if done processing and there are new/changed blocks or block list has been truncated
if (this->done && this->blockOutOffset == 0 &&
(this->blockMapWrite ||
(this->blockMapPrior != NULL && blockMapSize(this->blockMapOut) < blockMapSize(this->blockMapPrior))))
{
MEM_CONTEXT_TEMP_BEGIN()
{
+25 -2
View File
@@ -301,8 +301,8 @@ backupFile(
Buffer *const buffer = bufNew(ioBufferSize());
bool readEof = false;
// Read the first buffer to determine if the file was truncated. Detecting truncation matters only when
// bundling is enabled as otherwise the file will be stored anyway.
// Read the first buffer to determine if the file was truncated or was not changed. Detecting truncation
// matters only when bundling is enabled as otherwise the file will be stored anyway.
ioRead(readIo, buffer);
if (ioReadEof(readIo))
@@ -328,6 +328,26 @@ backupFile(
pckReadBinP(
ioFilterGroupResultP(ioReadFilterGroup(readIo), CRYPTO_HASH_FILTER_TYPE, .idx = 0))));
}
// Else check if size is equal to prior size
else if (file->manifestFileHasReference && fileResult->copySize == file->pgFileSizePrior)
{
const Buffer *const copyChecksum = pckReadBinP(
ioFilterGroupResultP(ioReadFilterGroup(readIo), CRYPTO_HASH_FILTER_TYPE));
// If checksum is also equal then no need to copy the file
if (bufEq(file->pgFileChecksum, copyChecksum))
{
// If block incremental make sure no map was returned but a prior map was provided
ASSERT(
file->blockIncrSize == 0 ||
(pckReadU64P(
ioFilterGroupResultP(ioReadFilterGroup(readIo), BLOCK_INCR_FILTER_TYPE)) == 0 &&
file->blockIncrMapPriorFile != NULL));
fileResult->backupCopyResult = backupCopyResultNoOp;
fileResult->copyChecksum = file->pgFileChecksum;
}
}
}
// Copy the file
@@ -391,6 +411,9 @@ backupFile(
{
fileResult->blockIncrMapSize = pckReadU64P(
ioFilterGroupResultP(ioReadFilterGroup(readIo), BLOCK_INCR_FILTER_TYPE));
// There must be a map because the file should have changed or shrunk
ASSERT(fileResult->blockIncrMapSize > 0);
}
// Get repo checksum
+1
View File
@@ -31,6 +31,7 @@ typedef struct BackupFile
bool pgFileDelta; // Checksum pg file before copying
bool pgFileIgnoreMissing; // Ignore missing pg file
uint64_t pgFileSize; // Expected pg file size
uint64_t pgFileSizePrior; // Prior pg file size (if manifestFileHasReference)
bool pgFileCopyExactSize; // Copy only pg expected size
const Buffer *pgFileChecksum; // Expected pg file checksum
bool pgFileChecksumPage; // Validate page checksums?
+1
View File
@@ -49,6 +49,7 @@ backupFileProtocol(PackRead *const param, ProtocolServer *const server)
file.pgFileDelta = pckReadBoolP(param);
file.pgFileIgnoreMissing = pckReadBoolP(param);
file.pgFileSize = pckReadU64P(param);
file.pgFileSizePrior = pckReadU64P(param);
file.pgFileCopyExactSize = pckReadBoolP(param);
file.pgFileChecksum = pckReadBinP(param);
file.pgFileChecksumPage = pckReadBoolP(param);
+20 -2
View File
@@ -104,6 +104,7 @@ typedef enum
manifestFilePackFlagBlockIncr,
manifestFilePackFlagCopy,
manifestFilePackFlagDelta,
manifestFilePackFlagSizePrior,
manifestFilePackFlagResume,
manifestFilePackFlagChecksumPage,
manifestFilePackFlagChecksumPageError,
@@ -146,6 +147,9 @@ manifestFilePack(const Manifest *const manifest, const ManifestFile *const file)
if (file->delta)
flag |= 1 << manifestFilePackFlagDelta;
if (file->reference && file->sizePrior != 0)
flag |= 1 << manifestFilePackFlagSizePrior;
if (file->resume)
flag |= 1 << manifestFilePackFlagResume;
@@ -192,6 +196,10 @@ manifestFilePack(const Manifest *const manifest, const ManifestFile *const file)
if (flag & (1 << manifestFilePackFlagSizeOriginal))
cvtUInt64ToVarInt128(file->sizeOriginal, buffer, &bufferPos, sizeof(buffer));
// Prior size
if (flag & (1 << manifestFilePackFlagSizePrior))
cvtUInt64ToVarInt128(cvtInt64ToZigZag((int64_t)file->size - (int64_t)file->sizePrior), buffer, &bufferPos, sizeof(buffer));
// Use the first timestamp that appears as the base for all other timestamps. Ideally we would like a timestamp as close to the
// middle as possible but it doesn't seem worth doing the calculation.
if (manifestPackBaseTime == -1)
@@ -322,6 +330,14 @@ manifestFileUnpack(const Manifest *const manifest, const ManifestFilePack *const
else
result.sizeOriginal = result.size;
// Prior size
if (flag & (1 << manifestFilePackFlagSizePrior))
{
result.sizePrior =
(uint64_t)
((int64_t)result.size - cvtInt64FromZigZag(cvtUInt64FromVarInt128((const uint8_t *)filePack, &bufferPos, UINT_MAX)));
}
// Timestamp
result.timestamp =
manifestPackBaseTime - (time_t)cvtInt64FromZigZag(cvtUInt64FromVarInt128((const uint8_t *)filePack, &bufferPos, UINT_MAX));
@@ -1711,9 +1727,11 @@ manifestBuildIncr(Manifest *this, const Manifest *manifestPrior, BackupType type
ASSERT(!file.delta || fileSizeEqual);
// Preserve values if the file will not be copied, is possibly equal to the prior file, or will be stored with
// block incremental and the prior file is also stored with block incremental
if (!file.copy || file.delta || fileBlockIncrPreserve)
// block incremental and the prior file is also stored with block incremental. If the file will not be copied
// then the file size must be equal to the prior file so there is no need to check that condition separately.
if (fileSizeEqual || fileBlockIncrPreserve)
{
file.sizePrior = filePrior.size;
file.sizeRepo = filePrior.sizeRepo;
file.checksumSha1 = filePrior.checksumSha1;
file.checksumRepoSha1 = filePrior.checksumRepoSha1;
+1
View File
@@ -159,6 +159,7 @@ typedef struct ManifestFile
uint64_t blockIncrMapSize; // Block incremental map size
uint64_t size; // Final size (after copy)
uint64_t sizeOriginal; // Original size (from manifest build)
uint64_t sizePrior; // Prior size (valid if reference is set, backup only)
uint64_t sizeRepo; // Size in repo
time_t timestamp; // Original timestamp
} ManifestFile;
+34 -9
View File
@@ -1240,6 +1240,29 @@ testRun(void)
" block {no: 0, offset: 6}\n",
"check delta");
// -------------------------------------------------------------------------------------------------------------------------
TEST_TITLE("diff/incr backup with identical data");
ioBufferSizeSet(3);
source = BUFSTRZ("ACCXYZ123@");
destination = bufNew(256);
write = ioBufferWriteNew(destination);
TEST_RESULT_VOID(
ioFilterGroupAdd(ioWriteFilterGroup(write), ioBufferNew()), "buffer to force internal buffer size");
TEST_RESULT_VOID(
ioFilterGroupAdd(
ioWriteFilterGroup(write), blockIncrNewPack(ioFilterParamList(blockIncrNew(3, 3, 8, 3, 0, 0, map, NULL, NULL)))),
"block incr");
TEST_RESULT_VOID(ioWriteOpen(write), "open");
TEST_RESULT_VOID(ioWrite(write, source), "write");
TEST_RESULT_VOID(ioWriteClose(write), "close");
TEST_ASSIGN(mapSize, pckReadU64P(ioFilterGroupResultP(ioWriteFilterGroup(write), BLOCK_INCR_FILTER_TYPE)), "map size");
TEST_RESULT_UINT(mapSize, 0, "map size is zero");
TEST_RESULT_UINT(bufUsed(destination), 0, "repo size is zero");
// -------------------------------------------------------------------------------------------------------------------------
TEST_TITLE("full backup with larger super block");
@@ -3382,14 +3405,16 @@ testRun(void)
"P01 DETAIL: backup file " TEST_PATH "/pg1/block-incr-larger (1.4MB, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: backup file " TEST_PATH "/pg1/block-incr-grow (128KB, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: store truncated file " TEST_PATH "/pg1/truncate-to-zero (4B->0B, [PCT])\n"
"P01 DETAIL: backup file " TEST_PATH "/pg1/normal-same (bundle 1/0, 4B, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: backup file " TEST_PATH "/pg1/grow-to-block-incr (bundle 1/4, 16KB, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: backup file " TEST_PATH "/pg1/global/pg_control (bundle 1/16416, 8KB, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: backup file " TEST_PATH "/pg1/block-incr-shrink-block (bundle 1/24608, 8KB, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: backup file " TEST_PATH "/pg1/block-incr-shrink-below (bundle 1/24625, 8B, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: backup file " TEST_PATH "/pg1/block-incr-shrink (bundle 1/24633, 16.0KB, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: backup file " TEST_PATH "/pg1/block-incr-same (bundle 1/32859, 16KB, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: match file from prior backup " TEST_PATH "/pg1/normal-same (4B, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: backup file " TEST_PATH "/pg1/grow-to-block-incr (bundle 1/0, 16KB, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: backup file " TEST_PATH "/pg1/global/pg_control (bundle 1/16411, 8KB, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: backup file " TEST_PATH "/pg1/block-incr-shrink-block (bundle 1/24603, 8KB, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: backup file " TEST_PATH "/pg1/block-incr-shrink-below (bundle 1/24620, 8B, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: backup file " TEST_PATH "/pg1/block-incr-shrink (bundle 1/24628, 16.0KB, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: match file from prior backup " TEST_PATH "/pg1/block-incr-same (16KB, [PCT]) checksum [SHA1]\n"
"P00 DETAIL: reference pg_data/PG_VERSION to 20191103-165320F\n"
"P00 DETAIL: reference pg_data/block-incr-same to 20191103-165320F\n"
"P00 DETAIL: reference pg_data/normal-same to 20191103-165320F\n"
"P00 INFO: execute non-exclusive backup stop and wait for all WAL segments to archive\n"
"P00 INFO: backup stop archive = 0000000105DC213000000001, lsn = 5dc2130/300000\n"
"P00 DETAIL: wrote 'backup_label' file returned from backup stop function\n"
@@ -3400,17 +3425,17 @@ testRun(void)
TEST_RESULT_STR_Z(
testBackupValidateP(storageRepo(), STRDEF(STORAGE_REPO_BACKUP "/latest")),
".> {d=20191103-165320F_20191106-002640D}\n"
"bundle/1/pg_data/block-incr-same {s=16384, m=0:{0,1}}\n"
"bundle/1/pg_data/block-incr-shrink {s=16383, m=0:{0},1:{0}}\n"
"bundle/1/pg_data/block-incr-shrink-below {s=8}\n"
"bundle/1/pg_data/block-incr-shrink-block {s=8192, m=0:{0}}\n"
"bundle/1/pg_data/global/pg_control {s=8192}\n"
"bundle/1/pg_data/grow-to-block-incr {s=16385, m=1:{0,1,2}}\n"
"bundle/1/pg_data/normal-same {s=4}\n"
"pg_data/backup_label {s=17, ts=+2}\n"
"pg_data/block-incr-grow.pgbi {s=131072, m=0:{0},1:{0},0:{2},1:{1,2,3,4,5,6,7,8,9,10,11,12,13}}\n"
"pg_data/block-incr-larger.pgbi {s=1507328, m=1:{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15},1:{0,1,2,3,4,5,6}}\n"
"20191103-165320F/bundle/1/pg_data/PG_VERSION {s=2, ts=-200000}\n"
"20191103-165320F/bundle/1/pg_data/block-incr-same {s=16384, m=0:{0,1}}\n"
"20191103-165320F/bundle/1/pg_data/normal-same {s=4}\n"
"--------\n"
"[backup:target]\n"
"pg_data={\"path\":\"" TEST_PATH "/pg1\",\"type\":\"path\"}\n",
+10 -1
View File
@@ -1092,7 +1092,8 @@ testRun(void)
"[target:file]\n"
"pg_data/BOGUS={\"size\":6,\"timestamp\":1482182860}\n"
"pg_data/FILE3={\"reference\":\"20190101-010101F\",\"size\":0,\"timestamp\":1482182860}\n"
"pg_data/FILE4={\"size\":55,\"timestamp\":1482182861}\n"
"pg_data/FILE4={\"checksum\":\"ccccccccccaaaaaaaaaabbbbbbbbbbdddddddddd\",\"reference\":\"20190101-010101F\""
",\"size\":55,\"timestamp\":1482182861}\n"
"pg_data/PG_VERSION={\"checksum\":\"aaaaaaaaaabbbbbbbbbbccccccccccdddddddddd\""
",\"reference\":\"20190101-010101F\",\"size\":4,\"timestamp\":1482182860}\n"
TEST_MANIFEST_FILE_DEFAULT
@@ -1369,6 +1370,14 @@ testRun(void)
TEST_RESULT_VOID(
manifestBuildIncr(manifest, manifestPrior, backupTypeIncr, STRDEF("000000030000000300000003")), "incremental manifest");
TEST_RESULT_UINT(
manifestFileFind(manifest, STRDEF(MANIFEST_TARGET_PGDATA "/block-incr-add")).sizePrior, 0, "check prior size");
TEST_RESULT_UINT(
manifestFileFind(manifest, STRDEF(MANIFEST_TARGET_PGDATA "/block-incr-sub")).sizePrior, 0, "check prior size");
TEST_RESULT_UINT(
manifestFileFind(manifest, STRDEF(MANIFEST_TARGET_PGDATA "/block-incr-keep-size")).sizePrior, 16384,
"check prior size");
contentSave = bufNew(0);
TEST_RESULT_VOID(manifestSave(manifest, ioBufferWriteNew(contentSave)), "save manifest");
TEST_RESULT_STR(