From 9d91d1b2f817b543fc8d970af56d1442c325ed7c Mon Sep 17 00:00:00 2001 From: David Steele Date: Fri, 8 Mar 2024 15:07:43 +1300 Subject: [PATCH] Detect files that have not changed during non-delta incremental backup. 02eea55 added code to load a buffer of data from a file being backed up to detect files that have been truncated to zero after manifest generation. This mechanism can also be used to detect files that have not changed since the prior backup. If the result of the file copy fits into a single buffer, then the size and checksum can be compared to the prior file before anything gets stored. If the file matches then it is referenced to the file in the prior backup. The size that can be compared for normal copies is limited by the buffer size but for block incremental it works with any size file since there is no output from block incremental when the file is identical. --- doc/xml/release/2024/2.51.xml | 11 +++++++ src/command/backup/backup.c | 1 + src/command/backup/blockIncr.c | 12 ++++++-- src/command/backup/file.c | 27 +++++++++++++++-- src/command/backup/file.h | 1 + src/command/backup/protocol.c | 1 + src/info/manifest.c | 22 ++++++++++++-- src/info/manifest.h | 1 + test/src/module/command/backupTest.c | 43 ++++++++++++++++++++++------ test/src/module/info/manifestTest.c | 11 ++++++- 10 files changed, 113 insertions(+), 17 deletions(-) diff --git a/doc/xml/release/2024/2.51.xml b/doc/xml/release/2024/2.51.xml index 17c9d220e..50dbff7f2 100644 --- a/doc/xml/release/2024/2.51.xml +++ b/doc/xml/release/2024/2.51.xml @@ -30,6 +30,17 @@ + + + + + + + + +

Detect files that have not changed during non-delta incremental backup.

+
+ diff --git a/src/command/backup/backup.c b/src/command/backup/backup.c index f8c25f67d..e34a8be17 100644 --- a/src/command/backup/backup.c +++ b/src/command/backup/backup.c @@ -2006,6 +2006,7 @@ backupJobCallback(void *const data, const unsigned int clientIdx) pckWriteBoolP(param, file.delta); pckWriteBoolP(param, !strEq(file.name, STRDEF(MANIFEST_TARGET_PGDATA "/" PG_PATH_GLOBAL "/" PG_FILE_PGCONTROL))); pckWriteU64P(param, file.size); + pckWriteU64P(param, file.sizePrior); pckWriteBoolP(param, !backupProcessFilePrimary(jobData->standbyExp, file.name)); pckWriteBinP(param, file.checksumSha1 != NULL ? BUF(file.checksumSha1, HASH_TYPE_SHA1_SIZE) : NULL); pckWriteBoolP(param, file.checksumPage); diff --git a/src/command/backup/blockIncr.c b/src/command/backup/blockIncr.c index 6cd23905b..fbdc111fd 100644 --- a/src/command/backup/blockIncr.c +++ b/src/command/backup/blockIncr.c @@ -47,6 +47,7 @@ typedef struct BlockIncr const BlockMap *blockMapPrior; // Prior block map BlockMap *blockMapOut; // Output block map uint64_t blockMapOutSize; // Output block map size (if any) + bool blockMapWrite; // Write block map (at least one new/changed block) size_t inputOffset; // Input offset bool inputSame; // Input the same data @@ -175,7 +176,7 @@ blockIncrProcess(THIS_VOID, const Buffer *const input, Buffer *const output) memcpy(blockMapItem.checksum, bufPtrConst(checksum), bufUsed(checksum)); - unsigned int blockMapItemIdx = blockMapSize(this->blockMapOut); + const unsigned int blockMapItemIdx = blockMapSize(this->blockMapOut); blockMapAdd(this->blockMapOut, &blockMapItem); lstAdd(this->blockOutList, &blockMapItemIdx); @@ -226,10 +227,15 @@ blockIncrProcess(THIS_VOID, const Buffer *const input, Buffer *const output) // Reset block out size and super block no this->blockOutSize = 0; this->superBlockNo = 0; + + // Block map must be written since there are new/changed blocks + this->blockMapWrite = true; } - // Write the block map if done processing and at least one block was 
written - if (this->done && this->blockOutOffset == 0 && this->blockNo > 0) + // Write the block map if done processing and there are new/changed blocks or block list has been truncated + if (this->done && this->blockOutOffset == 0 && + (this->blockMapWrite || + (this->blockMapPrior != NULL && blockMapSize(this->blockMapOut) < blockMapSize(this->blockMapPrior)))) { MEM_CONTEXT_TEMP_BEGIN() { diff --git a/src/command/backup/file.c b/src/command/backup/file.c index cd1f0c1e9..1297847f2 100644 --- a/src/command/backup/file.c +++ b/src/command/backup/file.c @@ -301,8 +301,8 @@ backupFile( Buffer *const buffer = bufNew(ioBufferSize()); bool readEof = false; - // Read the first buffer to determine if the file was truncated. Detecting truncation matters only when - // bundling is enabled as otherwise the file will be stored anyway. + // Read the first buffer to determine if the file was truncated or was not changed. Detecting truncation + // matters only when bundling is enabled as otherwise the file will be stored anyway. 
ioRead(readIo, buffer); if (ioReadEof(readIo)) @@ -328,6 +328,26 @@ backupFile( pckReadBinP( ioFilterGroupResultP(ioReadFilterGroup(readIo), CRYPTO_HASH_FILTER_TYPE, .idx = 0)))); } + // Else check if size is equal to prior size + else if (file->manifestFileHasReference && fileResult->copySize == file->pgFileSizePrior) + { + const Buffer *const copyChecksum = pckReadBinP( + ioFilterGroupResultP(ioReadFilterGroup(readIo), CRYPTO_HASH_FILTER_TYPE)); + + // If checksum is also equal then no need to copy the file + if (bufEq(file->pgFileChecksum, copyChecksum)) + { + // If block incremental make sure no map was returned but a prior map was provided + ASSERT( + file->blockIncrSize == 0 || + (pckReadU64P( + ioFilterGroupResultP(ioReadFilterGroup(readIo), BLOCK_INCR_FILTER_TYPE)) == 0 && + file->blockIncrMapPriorFile != NULL)); + + fileResult->backupCopyResult = backupCopyResultNoOp; + fileResult->copyChecksum = file->pgFileChecksum; + } + } } // Copy the file @@ -391,6 +411,9 @@ backupFile( { fileResult->blockIncrMapSize = pckReadU64P( ioFilterGroupResultP(ioReadFilterGroup(readIo), BLOCK_INCR_FILTER_TYPE)); + + // There must be a map because the file should have changed or shrunk + ASSERT(fileResult->blockIncrMapSize > 0); } // Get repo checksum diff --git a/src/command/backup/file.h b/src/command/backup/file.h index 14d794d3b..d3dcfdd3c 100644 --- a/src/command/backup/file.h +++ b/src/command/backup/file.h @@ -31,6 +31,7 @@ typedef struct BackupFile bool pgFileDelta; // Checksum pg file before copying bool pgFileIgnoreMissing; // Ignore missing pg file uint64_t pgFileSize; // Expected pg file size + uint64_t pgFileSizePrior; // Prior pg file size (if manifestFileHasReference) bool pgFileCopyExactSize; // Copy only pg expected size const Buffer *pgFileChecksum; // Expected pg file checksum bool pgFileChecksumPage; // Validate page checksums? 
diff --git a/src/command/backup/protocol.c b/src/command/backup/protocol.c index 5e9bb9416..15d835482 100644 --- a/src/command/backup/protocol.c +++ b/src/command/backup/protocol.c @@ -49,6 +49,7 @@ backupFileProtocol(PackRead *const param, ProtocolServer *const server) file.pgFileDelta = pckReadBoolP(param); file.pgFileIgnoreMissing = pckReadBoolP(param); file.pgFileSize = pckReadU64P(param); + file.pgFileSizePrior = pckReadU64P(param); file.pgFileCopyExactSize = pckReadBoolP(param); file.pgFileChecksum = pckReadBinP(param); file.pgFileChecksumPage = pckReadBoolP(param); diff --git a/src/info/manifest.c b/src/info/manifest.c index 9a2c6d8dd..a706b0ea4 100644 --- a/src/info/manifest.c +++ b/src/info/manifest.c @@ -104,6 +104,7 @@ typedef enum manifestFilePackFlagBlockIncr, manifestFilePackFlagCopy, manifestFilePackFlagDelta, + manifestFilePackFlagSizePrior, manifestFilePackFlagResume, manifestFilePackFlagChecksumPage, manifestFilePackFlagChecksumPageError, @@ -146,6 +147,9 @@ manifestFilePack(const Manifest *const manifest, const ManifestFile *const file) if (file->delta) flag |= 1 << manifestFilePackFlagDelta; + if (file->reference && file->sizePrior != 0) + flag |= 1 << manifestFilePackFlagSizePrior; + if (file->resume) flag |= 1 << manifestFilePackFlagResume; @@ -192,6 +196,10 @@ manifestFilePack(const Manifest *const manifest, const ManifestFile *const file) if (flag & (1 << manifestFilePackFlagSizeOriginal)) cvtUInt64ToVarInt128(file->sizeOriginal, buffer, &bufferPos, sizeof(buffer)); + // Prior size + if (flag & (1 << manifestFilePackFlagSizePrior)) + cvtUInt64ToVarInt128(cvtInt64ToZigZag((int64_t)file->size - (int64_t)file->sizePrior), buffer, &bufferPos, sizeof(buffer)); + // Use the first timestamp that appears as the base for all other timestamps. Ideally we would like a timestamp as close to the // middle as possible but it doesn't seem worth doing the calculation. 
if (manifestPackBaseTime == -1) @@ -322,6 +330,14 @@ manifestFileUnpack(const Manifest *const manifest, const ManifestFilePack *const else result.sizeOriginal = result.size; + // Prior size + if (flag & (1 << manifestFilePackFlagSizePrior)) + { + result.sizePrior = + (uint64_t) + ((int64_t)result.size - cvtInt64FromZigZag(cvtUInt64FromVarInt128((const uint8_t *)filePack, &bufferPos, UINT_MAX))); + } + // Timestamp result.timestamp = manifestPackBaseTime - (time_t)cvtInt64FromZigZag(cvtUInt64FromVarInt128((const uint8_t *)filePack, &bufferPos, UINT_MAX)); @@ -1711,9 +1727,11 @@ manifestBuildIncr(Manifest *this, const Manifest *manifestPrior, BackupType type ASSERT(!file.delta || fileSizeEqual); // Preserve values if the file will not be copied, is possibly equal to the prior file, or will be stored with - // block incremental and the prior file is also stored with block incremental - if (!file.copy || file.delta || fileBlockIncrPreserve) + // block incremental and the prior file is also stored with block incremental. If the file will not be copied + // then the file size must be equal to the prior file so there is no need to check that condition separately. 
+ if (fileSizeEqual || fileBlockIncrPreserve) { + file.sizePrior = filePrior.size; file.sizeRepo = filePrior.sizeRepo; file.checksumSha1 = filePrior.checksumSha1; file.checksumRepoSha1 = filePrior.checksumRepoSha1; diff --git a/src/info/manifest.h b/src/info/manifest.h index e76c38034..653c42d85 100644 --- a/src/info/manifest.h +++ b/src/info/manifest.h @@ -159,6 +159,7 @@ typedef struct ManifestFile uint64_t blockIncrMapSize; // Block incremental map size uint64_t size; // Final size (after copy) uint64_t sizeOriginal; // Original size (from manifest build) + uint64_t sizePrior; // Prior size (valid if reference is set, backup only) uint64_t sizeRepo; // Size in repo time_t timestamp; // Original timestamp } ManifestFile; diff --git a/test/src/module/command/backupTest.c b/test/src/module/command/backupTest.c index 3a0753f25..e826c075b 100644 --- a/test/src/module/command/backupTest.c +++ b/test/src/module/command/backupTest.c @@ -1240,6 +1240,29 @@ testRun(void) " block {no: 0, offset: 6}\n", "check delta"); + // ------------------------------------------------------------------------------------------------------------------------- + TEST_TITLE("diff/incr backup with identical data"); + + ioBufferSizeSet(3); + + source = BUFSTRZ("ACCXYZ123@"); + destination = bufNew(256); + write = ioBufferWriteNew(destination); + + TEST_RESULT_VOID( + ioFilterGroupAdd(ioWriteFilterGroup(write), ioBufferNew()), "buffer to force internal buffer size"); + TEST_RESULT_VOID( + ioFilterGroupAdd( + ioWriteFilterGroup(write), blockIncrNewPack(ioFilterParamList(blockIncrNew(3, 3, 8, 3, 0, 0, map, NULL, NULL)))), + "block incr"); + TEST_RESULT_VOID(ioWriteOpen(write), "open"); + TEST_RESULT_VOID(ioWrite(write, source), "write"); + TEST_RESULT_VOID(ioWriteClose(write), "close"); + + TEST_ASSIGN(mapSize, pckReadU64P(ioFilterGroupResultP(ioWriteFilterGroup(write), BLOCK_INCR_FILTER_TYPE)), "map size"); + TEST_RESULT_UINT(mapSize, 0, "map size is zero"); + 
TEST_RESULT_UINT(bufUsed(destination), 0, "repo size is zero"); + // ------------------------------------------------------------------------------------------------------------------------- TEST_TITLE("full backup with larger super block"); @@ -3382,14 +3405,16 @@ testRun(void) "P01 DETAIL: backup file " TEST_PATH "/pg1/block-incr-larger (1.4MB, [PCT]) checksum [SHA1]\n" "P01 DETAIL: backup file " TEST_PATH "/pg1/block-incr-grow (128KB, [PCT]) checksum [SHA1]\n" "P01 DETAIL: store truncated file " TEST_PATH "/pg1/truncate-to-zero (4B->0B, [PCT])\n" - "P01 DETAIL: backup file " TEST_PATH "/pg1/normal-same (bundle 1/0, 4B, [PCT]) checksum [SHA1]\n" - "P01 DETAIL: backup file " TEST_PATH "/pg1/grow-to-block-incr (bundle 1/4, 16KB, [PCT]) checksum [SHA1]\n" - "P01 DETAIL: backup file " TEST_PATH "/pg1/global/pg_control (bundle 1/16416, 8KB, [PCT]) checksum [SHA1]\n" - "P01 DETAIL: backup file " TEST_PATH "/pg1/block-incr-shrink-block (bundle 1/24608, 8KB, [PCT]) checksum [SHA1]\n" - "P01 DETAIL: backup file " TEST_PATH "/pg1/block-incr-shrink-below (bundle 1/24625, 8B, [PCT]) checksum [SHA1]\n" - "P01 DETAIL: backup file " TEST_PATH "/pg1/block-incr-shrink (bundle 1/24633, 16.0KB, [PCT]) checksum [SHA1]\n" - "P01 DETAIL: backup file " TEST_PATH "/pg1/block-incr-same (bundle 1/32859, 16KB, [PCT]) checksum [SHA1]\n" + "P01 DETAIL: match file from prior backup " TEST_PATH "/pg1/normal-same (4B, [PCT]) checksum [SHA1]\n" + "P01 DETAIL: backup file " TEST_PATH "/pg1/grow-to-block-incr (bundle 1/0, 16KB, [PCT]) checksum [SHA1]\n" + "P01 DETAIL: backup file " TEST_PATH "/pg1/global/pg_control (bundle 1/16411, 8KB, [PCT]) checksum [SHA1]\n" + "P01 DETAIL: backup file " TEST_PATH "/pg1/block-incr-shrink-block (bundle 1/24603, 8KB, [PCT]) checksum [SHA1]\n" + "P01 DETAIL: backup file " TEST_PATH "/pg1/block-incr-shrink-below (bundle 1/24620, 8B, [PCT]) checksum [SHA1]\n" + "P01 DETAIL: backup file " TEST_PATH "/pg1/block-incr-shrink (bundle 1/24628, 16.0KB, [PCT]) checksum 
[SHA1]\n" + "P01 DETAIL: match file from prior backup " TEST_PATH "/pg1/block-incr-same (16KB, [PCT]) checksum [SHA1]\n" "P00 DETAIL: reference pg_data/PG_VERSION to 20191103-165320F\n" + "P00 DETAIL: reference pg_data/block-incr-same to 20191103-165320F\n" + "P00 DETAIL: reference pg_data/normal-same to 20191103-165320F\n" "P00 INFO: execute non-exclusive backup stop and wait for all WAL segments to archive\n" "P00 INFO: backup stop archive = 0000000105DC213000000001, lsn = 5dc2130/300000\n" "P00 DETAIL: wrote 'backup_label' file returned from backup stop function\n" @@ -3400,17 +3425,17 @@ testRun(void) TEST_RESULT_STR_Z( testBackupValidateP(storageRepo(), STRDEF(STORAGE_REPO_BACKUP "/latest")), ".> {d=20191103-165320F_20191106-002640D}\n" - "bundle/1/pg_data/block-incr-same {s=16384, m=0:{0,1}}\n" "bundle/1/pg_data/block-incr-shrink {s=16383, m=0:{0},1:{0}}\n" "bundle/1/pg_data/block-incr-shrink-below {s=8}\n" "bundle/1/pg_data/block-incr-shrink-block {s=8192, m=0:{0}}\n" "bundle/1/pg_data/global/pg_control {s=8192}\n" "bundle/1/pg_data/grow-to-block-incr {s=16385, m=1:{0,1,2}}\n" - "bundle/1/pg_data/normal-same {s=4}\n" "pg_data/backup_label {s=17, ts=+2}\n" "pg_data/block-incr-grow.pgbi {s=131072, m=0:{0},1:{0},0:{2},1:{1,2,3,4,5,6,7,8,9,10,11,12,13}}\n" "pg_data/block-incr-larger.pgbi {s=1507328, m=1:{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15},1:{0,1,2,3,4,5,6}}\n" "20191103-165320F/bundle/1/pg_data/PG_VERSION {s=2, ts=-200000}\n" + "20191103-165320F/bundle/1/pg_data/block-incr-same {s=16384, m=0:{0,1}}\n" + "20191103-165320F/bundle/1/pg_data/normal-same {s=4}\n" "--------\n" "[backup:target]\n" "pg_data={\"path\":\"" TEST_PATH "/pg1\",\"type\":\"path\"}\n", diff --git a/test/src/module/info/manifestTest.c b/test/src/module/info/manifestTest.c index a72146d7e..4b9aec48a 100644 --- a/test/src/module/info/manifestTest.c +++ b/test/src/module/info/manifestTest.c @@ -1092,7 +1092,8 @@ testRun(void) "[target:file]\n" 
"pg_data/BOGUS={\"size\":6,\"timestamp\":1482182860}\n" "pg_data/FILE3={\"reference\":\"20190101-010101F\",\"size\":0,\"timestamp\":1482182860}\n" - "pg_data/FILE4={\"size\":55,\"timestamp\":1482182861}\n" + "pg_data/FILE4={\"checksum\":\"ccccccccccaaaaaaaaaabbbbbbbbbbdddddddddd\",\"reference\":\"20190101-010101F\"" + ",\"size\":55,\"timestamp\":1482182861}\n" "pg_data/PG_VERSION={\"checksum\":\"aaaaaaaaaabbbbbbbbbbccccccccccdddddddddd\"" ",\"reference\":\"20190101-010101F\",\"size\":4,\"timestamp\":1482182860}\n" TEST_MANIFEST_FILE_DEFAULT @@ -1369,6 +1370,14 @@ testRun(void) TEST_RESULT_VOID( manifestBuildIncr(manifest, manifestPrior, backupTypeIncr, STRDEF("000000030000000300000003")), "incremental manifest"); + TEST_RESULT_UINT( + manifestFileFind(manifest, STRDEF(MANIFEST_TARGET_PGDATA "/block-incr-add")).sizePrior, 0, "check prior size"); + TEST_RESULT_UINT( + manifestFileFind(manifest, STRDEF(MANIFEST_TARGET_PGDATA "/block-incr-sub")).sizePrior, 0, "check prior size"); + TEST_RESULT_UINT( + manifestFileFind(manifest, STRDEF(MANIFEST_TARGET_PGDATA "/block-incr-keep-size")).sizePrior, 16384, + "check prior size"); + contentSave = bufNew(0); TEST_RESULT_VOID(manifestSave(manifest, ioBufferWriteNew(contentSave)), "save manifest"); TEST_RESULT_STR(