diff --git a/doc/xml/release/2024/2.51.xml b/doc/xml/release/2024/2.51.xml index 7193ed823..54008ab64 100644 --- a/doc/xml/release/2024/2.51.xml +++ b/doc/xml/release/2024/2.51.xml @@ -74,6 +74,17 @@

Detect files that have not changed during non-delta incremental backup.

+ + + + + + + + +

Prevent invalid recovery when backup_label removed.

+
+ diff --git a/src/command/archive/get/get.c b/src/command/archive/get/get.c index 19e2cb1b1..f470c989f 100644 --- a/src/command/archive/get/get.c +++ b/src/command/archive/get/get.c @@ -375,9 +375,17 @@ archiveGetCheck(const StringList *archiveRequestList) // List of warnings StringList *warnList = strLstNew(); - // Get pg control info + // Get pg_control info. Error if the invalid checkpoint written by restore is detected and backup_label is not present. PgControl controlInfo = pgControlFromFile(storagePg(), cfgOptionStrNull(cfgOptPgVersionForce)); + if (controlInfo.checkpoint == PG_CONTROL_CHECKPOINT_INVALID && !storageExistsP(storagePg(), STRDEF(PG_FILE_BACKUPLABEL))) + { + THROW( + FormatError, + "pg_control from backup is not valid without backup_label\n" + "HINT: was the backup_label file removed?"); + } + // Build list of repos/archiveIds where WAL may be found List *cacheRepoList = lstNewP(sizeof(ArchiveGetFindCacheRepo)); diff --git a/src/command/restore/restore.c b/src/command/restore/restore.c index a9b08fa57..ea0059576 100644 --- a/src/command/restore/restore.c +++ b/src/command/restore/restore.c @@ -2612,6 +2612,22 @@ cmdRestore(void) // from being started. if (storageExistsP(storagePg(), STRDEF(PG_PATH_GLOBAL "/" PG_FILE_PGCONTROL "." STORAGE_FILE_TEMP_EXT))) { + // Invalidate the checkpoint in pg_control so the cluster cannot be started without backup_label + if (manifestData(jobData.manifest)->backupOptionOnline) + { + Buffer *const pgControlBuffer = storageGetP( + storageNewReadP(storagePg(), STRDEF(PG_PATH_GLOBAL "/" PG_FILE_PGCONTROL "." STORAGE_FILE_TEMP_EXT))); + const ManifestFile pgControlFile = manifestFileFind( + jobData.manifest, STRDEF(MANIFEST_TARGET_PGDATA "/" PG_PATH_GLOBAL "/" PG_FILE_PGCONTROL)); + + pgControlCheckpointInvalidate(pgControlBuffer, cfgOptionStrNull(cfgOptPgVersionForce)); + storagePutP( + storageNewWriteP( + storagePgWrite(), STRDEF(PG_PATH_GLOBAL "/" PG_FILE_PGCONTROL "." STORAGE_FILE_TEMP_EXT), + .modeFile = pgControlFile.mode, .timeModified = pgControlFile.timestamp), + pgControlBuffer); + } + LOG_INFO( "restore " PG_PATH_GLOBAL "/" PG_FILE_PGCONTROL " (performed last to ensure aborted restores cannot be started)"); diff --git a/src/postgres/interface.c b/src/postgres/interface.c index fb3c896be..5f1164d88 100644 --- a/src/postgres/interface.c +++ b/src/postgres/interface.c @@ -71,6 +71,9 @@ typedef struct PgInterface // Get control crc offset size_t (*controlCrcOffset)(void); + // Invalidate control checkpoint + void (*controlCheckpointInvalidate)(unsigned char *); + // Get the control version for this version of PostgreSQL uint32_t (*controlVersion)(void); @@ -191,6 +194,88 @@ pgWalSegmentSizeCheck(unsigned int pgVersion, unsigned int walSegmentSize) FUNCTION_TEST_RETURN_VOID(); } +/*********************************************************************************************************************************** +Validate the CRC in pg_control. If the version has been forced this may required scanning pg_control for the offset. Return the +offset found so it can be used to update the CRC. +***********************************************************************************************************************************/ +static size_t +pgControlCrcValidate(const Buffer *const controlFile, const PgInterface *const interface, const bool pgVersionForce) +{ + FUNCTION_LOG_BEGIN(logLevelTrace); + FUNCTION_LOG_PARAM(BUFFER, controlFile); + FUNCTION_LOG_PARAM_P(VOID, interface); + FUNCTION_LOG_PARAM(BOOL, pgVersionForce); + FUNCTION_LOG_END(); + + ASSERT(controlFile != NULL); + ASSERT(interface != NULL); + + size_t result = interface->controlCrcOffset(); + + do + { + // Calculate CRC and retrieve expected CRC + const uint32_t crcCalculated = + interface->version > PG_VERSION_94 ? + crc32cOne(bufPtrConst(controlFile), result) : crc32One(bufPtrConst(controlFile), result); + const uint32_t crcExpected = *((uint32_t *)(bufPtrConst(controlFile) + result)); + + // If CRC does not match + if (crcCalculated != crcExpected) + { + // If version is forced then the CRC might be later in the file (assuming the fork added extra fields to pg_control). + // Increment the offset by CRC data size and continue to try again. + if (pgVersionForce) + { + result += sizeof(uint32_t); + + if (result <= bufUsed(controlFile) - sizeof(uint32_t)) + continue; + } + + // If no retry then error + THROW_FMT( + ChecksumError, + "calculated " PG_FILE_PGCONTROL " checksum does not match expected value\n" + "HINT: calculated 0x%x but expected value is 0x%x\n" + "%s" + "HINT: is " PG_FILE_PGCONTROL " corrupt?\n" + "HINT: does " PG_FILE_PGCONTROL " have a different layout than expected?", + crcCalculated, crcExpected, + pgVersionForce ? "HINT: checksum values may be misleading due to forced version scan\n" : ""); + } + + // Do not retry if the CRC is valid + break; + } + while (true); + + FUNCTION_LOG_RETURN(SIZE, result); +} + +/*********************************************************************************************************************************** +Update the CRC in pg_control +***********************************************************************************************************************************/ +static void +pgControlCrcUpdate(Buffer *const controlFile, const unsigned int pgVersion, const size_t crcOffset) +{ + FUNCTION_LOG_BEGIN(logLevelTrace); + FUNCTION_LOG_PARAM(BUFFER, controlFile); + FUNCTION_LOG_PARAM(UINT, pgVersion); + FUNCTION_LOG_PARAM(SIZE, crcOffset); + FUNCTION_LOG_END(); + + ASSERT(controlFile != NULL); + ASSERT(pgVersion != 0); + ASSERT(crcOffset != 0); + + *((uint32_t *)(bufPtr(controlFile) + crcOffset)) = + pgVersion > PG_VERSION_94 ? + crc32cOne(bufPtrConst(controlFile), crcOffset) : crc32One(bufPtrConst(controlFile), crcOffset); + + FUNCTION_LOG_RETURN_VOID(); +} + /**********************************************************************************************************************************/ static PgControl pgControlFromBuffer(const Buffer *controlFile, const String *const pgVersionForce) @@ -237,46 +322,8 @@ pgControlFromBuffer(const Buffer *controlFile, const String *const pgVersionForc PgControl result = interface->control(bufPtrConst(controlFile)); result.version = interface->version; - // Check CRC - size_t crcOffset = interface->controlCrcOffset(); - - do - { - // Calculate CRC and retrieve expected CRC - const uint32_t crcCalculated = - result.version > PG_VERSION_94 ? - crc32cOne(bufPtrConst(controlFile), crcOffset) : crc32One(bufPtrConst(controlFile), crcOffset); - const uint32_t crcExpected = *((uint32_t *)(bufPtrConst(controlFile) + crcOffset)); - - // If CRC does not match - if (crcCalculated != crcExpected) - { - // If version is forced then the CRC might be later in the file (assuming the fork added extra fields to pg_control). - // Increment the offset by CRC data size and continue to try again. - if (pgVersionForce != NULL) - { - crcOffset += sizeof(uint32_t); - - if (crcOffset <= bufUsed(controlFile) - sizeof(uint32_t)) - continue; - } - - // If no retry then error - THROW_FMT( - ChecksumError, - "calculated " PG_FILE_PGCONTROL " checksum does not match expected value\n" - "HINT: calculated 0x%x but expected value is 0x%x\n" - "%s" - "HINT: is " PG_FILE_PGCONTROL " corrupt?\n" - "HINT: does " PG_FILE_PGCONTROL " have a different layout than expected?", - crcCalculated, crcExpected, - pgVersionForce == NULL ? "" : "HINT: checksum values may be misleading due to forced version scan\n"); - } - - // Do not retry if the CRC is valid - break; - } - while (true); + // Validate the CRC + pgControlCrcValidate(controlFile, interface, pgVersionForce != NULL); // Check the segment size pgWalSegmentSizeCheck(result.version, result.walSegmentSize); @@ -389,6 +436,27 @@ pgControlFromFile(const Storage *const storage, const String *const pgVersionFor FUNCTION_LOG_RETURN(PG_CONTROL, result); } +/**********************************************************************************************************************************/ +FN_EXTERN void +pgControlCheckpointInvalidate(Buffer *const buffer, const String *const pgVersionForce) +{ + FUNCTION_LOG_BEGIN(logLevelDebug); + FUNCTION_LOG_PARAM(BUFFER, buffer); + FUNCTION_LOG_PARAM(STRING, pgVersionForce); + FUNCTION_LOG_END(); + + ASSERT(buffer != NULL); + + const PgControl pgControl = pgControlFromBuffer(buffer, pgVersionForce); + const PgInterface *const interface = pgInterfaceVersion(pgControl.version); + const size_t crcOffset = pgControlCrcValidate(buffer, interface, pgVersionForce); + + pgInterfaceVersion(pgControl.version)->controlCheckpointInvalidate(bufPtr(buffer)); + pgControlCrcUpdate(buffer, interface->version, crcOffset); + + FUNCTION_LOG_RETURN_VOID(); +} + /**********************************************************************************************************************************/ FN_EXTERN uint32_t pgControlVersion(unsigned int pgVersion) diff --git a/src/postgres/interface.h b/src/postgres/interface.h index 3e2d8da55..fbfac61c8 100644 --- a/src/postgres/interface.h +++ b/src/postgres/interface.h @@ -94,6 +94,11 @@ WAL segment size is supported for versions below 11. ***********************************************************************************************************************************/ #define PG_WAL_SEGMENT_SIZE_DEFAULT ((unsigned int)(16 * 1024 * 1024)) +/*********************************************************************************************************************************** +Checkpoint written into pg_control on restore. This will prevent PostgreSQL from starting if backup_label is not present. +***********************************************************************************************************************************/ +#define PG_CONTROL_CHECKPOINT_INVALID 0xDEAD + /*********************************************************************************************************************************** PostgreSQL Control File Info ***********************************************************************************************************************************/ @@ -138,6 +143,9 @@ FN_EXTERN bool pgDbIsSystemId(unsigned int id); FN_EXTERN Buffer *pgControlBufferFromFile(const Storage *storage, const String *pgVersionForce); FN_EXTERN PgControl pgControlFromFile(const Storage *storage, const String *pgVersionForce); +// Invalidate checkpoint record in pg_control +FN_EXTERN void pgControlCheckpointInvalidate(Buffer *buffer, const String *pgVersionForce); + // Get the control version for a PostgreSQL version FN_EXTERN uint32_t pgControlVersion(unsigned int pgVersion); diff --git a/src/postgres/interface/version.intern.h b/src/postgres/interface/version.intern.h index 30ee6a584..b53439c74 100644 --- a/src/postgres/interface/version.intern.h +++ b/src/postgres/interface/version.intern.h @@ -92,6 +92,22 @@ Get control crc offset #endif +/*********************************************************************************************************************************** +Invalidate control checkpoint. PostgreSQL skips the first segment so any LSN in that segment is invalid. +***********************************************************************************************************************************/ +#if PG_VERSION > PG_VERSION_MAX + +#elif PG_VERSION >= PG_VERSION_93 + +#define PG_INTERFACE_CONTROL_CHECKPOINT_INVALIDATE(version) \ + static void \ + pgInterfaceControlCheckpointInvalidate##version(unsigned char *const controlFile) \ + { \ + ((ControlFileData *)controlFile)->checkPoint = PG_CONTROL_CHECKPOINT_INVALID; \ + } + +#endif + /*********************************************************************************************************************************** Get the control version ***********************************************************************************************************************************/ diff --git a/test/src/module/command/archiveGetTest.c b/test/src/module/command/archiveGetTest.c index 3e9f2397d..8c525e4bf 100644 --- a/test/src/module/command/archiveGetTest.c +++ b/test/src/module/command/archiveGetTest.c @@ -135,10 +135,28 @@ testRun(void) .remove = true); TEST_STORAGE_LIST_EMPTY(storageSpool(), STORAGE_SPOOL_ARCHIVE_IN); + // ------------------------------------------------------------------------------------------------------------------------- + TEST_TITLE("pg_control from backup is not valid without backup_label"); + + HRN_PG_CONTROL_PUT(storagePgWrite(), PG_VERSION_10, .checkpoint = PG_CONTROL_CHECKPOINT_INVALID); + + strLstAddZ(argList, "000000010000000100000001"); + HRN_CFG_LOAD(cfgCmdArchiveGet, argList, .role = cfgCmdRoleAsync); + + TEST_ERROR( + cmdArchiveGetAsync(), FormatError, + "pg_control from backup is not valid without backup_label\n" + "HINT: was the backup_label file removed?"); + + TEST_RESULT_LOG( + "P00 INFO: get 1 WAL file(s) from archive: 000000010000000100000001"); + TEST_STORAGE_LIST(storageSpoolWrite(), STORAGE_SPOOL_ARCHIVE_IN, "global.error\n", .remove = true); + // ------------------------------------------------------------------------------------------------------------------------- TEST_TITLE("no segments to find"); - HRN_PG_CONTROL_PUT(storagePgWrite(), PG_VERSION_10); + // Success not that backup_label exists + HRN_STORAGE_PUT_EMPTY(storagePgWrite(), PG_FILE_BACKUPLABEL); HRN_INFO_PUT( storageRepoWrite(), INFO_ARCHIVE_PATH_FILE, @@ -148,7 +166,6 @@ testRun(void) "[db:history]\n" "1={\"db-id\":" HRN_PG_SYSTEMID_10_Z ",\"db-version\":\"10\"}\n"); - strLstAddZ(argList, "000000010000000100000001"); HRN_CFG_LOAD(cfgCmdArchiveGet, argList, .role = cfgCmdRoleAsync); TEST_RESULT_VOID(cmdArchiveGetAsync(), "get async"); diff --git a/test/src/module/command/restoreTest.c b/test/src/module/command/restoreTest.c index 4fc2aa211..9b7c74b1c 100644 --- a/test/src/module/command/restoreTest.c +++ b/test/src/module/command/restoreTest.c @@ -2503,6 +2503,8 @@ testRun(void) // ------------------------------------------------------------------------------------------------------------------------- TEST_TITLE("incremental delta"); + const char *pgControlSha1 = NULL; + argList = strLstNew(); hrnCfgArgRawZ(argList, cfgOptStanza, "test1"); hrnCfgArgRaw(argList, cfgOptRepoPath, repoPath); @@ -2548,13 +2550,12 @@ testRun(void) HRN_MANIFEST_PATH_ADD(manifest, .name = TEST_PGDATA PG_PATH_GLOBAL); // global/pg_control - Buffer *fileBuffer = bufNew(8192); - memset(bufPtr(fileBuffer), 255, bufSize(fileBuffer)); - bufUsedSet(fileBuffer, bufSize(fileBuffer)); + Buffer *fileBuffer = hrnPgControlToBuffer(0, 0, (PgControl){.version = PG_VERSION_10}); + pgControlSha1 = strZ(strNewEncode(encodingHex, cryptoHashOne(hashTypeSha1, fileBuffer))); HRN_MANIFEST_FILE_ADD( manifest, .name = TEST_PGDATA PG_PATH_GLOBAL "/" PG_FILE_PGCONTROL, .size = 8192, .timestamp = 1482182860, - .checksumSha1 = "5e2b96c19c4f5c63a5afa2de504d29fe64a4c908"); + .checksumSha1 = pgControlSha1); HRN_STORAGE_PUT(storageRepoWrite(), TEST_REPO_PATH PG_PATH_GLOBAL "/" PG_FILE_PGCONTROL, fileBuffer); // global/888 @@ -2834,7 +2835,7 @@ testRun(void) TEST_RESULT_VOID(cmdRestore(), "successful restore"); - TEST_RESULT_LOG( + TEST_RESULT_LOG_FMT( "P00 INFO: repo1: restore backup set 20161219-212741F_20161219-212918I\n" "P00 INFO: map link 'pg_hba.conf' to '../config/pg_hba.conf'\n" "P00 INFO: map link 'pg_wal' to '../wal'\n" @@ -2865,7 +2866,7 @@ testRun(void) "P01 DETAIL: restore file " TEST_PATH "/pg/base/16384/16385 (16KB, [PCT]) checksum" " d74e5f7ebe52a3ed468ba08c5b6aefaccd1ca88f\n" "P01 DETAIL: restore file " TEST_PATH "/pg/global/pg_control.pgbackrest.tmp (8KB, [PCT])" - " checksum 5e2b96c19c4f5c63a5afa2de504d29fe64a4c908\n" + " checksum %s\n" "P01 DETAIL: restore file " TEST_PATH "/pg/base/1/2 (8KB, [PCT]) checksum 4d7b2a36c5387decf799352a3751883b7ceb96aa\n" "P01 DETAIL: restore file " TEST_PATH "/pg/postgresql.conf (15B, [PCT]) checksum" " 98b8abb2e681e2a5a7d8ab082c0a79727887558d\n" @@ -2911,7 +2912,8 @@ testRun(void) "P00 DETAIL: sync path '" TEST_PATH "/pg/pg_tblspc/1/PG_10_201707211'\n" "P00 INFO: restore global/pg_control (performed last to ensure aborted restores cannot be started)\n" "P00 DETAIL: sync path '" TEST_PATH "/pg/global'\n" - "P00 INFO: restore size = [SIZE], file total = 23"); + "P00 INFO: restore size = [SIZE], file total = 23", + pgControlSha1); TEST_STORAGE_LIST( storagePg(), NULL, @@ -3028,7 +3030,7 @@ testRun(void) TEST_RESULT_VOID(cmdRestore(), "successful restore"); - TEST_RESULT_LOG( + TEST_RESULT_LOG_FMT( "P00 INFO: repo2: restore backup set 20161219-212741F_20161219-212918I, recovery will start at [TIME]\n" "P00 INFO: map link 'pg_hba.conf' to '../config/pg_hba.conf'\n" "P00 INFO: map link 'pg_wal' to '../wal'\n" @@ -3058,7 +3060,7 @@ testRun(void) "P01 DETAIL: restore file " TEST_PATH "/pg/base/16384/16385 - exists and matches backup (16KB, [PCT])" " checksum d74e5f7ebe52a3ed468ba08c5b6aefaccd1ca88f\n" "P01 DETAIL: restore file " TEST_PATH "/pg/global/pg_control.pgbackrest.tmp (8KB, [PCT])" - " checksum 5e2b96c19c4f5c63a5afa2de504d29fe64a4c908\n" + " checksum %s\n" "P01 DETAIL: restore file " TEST_PATH "/pg/base/1/2 (8KB, [PCT]) checksum 4d7b2a36c5387decf799352a3751883b7ceb96aa\n" "P01 DETAIL: restore file " TEST_PATH "/pg/postgresql.conf - exists and matches backup (15B, [PCT])" " checksum 98b8abb2e681e2a5a7d8ab082c0a79727887558d\n" @@ -3104,7 +3106,8 @@ testRun(void) "P00 DETAIL: sync path '" TEST_PATH "/pg/pg_tblspc/1/PG_10_201707211'\n" "P00 INFO: restore global/pg_control (performed last to ensure aborted restores cannot be started)\n" "P00 DETAIL: sync path '" TEST_PATH "/pg/global'\n" - "P00 INFO: restore size = [SIZE], file total = 23"); + "P00 INFO: restore size = [SIZE], file total = 23", + pgControlSha1); // Check stanza archive spool path was removed TEST_STORAGE_LIST_EMPTY(storageSpool(), STORAGE_PATH_ARCHIVE); diff --git a/test/src/module/postgres/interfaceTest.c b/test/src/module/postgres/interfaceTest.c index 1b988fe13..36def178f 100644 --- a/test/src/module/postgres/interfaceTest.c +++ b/test/src/module/postgres/interfaceTest.c @@ -282,6 +282,34 @@ testRun(void) TEST_RESULT_UINT(info.systemId, 0xAAAA0AAAA, "check system id"); TEST_RESULT_UINT(info.version, PG_VERSION_14, "check version"); TEST_RESULT_UINT(info.catalogVersion, 202007201, "check catalog version"); + + // ------------------------------------------------------------------------------------------------------------------------- + TEST_TITLE("Invalidate checkpoint"); + + control = hrnPgControlToBuffer(0, 0, (PgControl){.version = PG_VERSION_13, .systemId = 0xAAAA0AAAA, .checkpoint = 777}); + + info = pgControlFromBuffer(control, NULL); + TEST_RESULT_UINT(info.checkpoint, 777, "check checkpoint"); + + TEST_RESULT_VOID(pgControlCheckpointInvalidate(control, NULL), "invalidate checkpoint"); + info = pgControlFromBuffer(control, NULL); + TEST_RESULT_UINT(info.checkpoint, 0xDEAD, "check invalid checkpoint"); + + // ------------------------------------------------------------------------------------------------------------------------- + TEST_TITLE("Invalidate checkpoint on force control version"); + + // Start with an invalid crc but write a valid one at a greater offset to test the forced scan + control = hrnPgControlToBuffer( + 0, 0xFADEFADE, (PgControl){.version = PG_VERSION_94, .systemId = 0xAAAA0AAAA, .checkpoint = 777}); + crcOffset = pgInterfaceVersion(PG_VERSION_94)->controlCrcOffset() + sizeof(uint32_t) * 2; + *((uint32_t *)(bufPtr(control) + crcOffset)) = crc32One(bufPtrConst(control), crcOffset); + + info = pgControlFromBuffer(control, STRDEF(PG_VERSION_94_Z)); + TEST_RESULT_UINT(info.checkpoint, 777, "check checkpoint"); + + TEST_RESULT_VOID(pgControlCheckpointInvalidate(control, STRDEF(PG_VERSION_94_Z)), "invalidate checkpoint"); + info = pgControlFromBuffer(control, STRDEF(PG_VERSION_94_Z)); + TEST_RESULT_UINT(info.checkpoint, 0xDEAD, "check invalid checkpoint"); } // *****************************************************************************************************************************