1
0
mirror of https://github.com/pgbackrest/pgbackrest.git synced 2025-07-05 00:28:52 +02:00

Add timeline and checkpoint checks to backup.

Add the following checks:

* Checkpoint is updated in pg_control after pg_start_backup(). This helps ensure that PostgreSQL and pgBackRest have a consistent view of the storage and that PGDATA paths match.
* Timeline of backup start WAL file matches pg_control. Hard to see how this one could get hit, but we have the power...
* Standby is on the same timeline as the primary. If not, this standby is not following the primary.
* Last standby checkpoint is not greater than the backup checkpoint. If so, this standby is not following the primary.

This also requires some additional plumbing to read/write timeline/checkpoint from pg_control and parse timelines from WAL filenames. There were some changes in the backup tests caused by the fact that pg_control now has different contents for each backup.

The check to ensure that the required checkpoint was reached on the standby should also be updated to use pg_control (it currently uses pg_control_checkpoint()), but that requires non-trivial changes to the test harness and will need to wait.
This commit is contained in:
David Steele
2021-12-07 09:21:07 -05:00
committed by GitHub
parent 9c76056dd0
commit 49145d72ba
17 changed files with 276 additions and 53 deletions

View File

@ -263,6 +263,7 @@ testBackupPqScript(unsigned int pgVersion, time_t backupTimeStart, TestBackupPqS
// Set archive timeout really small to save time on errors
cfgOptionSet(cfgOptArchiveTimeout, cfgSourceParam, varNewInt64(100));
// Set LSN and WAL start/stop
uint64_t lsnStart = ((uint64_t)backupTimeStart & 0xFFFFFF00) << 28;
uint64_t lsnStop =
lsnStart + ((param.walTotal == 0 ? 0 : param.walTotal - 1) * pgControl.walSegmentSize) + (pgControl.walSegmentSize / 2);
@ -274,6 +275,14 @@ testBackupPqScript(unsigned int pgVersion, time_t backupTimeStart, TestBackupPqS
const char *lsnStopStr = strZ(pgLsnToStr(lsnStop));
const char *walSegmentStop = strZ(pgLsnToWalSegment(param.timeline, lsnStop, pgControl.walSegmentSize));
// Save pg_control with updated info
pgControl.checkpoint = lsnStart;
pgControl.timeline = param.timeline;
HRN_STORAGE_PUT(
storagePgIdxWrite(0), PG_PATH_GLOBAL "/" PG_FILE_PGCONTROL, hrnPgControlToBuffer(pgControl),
.timeModified = backupTimeStart);
// Update pg_control on primary with the backup time
HRN_PG_CONTROL_TIME(storagePgIdxWrite(0), backupTimeStart);
@ -384,6 +393,9 @@ testBackupPqScript(unsigned int pgVersion, time_t backupTimeStart, TestBackupPqS
ASSERT(!param.errorAfterStart);
ASSERT(!param.noArchiveCheck);
// Save pg_control with updated info
HRN_STORAGE_PUT(storagePgIdxWrite(1), PG_PATH_GLOBAL "/" PG_FILE_PGCONTROL, hrnPgControlToBuffer(pgControl));
if (param.noPriorWal)
{
harnessPqScriptSet((HarnessPq [])
@ -1528,8 +1540,8 @@ testRun(void)
"P00 INFO: new backup label = [FULL-1]\n"
"P00 INFO: full backup size = 8KB, file total = 2",
TEST_64BIT() ?
(TEST_BIG_ENDIAN() ? "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" : "7ac45fd284c8b8aa84ec84927e95601023911b8e") :
"37689eebc90fa4bf25d34101e3193fb5b592295b");
(TEST_BIG_ENDIAN() ? "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" : "6c1435a9f3c24a020794f58945ada456cb1d3bbe") :
"d432aca683e0443e97cf0600ba3a5a9efd7586fd");
// Make pg no longer appear to be running
HRN_STORAGE_REMOVE(storagePgWrite(), PG_FILE_POSTMTRPID, .errorOnMissing = true);
@ -2093,7 +2105,7 @@ testRun(void)
" from resumed backup\n"
"P00 DETAIL: remove file '" TEST_PATH "/repo/backup/test1/20191003-105320F_20191004-144000D/pg_data/resume-ref.gz'"
" from resumed backup (reference in resumed manifest)\n"
"P01 DETAIL: match file from prior backup " TEST_PATH "/pg1/global/pg_control (8KB, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: backup file " TEST_PATH "/pg1/global/pg_control (8KB, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: match file from prior backup " TEST_PATH "/pg1/postgresql.conf (11B, [PCT]) checksum [SHA1]\n"
"P00 WARN: resumed backup file pg_data/time-mismatch2 does not have expected checksum"
" 984816fd329622876e14907634264e6f332e9fb3. The file will be recopied and backup will continue but this may be"
@ -2103,7 +2115,6 @@ testRun(void)
"P01 DETAIL: match file from prior backup " TEST_PATH "/pg1/PG_VERSION (3B, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: backup file " TEST_PATH "/pg1/resume-ref (0B, [PCT])\n"
"P00 DETAIL: hardlink pg_data/PG_VERSION to 20191003-105320F\n"
"P00 DETAIL: hardlink pg_data/global/pg_control to 20191003-105320F\n"
"P00 DETAIL: hardlink pg_data/postgresql.conf to 20191003-105320F\n"
"P00 INFO: execute exclusive pg_stop_backup() and wait for all WAL segments to archive\n"
"P00 INFO: backup stop archive = 0000000105D9759000000000, lsn = 5d97590/800000\n"
@ -2130,7 +2141,7 @@ testRun(void)
"[target:file]\n"
"pg_data/PG_VERSION={\"checksum\":\"06d06bb31b570b94d7b4325f511f853dbe771c21\",\"reference\":\"20191003-105320F\""
",\"size\":3,\"timestamp\":1570000000}\n"
"pg_data/global/pg_control={\"reference\":\"20191003-105320F\",\"size\":8192,\"timestamp\":1570200000}\n"
"pg_data/global/pg_control={\"size\":8192,\"timestamp\":1570200000}\n"
"pg_data/postgresql.conf={\"checksum\":\"e3db315c260e79211b7b52587123b7aa060f30ab\""
",\"reference\":\"20191003-105320F\",\"size\":11,\"timestamp\":1570000000}\n"
"pg_data/resume-ref={\"size\":0,\"timestamp\":1570200000}\n"
@ -2185,6 +2196,9 @@ testRun(void)
hrnCfgArgRawBool(argList, cfgOptArchiveCopy, true);
HRN_CFG_LOAD(cfgCmdBackup, argList);
// Add pg_control to standby
HRN_PG_CONTROL_PUT(storagePgIdxWrite(1), PG_VERSION_96);
// Create file to copy from the standby. This file will be zero-length on the primary and non-zero-length on the standby
// but no bytes will be copied.
HRN_STORAGE_PUT_EMPTY(storagePgIdxWrite(0), PG_PATH_BASE "/1/1", .timeModified = backupTimeStart);
@ -2572,13 +2586,12 @@ testRun(void)
"P00 INFO: check archive for segment 0000002C05DB8EB000000000\n"
"P00 WARN: a timeline switch has occurred since the 20191027-181320F backup, enabling delta checksum\n"
" HINT: this is normal after restoring from backup or promoting a standby.\n"
"P01 DETAIL: match file from prior backup " TEST_PATH "/pg1/global/pg_control (8KB, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: backup file " TEST_PATH "/pg1/global/pg_control (8KB, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: match file from prior backup " TEST_PATH "/pg1/base/1/1 (8KB, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: match file from prior backup " TEST_PATH "/pg1/postgresql.conf (11B, [PCT]) checksum [SHA1]\n"
"P01 DETAIL: match file from prior backup " TEST_PATH "/pg1/PG_VERSION (2B, [PCT]) checksum [SHA1]\n"
"P00 DETAIL: hardlink pg_data/PG_VERSION to 20191027-181320F\n"
"P00 DETAIL: hardlink pg_data/base/1/1 to 20191027-181320F\n"
"P00 DETAIL: hardlink pg_data/global/pg_control to 20191027-181320F\n"
"P00 DETAIL: hardlink pg_data/postgresql.conf to 20191027-181320F\n"
"P00 DETAIL: hardlink pg_tblspc/32768/PG_11_201809051/1/5 to 20191027-181320F\n"
"P00 INFO: execute non-exclusive pg_stop_backup() and wait for all WAL segments to archive\n"
@ -2623,7 +2636,7 @@ testRun(void)
",\"timestamp\":1572400002}\n"
"pg_data/base/1/1={\"checksum\":\"0631457264ff7f8d5fb1edc2c0211992a67c73e6\",\"checksum-page\":true"
",\"mas""ter\":false,\"reference\":\"20191027-181320F\",\"size\":8192,\"timestamp\":1572200000}\n"
"pg_data/global/pg_control={\"reference\":\"20191027-181320F\",\"size\":8192,\"timestamp\":1572400000}\n"
"pg_data/global/pg_control={\"size\":8192,\"timestamp\":1572400000}\n"
"pg_data/postgresql.conf={\"checksum\":\"e3db315c260e79211b7b52587123b7aa060f30ab\""
",\"reference\":\"20191027-181320F\",\"size\":11,\"timestamp\":1570000000}\n"
"pg_data/tablespace_map={\"checksum\":\"87fe624d7976c2144e10afcb7a9a49b071f35e9c\",\"size\":19"