From 6c9dfcfe8290df7745356ac9183d82dca61ade98 Mon Sep 17 00:00:00 2001 From: Grigory Smolkin Date: Wed, 7 Nov 2018 04:21:56 +0300 Subject: [PATCH] PGPRO-2095: use latest replayed lsn instead of STOP LSN --- src/backup.c | 80 +++++++++++++++++++++++++++------------------- src/pg_probackup.h | 4 +++ src/util.c | 30 ++++++++--------- 3 files changed, 67 insertions(+), 47 deletions(-) diff --git a/src/backup.c b/src/backup.c index 602ab823..e3e1d60a 100644 --- a/src/backup.c +++ b/src/backup.c @@ -756,28 +756,25 @@ do_backup_instance(void) parray_free(prev_backup_filelist); } - /* Copy pg_control in case of backup from replica >= 9.6 */ + /* In case of backup from replica >= 9.6 we must fix minRecPoint, + * First we must find pg_control in backup_files_list. + */ if (current.from_replica && !exclusive_backup) { + char pg_control_path[MAXPGPATH]; + + snprintf(pg_control_path, sizeof(pg_control_path), "%s/%s", pgdata, "global/pg_control"); + for (i = 0; i < parray_num(backup_files_list); i++) { pgFile *tmp_file = (pgFile *) parray_get(backup_files_list, i); - if (strcmp(tmp_file->name, "pg_control") == 0) + if (strcmp(tmp_file->path, pg_control_path) == 0) { pg_control = tmp_file; break; } } - - if (!pg_control) - elog(ERROR, "Failed to locate pg_control in copied files"); - - if (is_remote_backup) - remote_copy_file(NULL, pg_control); - else - if (!copy_file(pgdata, database_path, pg_control)) - elog(ERROR, "Failed to copy pg_control"); } @@ -1160,9 +1157,6 @@ pg_start_backup(const char *label, bool smooth, pgBackup *backup) */ pg_switch_wal(conn); - //elog(INFO, "START LSN: %X/%X", - // (uint32) (backup->start_lsn >> 32), (uint32) (backup->start_lsn)); - if (current.backup_mode == BACKUP_MODE_DIFF_PAGE) /* In PAGE mode wait for current segment... */ wait_wal_lsn(backup->start_lsn, true, false); @@ -1175,8 +1169,10 @@ pg_start_backup(const char *label, bool smooth, pgBackup *backup) /* ...for others wait for previous segment */ wait_wal_lsn(backup->start_lsn, true, true); - /* Wait for start_lsn to be replayed by replica */ - if (backup->from_replica) + /* In case of backup from replica for PostgreSQL 9.5 + * wait for start_lsn to be replayed by replica + */ + if (backup->from_replica && exclusive_backup) wait_replica_wal_lsn(backup->start_lsn, true); } @@ -1526,7 +1522,7 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, bool wait_prev_segment) GetXLogFileName(wal_segment, tli, targetSegNo, xlog_seg_size); /* - * In pg_start_backup we wait for 'lsn' in 'pg_wal' directory iff it is + * In pg_start_backup we wait for 'lsn' in 'pg_wal' directory if it is * stream and non-page backup. Page backup needs archived WAL files, so we * wait for 'lsn' in archive 'wal' directory for page backups. * @@ -1547,7 +1543,12 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, bool wait_prev_segment) { join_path_components(wal_segment_path, arclog_path, wal_segment); wal_segment_dir = arclog_path; - timeout = archive_timeout; + + if (archive_timeout > 0) + timeout = archive_timeout; + else + timeout = ARCHIVE_TIMEOUT_DEFAULT; + } if (wait_prev_segment) @@ -1780,14 +1781,29 @@ pg_stop_backup(pgBackup *backup) * Stop the non-exclusive backup. Besides stop_lsn it returns from * pg_stop_backup(false) copy of the backup label and tablespace map * so they can be written to disk by the caller. + * In case of backup from replica >= 9.6 we do not trust minRecPoint + * and stop_backup LSN, so we use latest replayed LSN as STOP LSN. */ - stop_backup_query = "SELECT" - " pg_catalog.txid_snapshot_xmax(pg_catalog.txid_current_snapshot())," - " current_timestamp(0)::timestamptz," - " lsn," - " labelfile," - " spcmapfile" - " FROM pg_catalog.pg_stop_backup(false)"; + if (current.from_replica) + stop_backup_query = "SELECT" + " pg_catalog.txid_snapshot_xmax(pg_catalog.txid_current_snapshot())," + " current_timestamp(0)::timestamptz," +#if PG_VERSION_NUM >= 100000 + " pg_catalog.pg_last_wal_replay_lsn()," +#else + " pg_catalog.pg_last_xlog_replay_location()," +#endif + " labelfile," + " spcmapfile" + " FROM pg_catalog.pg_stop_backup(false)"; + else + stop_backup_query = "SELECT" + " pg_catalog.txid_snapshot_xmax(pg_catalog.txid_current_snapshot())," + " current_timestamp(0)::timestamptz," + " lsn," + " labelfile," + " spcmapfile" + " FROM pg_catalog.pg_stop_backup(false)"; } else @@ -1873,14 +1889,14 @@ pg_stop_backup(pgBackup *backup) /* Calculate LSN */ stop_backup_lsn = ((uint64) lsn_hi) << 32 | lsn_lo; - //if (!XRecOffIsValid(stop_backup_lsn)) - //{ - // stop_backup_lsn = restore_lsn; - //} - if (!XRecOffIsValid(stop_backup_lsn)) - elog(ERROR, "Invalid stop_backup_lsn value %X/%X", - (uint32) (stop_backup_lsn >> 32), (uint32) (stop_backup_lsn)); + { + if (XRecOffIsNull(stop_backup_lsn)) + stop_backup_lsn = stop_backup_lsn + SizeOfXLogLongPHD; + else + elog(ERROR, "Invalid stop_backup_lsn value %X/%X", + (uint32) (stop_backup_lsn >> 32), (uint32) (stop_backup_lsn)); + } /* Write backup_label and tablespace_map */ if (!exclusive_backup) diff --git a/src/pg_probackup.h b/src/pg_probackup.h index b75bb581..f5d6bb5c 100644 --- a/src/pg_probackup.h +++ b/src/pg_probackup.h @@ -57,6 +57,10 @@ #define XID_FMT "%u" #endif +/* Check if an XLogRecPtr value is pointed to 0 offset */ +#define XRecOffIsNull(xlrp) \ + ((xlrp) % XLOG_BLCKSZ == 0) + typedef enum CompressAlg { NOT_DEFINED_COMPRESS = 0, diff --git a/src/util.c b/src/util.c index 5f059c37..e20cda17 100644 --- a/src/util.c +++ b/src/util.c @@ -119,7 +119,7 @@ writeControlFile(ControlFileData *ControlFile, char *path) /* copy controlFileSize */ buffer = pg_malloc(ControlFileSize); - memcpy(buffer, &ControlFile, sizeof(ControlFileData)); + memcpy(buffer, ControlFile, sizeof(ControlFileData)); /* Write pg_control */ unlink(path); @@ -136,8 +136,8 @@ writeControlFile(ControlFileData *ControlFile, char *path) if (fsync(fd) != 0) elog(ERROR, "Failed to fsync file: %s", path); - pg_free(buffer); close(fd); + pg_free(buffer); } /* @@ -290,9 +290,7 @@ get_data_checksum_version(bool safe) return ControlFile.data_checksum_version; } -/* MinRecoveryPoint 'as-is' is not to be trusted - * Use STOP LSN instead - */ +/* MinRecoveryPoint 'as-is' is not to be trusted */ void set_min_recovery_point(pgFile *file, const char *backup_path, XLogRecPtr stop_backup_lsn) { @@ -301,20 +299,21 @@ set_min_recovery_point(pgFile *file, const char *backup_path, XLogRecPtr stop_ba size_t size; char fullpath[MAXPGPATH]; - elog(LOG, "Setting minRecPoint to STOP LSN: %X/%X", - (uint32) (stop_backup_lsn >> 32), - (uint32) stop_backup_lsn); - - /* Path to pg_control in backup */ - snprintf(fullpath, sizeof(fullpath), "%s/%s", backup_path, XLOG_CONTROL_FILE); - - /* First fetch file... */ - buffer = slurpFile(backup_path, XLOG_CONTROL_FILE, &size, false); + /* First fetch file content */ + buffer = slurpFile(pgdata, XLOG_CONTROL_FILE, &size, false); if (buffer == NULL) elog(ERROR, "ERROR"); digestControlFile(&ControlFile, buffer, size); + elog(LOG, "Current minRecPoint %X/%X", + (uint32) (ControlFile.minRecoveryPoint >> 32), + (uint32) ControlFile.minRecoveryPoint); + + elog(LOG, "Setting minRecPoint to %X/%X", + (uint32) (stop_backup_lsn >> 32), + (uint32) stop_backup_lsn); + ControlFile.minRecoveryPoint = stop_backup_lsn; /* Update checksum in pg_control header */ @@ -327,7 +326,8 @@ set_min_recovery_point(pgFile *file, const char *backup_path, XLogRecPtr stop_ba /* paranoia */ checkControlFile(&ControlFile); - /* update pg_control */ + /* overwrite pg_control */ + snprintf(fullpath, sizeof(fullpath), "%s/%s", backup_path, XLOG_CONTROL_FILE); writeControlFile(&ControlFile, fullpath); /* Update pg_control checksum in backup_list */