diff --git a/src/backup.c b/src/backup.c index 04478d0a..23139841 100644 --- a/src/backup.c +++ b/src/backup.c @@ -109,7 +109,7 @@ static int checkpoint_timeout(void); //static void backup_list_file(parray *files, const char *root, ) static void parse_backup_filelist_filenames(parray *files, const char *root); static XLogRecPtr wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, - bool wait_prev_lsn, bool wait_prev_segment); + bool wait_prev_segment); static void wait_replica_wal_lsn(XLogRecPtr lsn, bool is_start_backup); static void make_pagemap_from_ptrack(parray *files); static void *StreamLog(void *arg); @@ -1166,7 +1166,7 @@ pg_start_backup(const char *label, bool smooth, pgBackup *backup) if (current.backup_mode == BACKUP_MODE_DIFF_PAGE) /* In PAGE mode wait for current segment... */ - wait_wal_lsn(backup->start_lsn, true, false, false); + wait_wal_lsn(backup->start_lsn, true, false); /* * Do not wait start_lsn for stream backup. * Because WAL streaming will start after pg_start_backup() in stream @@ -1174,7 +1174,7 @@ pg_start_backup(const char *label, bool smooth, pgBackup *backup) */ else if (!stream_wal) /* ...for others wait for previous segment */ - wait_wal_lsn(backup->start_lsn, true, false, true); + wait_wal_lsn(backup->start_lsn, true, true); /* In case of backup from replica for PostgreSQL 9.5 * wait for start_lsn to be replayed by replica @@ -1507,8 +1507,7 @@ pg_ptrack_get_and_clear(Oid tablespace_oid, Oid db_oid, Oid rel_filenode, * returns InvalidXLogRecPtr. */ static XLogRecPtr -wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, bool wait_prev_lsn, - bool wait_prev_segment) +wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, bool wait_prev_segment) { TimeLineID tli; XLogSegNo targetSegNo; @@ -1519,7 +1518,6 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, bool wait_prev_lsn, bool file_exists = false; uint32 try_count = 0, timeout; - char *prior_to = (wait_prev_lsn) ? " prior to " : ""; #ifdef HAVE_LIBZ char gz_wal_segment_path[MAXPGPATH]; @@ -1565,8 +1563,8 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, bool wait_prev_lsn, if (wait_prev_segment) elog(LOG, "Looking for segment: %s", wal_segment); else - elog(LOG, "Looking for LSN %s%X/%X in segment: %s", - prior_to, (uint32) (lsn >> 32), (uint32) lsn, wal_segment); + elog(LOG, "Looking for LSN %X/%X in segment: %s", + (uint32) (lsn >> 32), (uint32) lsn, wal_segment); #ifdef HAVE_LIBZ snprintf(gz_wal_segment_path, sizeof(gz_wal_segment_path), "%s.gz", @@ -1602,16 +1600,19 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, bool wait_prev_lsn, /* * A WAL segment found. Check LSN on it. */ - if (!wait_prev_lsn) + if (wal_contains_lsn(wal_segment_dir, lsn, tli, xlog_seg_size)) + /* Target LSN was found */ { - if (wal_contains_lsn(wal_segment_dir, lsn, tli, xlog_seg_size)) - /* Target LSN was found */ - { - elog(LOG, "Found LSN: %X/%X", (uint32) (lsn >> 32), (uint32) lsn); - return lsn; - } + elog(LOG, "Found LSN: %X/%X", (uint32) (lsn >> 32), (uint32) lsn); + return lsn; } - else + + /* + * If we failed to get LSN of valid record in a reasonable time, try + * to get LSN of last valid record prior to the target LSN. But only + * in case of a backup from a replica. + */ + if (!exclusive_backup && current.from_replica) { XLogRecPtr res; @@ -1620,7 +1621,8 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, bool wait_prev_lsn, if (!XLogRecPtrIsInvalid(res)) { /* LSN of the prior record was found */ - elog(LOG, "Found LSN: %X/%X", (uint32) (res >> 32), (uint32) res); + elog(LOG, "Found prior LSN: %X/%X, it is used as stop LSN", + (uint32) (res >> 32), (uint32) res); return res; } } @@ -1638,18 +1640,16 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, bool wait_prev_lsn, elog(INFO, "Wait for WAL segment %s to be archived", wal_segment_path); else - elog(INFO, "Wait for LSN %s%X/%X in archived WAL segment %s", - prior_to, (uint32) (lsn >> 32), (uint32) lsn, - wal_segment_path); + elog(INFO, "Wait for LSN %X/%X in archived WAL segment %s", + (uint32) (lsn >> 32), (uint32) lsn, wal_segment_path); } if (timeout > 0 && try_count > timeout) { if (file_exists) elog(ERROR, "WAL segment %s was archived, " - "but target LSN %s%X/%X could not be archived in %d seconds", - wal_segment, prior_to, (uint32) (lsn >> 32), (uint32) lsn, - timeout); + "but target LSN %X/%X could not be archived in %d seconds", + wal_segment, (uint32) (lsn >> 32), (uint32) lsn, timeout); /* If WAL segment doesn't exist or we wait for previous segment */ else elog(ERROR, @@ -2056,7 +2056,6 @@ pg_stop_backup(pgBackup *backup) { char *xlog_path, stream_xlog_path[MAXPGPATH]; - XLogRecPtr stop_valid_lsn = InvalidXLogRecPtr; /* Wait for stop_lsn to be received by replica */ /* XXX Do we need this? */ @@ -2067,9 +2066,7 @@ pg_stop_backup(pgBackup *backup) * We wait for stop_lsn in stream mode just in case. */ if (!stop_lsn_exists) - stop_valid_lsn = wait_wal_lsn(stop_backup_lsn, false, - !exclusive_backup && current.from_replica, - false); + stop_backup_lsn = wait_wal_lsn(stop_backup_lsn, false, false); if (stream_wal) { @@ -2088,7 +2085,7 @@ pg_stop_backup(pgBackup *backup) /* iterate over WAL from stop_backup lsn to start_backup lsn */ if (!read_recovery_info(xlog_path, backup->tli, xlog_seg_size, - backup->start_lsn, stop_valid_lsn, + backup->start_lsn, stop_backup_lsn, &backup->recovery_time, &backup->recovery_xid)) { elog(LOG, "Failed to find Recovery Time in WAL. Forced to trust current_timestamp");