From 0876dd61900a0ef5a733f3025c7a732dc6e208b9 Mon Sep 17 00:00:00 2001 From: Grigory Smolkin Date: Mon, 22 Mar 2021 20:01:53 +0300 Subject: [PATCH] [Issue #346] set "interrupted" in elog, wait for streamed segments --- src/backup.c | 32 +++++++++++++++++++------------- src/utils/logger.c | 1 + 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/src/backup.c b/src/backup.c index be0147e9..4ec79064 100644 --- a/src/backup.c +++ b/src/backup.c @@ -56,7 +56,7 @@ static void pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn, PGNode static XLogRecPtr wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, TimeLineID tli, bool in_prev_segment, bool segment_only, - int timeout_elevel, bool in_stream_dir); + int timeout_elevel, bool in_stream_dir, pgBackup *backup); static void check_external_for_tablespaces(parray *external_list, PGconn *backup_conn); @@ -268,7 +268,7 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync, bool * Because WAL streaming will start after pg_start_backup() in stream * mode. */ - wait_wal_lsn(current.start_lsn, true, current.tli, false, true, ERROR, false); + wait_wal_lsn(current.start_lsn, true, current.tli, false, true, ERROR, false, ¤t); } /* start stream replication */ @@ -279,6 +279,12 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync, bool start_WAL_streaming(backup_conn, dst_backup_path, &instance_config.conn_opt, current.start_lsn, current.tli); + + /* Make sure that WAL streaming is working + * PAGE backup in stream mode is waited twice, first for + * segment in WAL archive and then for streamed segment + */ + wait_wal_lsn(current.start_lsn, true, current.tli, false, true, ERROR, true, ¤t); } /* initialize backup's file list */ @@ -1262,7 +1268,7 @@ pg_is_superuser(PGconn *conn) static XLogRecPtr wait_wal_lsn(XLogRecPtr target_lsn, bool is_start_lsn, TimeLineID tli, bool in_prev_segment, bool segment_only, - int timeout_elevel, bool in_stream_dir) + int timeout_elevel, bool in_stream_dir, pgBackup *backup) { XLogSegNo targetSegNo; char pg_wal_dir[MAXPGPATH]; @@ -1294,15 +1300,14 @@ wait_wal_lsn(XLogRecPtr target_lsn, bool is_start_lsn, TimeLineID tli, */ if (in_stream_dir) { - pgBackupGetPath2(¤t, pg_wal_dir, lengthof(pg_wal_dir), - DATABASE_DIR, PG_XLOG_DIR); + join_path_components(pg_wal_dir, backup->database_dir, PG_XLOG_DIR); join_path_components(wal_segment_path, pg_wal_dir, wal_segment); wal_segment_dir = pg_wal_dir; } else { join_path_components(wal_segment_path, arclog_path, wal_segment); - wal_segment_dir = arclog_path; + wal_segment_dir = arclog_path; /* global var */ } /* TODO: remove this in 3.0 (it is a cludge against some old bug with archive_timeout) */ @@ -1394,7 +1399,7 @@ wait_wal_lsn(XLogRecPtr target_lsn, bool is_start_lsn, TimeLineID tli, sleep(1); if (interrupted) - elog(ERROR, "Interrupted during waiting for WAL archiving"); + elog(ERROR, "Interrupted during waiting for WAL %s", in_stream_dir ? "streaming" : "archiving"); try_count++; /* Inform user if WAL segment is absent in first attempt */ @@ -1418,9 +1423,10 @@ wait_wal_lsn(XLogRecPtr target_lsn, bool is_start_lsn, TimeLineID tli, { if (file_exists) elog(timeout_elevel, "WAL segment %s was %s, " - "but target LSN %X/%X could not be archived in %d seconds", + "but target LSN %X/%X could not be %s in %d seconds", wal_segment, wal_delivery_str, - (uint32) (target_lsn >> 32), (uint32) target_lsn, timeout); + (uint32) (target_lsn >> 32), (uint32) target_lsn, + wal_delivery_str, timeout); /* If WAL segment doesn't exist or we wait for previous segment */ else elog(timeout_elevel, @@ -1705,7 +1711,7 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn, { /* Wait for segment with current stop_lsn, it is ok for it to never arrive */ wait_wal_lsn(stop_backup_lsn_tmp, false, backup->tli, - false, true, WARNING, stream_wal); + false, true, WARNING, stream_wal, backup); /* Get the first record in segment with current stop_lsn */ lsn_tmp = get_first_record_lsn(xlog_path, segno, backup->tli, @@ -1733,7 +1739,7 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn, * because previous record can be the contrecord. */ lsn_tmp = wait_wal_lsn(stop_backup_lsn_tmp, false, backup->tli, - true, false, ERROR, stream_wal); + true, false, ERROR, stream_wal, backup); /* sanity */ if (!XRecOffIsValid(lsn_tmp) || XLogRecPtrIsInvalid(lsn_tmp)) @@ -1747,7 +1753,7 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn, { /* Wait for segment with current stop_lsn */ wait_wal_lsn(stop_backup_lsn_tmp, false, backup->tli, - false, true, ERROR, stream_wal); + false, true, ERROR, stream_wal, backup); /* Get the next closest record in segment with current stop_lsn */ lsn_tmp = get_next_record_lsn(xlog_path, segno, backup->tli, @@ -1876,7 +1882,7 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn, */ if (!stop_lsn_exists) stop_backup_lsn = wait_wal_lsn(stop_backup_lsn_tmp, false, backup->tli, - false, false, ERROR, stream_wal); + false, false, ERROR, stream_wal, backup); if (stream_wal) { diff --git a/src/utils/logger.c b/src/utils/logger.c index f039d4a5..584b937e 100644 --- a/src/utils/logger.c +++ b/src/utils/logger.c @@ -169,6 +169,7 @@ exit_if_necessary(int elevel) { /* Interrupt other possible routines */ thread_interrupted = true; + interrupted = true; #ifdef WIN32 ExitThread(elevel); #else