1
0
mirror of https://github.com/postgrespro/pg_probackup.git synced 2025-01-07 13:40:17 +02:00

minor improvement: be more paranoid about system ID mismatch when streaming or reading WAL files. Replace RunIdentifySystem() with IdentifySystem() for logging purposes

This commit is contained in:
Grigory Smolkin 2019-07-15 19:39:12 +03:00
parent 69eed7acb5
commit 14514a4815
3 changed files with 86 additions and 29 deletions

View File

@ -103,6 +103,7 @@ static XLogRecPtr wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn,
bool wait_prev_segment);
static void make_pagemap_from_ptrack(parray* files, PGconn* backup_conn);
static void *StreamLog(void *arg);
static void IdentifySystem(StreamThreadArg *stream_thread_arg);
static void check_external_for_tablespaces(parray *external_list,
PGconn *backup_conn);
@ -289,30 +290,10 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo)
instance_config.conn_opt.pgport,
instance_config.conn_opt.pgdatabase,
instance_config.conn_opt.pguser);
/* sanity */
IdentifySystem(&stream_thread_arg);
if (!CheckServerVersionForStreaming(stream_thread_arg.conn))
{
PQfinish(stream_thread_arg.conn);
/*
* Error message already written in CheckServerVersionForStreaming().
* There's no hope of recovering from a version mismatch, so don't
* retry.
*/
elog(ERROR, "Cannot continue backup because stream connect has failed.");
}
/*
* Identify server, obtaining start LSN position and current timeline ID
* at the same time, necessary if not valid data can be found in the
* existing output directory.
*/
if (!RunIdentifySystem(stream_thread_arg.conn, NULL, NULL, NULL, NULL))
{
PQfinish(stream_thread_arg.conn);
elog(ERROR, "Cannot continue backup because stream connect has failed.");
}
/* By default there are some error */
/* By default there are some error */
stream_thread_arg.ret = 1;
/* we must use startpos as start_lsn from start_backup */
stream_thread_arg.startpos = current.start_lsn;
@ -522,7 +503,7 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo)
char pg_control_path[MAXPGPATH];
snprintf(pg_control_path, sizeof(pg_control_path), "%s/%s",
instance_config.pgdata, "global/pg_control");
instance_config.pgdata, XLOG_CONTROL_FILE);
for (i = 0; i < parray_num(backup_files_list); i++)
{
@ -2529,7 +2510,7 @@ StreamLog(void *arg)
/*
* Start the replication
*/
elog(LOG, _("started streaming WAL at %X/%X (timeline %u)"),
elog(LOG, "started streaming WAL at %X/%X (timeline %u)",
(uint32) (stream_arg->startpos >> 32), (uint32) stream_arg->startpos,
stream_arg->starttli);
@ -2570,13 +2551,13 @@ StreamLog(void *arg)
#endif
}
#else
if(ReceiveXlogStream(stream_arg->conn, stream_arg->startpos, stream_arg->starttli, NULL,
(char *) stream_arg->basedir, stop_streaming,
standby_message_timeout, NULL, false, false) == false)
if(ReceiveXlogStream(stream_arg->conn, stream_arg->startpos, stream_arg->starttli,
NULL, (char *) stream_arg->basedir, stop_streaming,
standby_message_timeout, NULL, false, false) == false)
elog(ERROR, "Problem in receivexlog");
#endif
elog(LOG, _("finished streaming WAL at %X/%X (timeline %u)"),
elog(LOG, "finished streaming WAL at %X/%X (timeline %u)",
(uint32) (stop_stream_lsn >> 32), (uint32) stop_stream_lsn, stream_arg->starttli);
stream_arg->ret = 0;
@ -2744,3 +2725,62 @@ check_external_for_tablespaces(parray *external_list, PGconn *backup_conn)
}
}
}
/*
 * Run IDENTIFY_SYSTEM through the streaming connection stored in
 * 'stream_thread_arg' and verify that the server we are connected to is the
 * one we expect: its system identifier must match
 * instance_config.system_identifier and its timeline must match current.tli.
 *
 * The server version is checked first via CheckServerVersionForStreaming().
 *
 * Returns nothing. Every failure is reported through elog(ERROR); the
 * connection is closed with PQfinish() first when the connection itself is
 * the problem (version mismatch, failed or malformed IDENTIFY_SYSTEM reply).
 *
 * NOTE(review): the code relies on elog(ERROR) not returning to the caller;
 * confirm against the elog implementation.
 */
static void
IdentifySystem(StreamThreadArg *stream_thread_arg)
{
	PGresult   *res;
	uint64		stream_conn_sysidentifier = 0;
	char	   *stream_conn_sysidentifier_str;
	uint64		stream_conn_tli_raw = 0;
	char	   *stream_conn_tli_str;
	TimeLineID	stream_conn_tli = 0;

	if (!CheckServerVersionForStreaming(stream_thread_arg->conn))
	{
		PQfinish(stream_thread_arg->conn);
		/*
		 * Error message already written in CheckServerVersionForStreaming().
		 * There's no hope of recovering from a version mismatch, so don't
		 * retry.
		 */
		elog(ERROR, "Cannot continue backup because stream connect has failed.");
	}

	/*
	 * Identify server, obtain server system identifier and timeline
	 */
	res = pgut_execute(stream_thread_arg->conn, "IDENTIFY_SYSTEM", 0, NULL);

	if (PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		/* Fetch the error text before the result and connection are released */
		elog(WARNING,"Could not send replication command \"%s\": %s",
			 "IDENTIFY_SYSTEM", PQerrorMessage(stream_thread_arg->conn));
		PQclear(res);
		PQfinish(stream_thread_arg->conn);
		elog(ERROR, "Cannot continue backup because stream connect has failed.");
	}

	/*
	 * Columns 0 (systemid) and 1 (timeline) of the first row are read below,
	 * so make sure they exist: PQgetvalue() on a missing row or column does
	 * not return a usable string.
	 */
	if (PQntuples(res) != 1 || PQnfields(res) < 2)
	{
		elog(WARNING, "Could not identify system: got %d rows and %d fields, "
			 "expected %d rows and %d or more fields",
			 PQntuples(res), PQnfields(res), 1, 2);
		PQclear(res);
		PQfinish(stream_thread_arg->conn);
		elog(ERROR, "Cannot continue backup because stream connect has failed.");
	}

	stream_conn_sysidentifier_str = PQgetvalue(res, 0, 0);
	stream_conn_tli_str = PQgetvalue(res, 0, 1);

	/*
	 * Parse the timeline strictly instead of using atoi(), which silently
	 * turns garbage into 0 and has no way to report overflow. The parsed
	 * value must also fit into TimeLineID without truncation.
	 */
	if (!parse_uint64(stream_conn_tli_str, &stream_conn_tli_raw, 0))
		elog(ERROR, "%s is not timeline identifier", stream_conn_tli_str);

	stream_conn_tli = (TimeLineID) stream_conn_tli_raw;
	if ((uint64) stream_conn_tli != stream_conn_tli_raw)
		elog(ERROR, "%s is not timeline identifier", stream_conn_tli_str);

	/*
	 * Additional sanity, primarily for PG 9.5,
	 * where system id can be obtained only via "IDENTIFY SYSTEM"
	 */
	if (!parse_uint64(stream_conn_sysidentifier_str, &stream_conn_sysidentifier, 0))
		elog(ERROR, "%s is not system_identifier", stream_conn_sysidentifier_str);

	if (stream_conn_sysidentifier != instance_config.system_identifier)
		elog(ERROR, "System identifier mismatch. Connected PostgreSQL instance has system id: "
			 "" UINT64_FORMAT ". Expected: " UINT64_FORMAT ".",
			 stream_conn_sysidentifier, instance_config.system_identifier);

	if (stream_conn_tli != current.tli)
		elog(ERROR, "Timeline identifier mismatch. "
			 "Connected PostgreSQL instance has timeline id: %X. Expected: %X.",
			 stream_conn_tli, current.tli);

	PQclear(res);
}

View File

@ -510,9 +510,18 @@ wal_contains_lsn(const char *archivedir, XLogRecPtr target_lsn,
xlogreader = InitXLogPageRead(&reader_data, archivedir, target_tli,
wal_seg_size, false, false, true);
if (xlogreader == NULL)
elog(ERROR, "Out of memory");
xlogreader->system_identifier = instance_config.system_identifier;
res = XLogReadRecord(xlogreader, target_lsn, &errormsg) != NULL;
/* Didn't find 'target_lsn' and there is no error, return false */
if (errormsg)
elog(WARNING, "Could not read WAL record at %X/%X: %s",
(uint32) (target_lsn >> 32), (uint32) (target_lsn), errormsg);
CleanupXLogPageRead(xlogreader);
XLogReaderFree(xlogreader);
@ -551,6 +560,11 @@ get_last_wal_lsn(const char *archivedir, XLogRecPtr start_lsn,
xlogreader = InitXLogPageRead(&reader_data, archivedir, tli, wal_seg_size,
false, false, true);
if (xlogreader == NULL)
elog(ERROR, "Out of memory");
xlogreader->system_identifier = instance_config.system_identifier;
/*
* Calculate startpoint. Decide: we should use 'start_lsn' or offset 0.
*/

View File

@ -206,6 +206,9 @@ pgut_get_conninfo_string(PGconn *conn)
return connstr;
}
/* TODO: it would be better to use PQconnectdbParams, as psql does.
* That would allow setting application_name for pg_probackup.
*/
PGconn *
pgut_connect(const char *host, const char *port,
const char *dbname, const char *username)