1
0
mirror of https://github.com/postgrespro/pg_probackup.git synced 2025-02-09 14:33:17 +02:00

PGPRO-2095: backup from replica without connection to master for PostgreSQL >= 9.6

This commit is contained in:
Grigory Smolkin 2018-10-31 09:47:53 +03:00
parent bf1c8797ae
commit d29aa8b0b4
3 changed files with 150 additions and 18 deletions

View File

@ -475,6 +475,8 @@ do_backup_instance(void)
pgBackup *prev_backup = NULL;
parray *prev_backup_filelist = NULL;
pgFile *pg_control = NULL;
elog(LOG, "Database backup start");
/* Initialize size summary */
@ -754,9 +756,37 @@ do_backup_instance(void)
parray_free(prev_backup_filelist);
}
/* Copy pg_control in case of backup from replica >= 9.6 */
if (current.from_replica && !exclusive_backup)
{
for (i = 0; i < parray_num(backup_files_list); i++)
{
pgFile *tmp_file = (pgFile *) parray_get(backup_files_list, i);
if (strcmp(tmp_file->name, "pg_control") == 0)
{
pg_control = tmp_file;
break;
}
}
if (!pg_control)
elog(ERROR, "Failed to locate pg_control in copied files");
if (is_remote_backup)
remote_copy_file(NULL, pg_control);
else
if (!copy_file(pgdata, database_path, pg_control))
elog(ERROR, "Failed to copy pg_control");
}
/* Notify end of backup */
pg_stop_backup(&current);
if (current.from_replica && !exclusive_backup)
set_min_recovery_point(pg_control, database_path, current.stop_lsn);
/* Add archived xlog files into the list of files of this backup */
if (stream_wal)
{
@ -883,7 +913,7 @@ do_backup(time_t start_time)
}
}
if (current.from_replica)
if (current.from_replica && exclusive_backup)
{
/* Check master connection options */
if (master_host == NULL)
@ -1089,8 +1119,11 @@ pg_start_backup(const char *label, bool smooth, pgBackup *backup)
params[0] = label;
/* For replica we call pg_start_backup() on master */
conn = (backup->from_replica) ? master_conn : backup_conn;
/* For 9.5 replica we call pg_start_backup() on master */
if (backup->from_replica && exclusive_backup)
conn = master_conn;
else
conn = backup_conn;
/* 2nd argument is 'fast'*/
params[1] = smooth ? "false" : "true";
@ -1118,16 +1151,21 @@ pg_start_backup(const char *label, bool smooth, pgBackup *backup)
PQclear(res);
if (current.backup_mode == BACKUP_MODE_DIFF_PAGE)
if (current.backup_mode == BACKUP_MODE_DIFF_PAGE &&
(!(backup->from_replica && !exclusive_backup)))
/*
* Switch to a new WAL segment. It is necessary to get archived WAL
* segment, which includes start LSN of current backup.
* Don't do this for replica backups unless it's PG 9.5
*/
pg_switch_wal(conn);
//elog(INFO, "START LSN: %X/%X",
// (uint32) (backup->start_lsn >> 32), (uint32) (backup->start_lsn));
if (current.backup_mode == BACKUP_MODE_DIFF_PAGE)
/* In PAGE mode wait for current segment... */
wait_wal_lsn(backup->start_lsn, true, false);
wait_wal_lsn(backup->start_lsn, true, false);
/*
* Do not wait start_lsn for stream backup.
* Because WAL streaming will start after pg_start_backup() in stream
@ -1669,7 +1707,7 @@ pg_stop_backup(pgBackup *backup)
PGresult *tablespace_map_content = NULL;
uint32 lsn_hi;
uint32 lsn_lo;
XLogRecPtr restore_lsn = InvalidXLogRecPtr;
//XLogRecPtr restore_lsn = InvalidXLogRecPtr;
int pg_stop_backup_timeout = 0;
char path[MAXPGPATH];
char backup_label[MAXPGPATH];
@ -1689,16 +1727,21 @@ pg_stop_backup(pgBackup *backup)
if (!backup_in_progress)
elog(ERROR, "backup is not in progress");
/* For replica we call pg_stop_backup() on master */
conn = (current.from_replica) ? master_conn : backup_conn;
/* For 9.5 replica we call pg_stop_backup() on master */
if (current.from_replica && exclusive_backup)
conn = master_conn;
else
conn = backup_conn;
/* Remove annoying NOTICE messages generated by backend */
res = pgut_execute(conn, "SET client_min_messages = warning;",
0, NULL);
PQclear(res);
/* Create restore point */
if (backup != NULL)
/* Create restore point
* only if it's a backup from master, or an exclusive replica backup (which connects to master)
*/
if (backup != NULL && (!current.from_replica || (current.from_replica && exclusive_backup)))
{
const char *params[1];
char name[1024];
@ -1716,7 +1759,7 @@ pg_stop_backup(pgBackup *backup)
/* Extract timeline and LSN from the result */
XLogDataFromLSN(PQgetvalue(res, 0, 0), &lsn_hi, &lsn_lo);
/* Calculate LSN */
restore_lsn = ((uint64) lsn_hi) << 32 | lsn_lo;
//restore_lsn = ((uint64) lsn_hi) << 32 | lsn_lo;
PQclear(res);
}
@ -1830,10 +1873,10 @@ pg_stop_backup(pgBackup *backup)
/* Calculate LSN */
stop_backup_lsn = ((uint64) lsn_hi) << 32 | lsn_lo;
if (!XRecOffIsValid(stop_backup_lsn))
{
stop_backup_lsn = restore_lsn;
}
//if (!XRecOffIsValid(stop_backup_lsn))
//{
// stop_backup_lsn = restore_lsn;
//}
if (!XRecOffIsValid(stop_backup_lsn))
elog(ERROR, "Invalid stop_backup_lsn value %X/%X",
@ -1939,7 +1982,7 @@ pg_stop_backup(pgBackup *backup)
stream_xlog_path[MAXPGPATH];
/* Wait for stop_lsn to be received by replica */
if (backup->from_replica)
if (current.from_replica)
wait_replica_wal_lsn(stop_backup_lsn, false);
/*
* Wait for stop_lsn to be archived or streamed.
@ -1962,10 +2005,12 @@ pg_stop_backup(pgBackup *backup)
elog(LOG, "Getting the Recovery Time from WAL");
/* iterate over WAL from stop_backup lsn to start_backup lsn */
if (!read_recovery_info(xlog_path, backup->tli, xlog_seg_size,
backup->start_lsn, backup->stop_lsn,
&backup->recovery_time, &backup->recovery_xid))
{
elog(LOG, "Failed to find Recovery Time in WAL. Forced to trust current_timestamp");
backup->recovery_time = recovery_time;
backup->recovery_xid = recovery_xid;
}
@ -2074,7 +2119,7 @@ backup_files(void *arg)
elog(ERROR, "interrupted during backup");
if (progress)
elog(LOG, "Progress: (%d/%d). Process file \"%s\"",
elog(INFO, "Progress: (%d/%d). Process file \"%s\"",
i + 1, n_backup_files_list, file->path);
/* stat file to check its current state */
@ -2168,7 +2213,7 @@ backup_files(void *arg)
file->path, file->write_size);
}
else
elog(LOG, "unexpected file type %d", buf.st_mode);
elog(WARNING, "unexpected file type %d", buf.st_mode);
}
/* Close connection */

View File

@ -555,6 +555,7 @@ extern uint64 get_system_identifier(char *pgdata);
extern uint64 get_remote_system_identifier(PGconn *conn);
extern uint32 get_data_checksum_version(bool safe);
extern uint32 get_xlog_seg_size(char *pgdata_path);
/*
 * Rewrite the pg_control copy located under backup_path so that its
 * minRecoveryPoint equals stop_backup_lsn, then store the CRC of the
 * rewritten file in file->crc.
 */
extern void set_min_recovery_point(pgFile *file, const char *backup_path, XLogRecPtr stop_backup_lsn);
extern void sanityChecks(void);
extern void time2iso(char *buf, size_t len, time_t time);

View File

@ -14,6 +14,8 @@
#include <time.h>
#include <unistd.h>
const char *
base36enc(long unsigned int value)
{
@ -100,6 +102,44 @@ digestControlFile(ControlFileData *ControlFile, char *src, size_t size)
checkControlFile(ControlFile);
}
/*
 * Write ControlFile to pg_control.
 *
 * ControlFile - in-memory control data to serialize
 * path        - full path of the pg_control file to (re)create
 *
 * The file is written with the full on-disk control file size
 * (PG_CONTROL_FILE_SIZE for PostgreSQL >= 10, PG_CONTROL_SIZE otherwise):
 * the structure itself followed by zero padding. Any existing file at
 * 'path' is removed first, and the new content is fsync'ed before the
 * descriptor is closed. Every failure is reported through elog(ERROR).
 */
static void
writeControlFile(ControlFileData *ControlFile, char *path)
{
	int			fd;
	char	   *buffer = NULL;

#if PG_VERSION_NUM >= 100000
	int			ControlFileSize = PG_CONTROL_FILE_SIZE;
#else
	int			ControlFileSize = PG_CONTROL_SIZE;
#endif

	/*
	 * Build the on-disk image. Zero the whole buffer first so the padding
	 * after the structure never carries uninitialized heap contents to disk.
	 */
	buffer = pg_malloc(ControlFileSize);
	memset(buffer, 0, ControlFileSize);

	/*
	 * ControlFile is already a pointer to the data: copy what it points to,
	 * not the bytes of the pointer variable itself.
	 */
	memcpy(buffer, ControlFile, sizeof(ControlFileData));

	/* Write pg_control: drop the old file, then create it exclusively */
	unlink(path);
	fd = open(path,
			  O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
			  S_IRUSR | S_IWUSR);

	if (fd < 0)
		elog(ERROR, "Failed to open file: %s", path);

	if (write(fd, buffer, ControlFileSize) != ControlFileSize)
		elog(ERROR, "Failed to overwrite file: %s", path);

	/* Make sure the new control file reaches stable storage */
	if (fsync(fd) != 0)
		elog(ERROR, "Failed to fsync file: %s", path);

	pg_free(buffer);
	close(fd);
}
/*
* Utility shared by backup and restore to fetch the current timeline
* used by a node.
@ -250,6 +290,52 @@ get_data_checksum_version(bool safe)
return ControlFile.data_checksum_version;
}
/*
 * MinRecoveryPoint 'as-is' is not to be trusted.
 * Use STOP LSN instead.
 *
 * file            - backup file-list entry for pg_control; its crc field is
 *                   refreshed after the file has been rewritten
 * backup_path     - directory that holds the backed up XLOG_CONTROL_FILE
 * stop_backup_lsn - LSN to store as minRecoveryPoint
 *
 * Reads the control file copy under backup_path, replaces minRecoveryPoint,
 * recomputes the control file CRC, validates the result and writes it back.
 */
void
set_min_recovery_point(pgFile *file, const char *backup_path, XLogRecPtr stop_backup_lsn)
{
	ControlFileData ControlFile;
	char	   *buffer;
	size_t		size;
	char		fullpath[MAXPGPATH];

	elog(LOG, "Setting minRecPoint to STOP LSN: %X/%X",
		 (uint32) (stop_backup_lsn >> 32),
		 (uint32) stop_backup_lsn);

	/* Path to pg_control in backup */
	snprintf(fullpath, sizeof(fullpath), "%s/%s", backup_path, XLOG_CONTROL_FILE);

	/* First fetch file... */
	buffer = slurpFile(backup_path, XLOG_CONTROL_FILE, &size, false);
	if (buffer == NULL)
		elog(ERROR, "Failed to read file: %s", fullpath);

	/* ...then parse it and substitute the recovery point */
	digestControlFile(&ControlFile, buffer, size);

	ControlFile.minRecoveryPoint = stop_backup_lsn;

	/*
	 * Update checksum in pg_control header: the CRC covers every byte of the
	 * structure that precedes the crc field itself.
	 */
	INIT_CRC32C(ControlFile.crc);
	COMP_CRC32C(ControlFile.crc,
				(char *) &ControlFile,
				offsetof(ControlFileData, crc));
	FIN_CRC32C(ControlFile.crc);

	/* paranoia */
	checkControlFile(&ControlFile);

	/* update pg_control */
	writeControlFile(&ControlFile, fullpath);

	/* Update pg_control checksum in backup_list */
	file->crc = pgFileGetCRC(fullpath, false);

	pg_free(buffer);
}
/*
* Convert time_t value to ISO-8601 format string. Always set timezone offset.