Mirror of https://github.com/postgrespro/pg_probackup.git (synced 2025-03-30 23:04:31 +02:00)

[Issue #281] Non-exclusive locking mode for read-only tasks, such as restore and validate

This commit is contained in: parent bd29752456, commit 1cf233d3a0
@@ -765,7 +765,7 @@ do_backup(time_t start_time, pgSetBackupParams *set_backup_params,
	/* Create backup directory and BACKUP_CONTROL_FILE */
	if (pgBackupCreateDir(&current))
		elog(ERROR, "Cannot create backup directory");
-	if (!lock_backup(&current, true))
+	if (!lock_backup(&current, true, true))
		elog(ERROR, "Cannot lock backup %s directory",
			 base36enc(current.start_time));
	write_backup(&current, true);
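Distilled from the call sites later in this diff: writers keep exclusive = true, while read-only tasks pass false. A minimal sketch of the resulting call pattern, reusing this codebase's own functions (a fragment for orientation, not code from the commit itself):

    /* Writers (backup, delete, merge): hold the exclusive lock. */
    if (!lock_backup(&current, true /* strict */, true /* exclusive */))
        elog(ERROR, "Cannot lock backup %s directory",
             base36enc(current.start_time));

    /* Readers (restore, validate): take the shared read-only lock. */
    if (!lock_backup(backup, true /* strict */, false /* exclusive */))
        elog(ERROR, "Cannot lock backup %s", base36enc(backup->start_time));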
src/catalog.c (472 lines changed)
@@ -24,9 +24,14 @@ static pgBackup* get_oldest_backup(timelineInfo *tlinfo);
 static const char *backupModes[] = {"", "PAGE", "PTRACK", "DELTA", "FULL"};
 static pgBackup *readBackupControlFile(const char *path);

-static bool exit_hook_registered = false;
+static bool backup_lock_exit_hook_registered = false;
 static parray *lock_files = NULL;

+static int lock_backup_exclusive(pgBackup *backup, bool strict);
+static bool lock_backup_internal(pgBackup *backup, bool exclusive);
+static bool lock_backup_read_only(pgBackup *backup);
+static bool wait_read_only_owners(pgBackup *backup);
+
 static timelineInfo *
 timelineInfoNew(TimeLineID tli)
 {
@@ -131,29 +136,140 @@ write_backup_status(pgBackup *backup, BackupStatus status,
	tmp->status = backup->status;
	tmp->root_dir = pgut_strdup(backup->root_dir);

+	/* lock backup in exclusive mode */
+	if (!lock_backup(tmp, strict, true))
		elog(ERROR, "Cannot lock backup %s directory", base36enc(backup->start_time));

	write_backup(tmp, strict);

	pgBackupFree(tmp);
}

/*
- * Create exclusive lockfile in the backup's directory.
+ * Lock backup in either exclusive or non-exclusive (read-only) mode.
 * The "strict" flag allows ignoring "out of space" errors and should be
 * used only by the DELETE command to free disk space on a filled-up
 * filesystem.
 *
 * Only read-only tasks (validate, restore) are allowed to take non-exclusive locks.
 * Changing backup metadata must be done with an exclusive lock.
 *
 * Only one process can hold the exclusive lock at any time.
 * The exclusive lock - the PID of the process holding the lock - is placed in
 * the lock file: BACKUP_LOCK_FILE.
 *
 * Multiple processes are allowed to take non-exclusive locks simultaneously.
 * Non-exclusive locks - the PIDs of the processes holding the lock - are placed in
 * a separate lock file: BACKUP_RO_LOCK_FILE.
 * When taking an RO lock, a brief exclusive lock is taken.
 *
 * TODO: lock-timeout as parameter
 * TODO: we must think about a finer-grained unlock mechanism - a separate unlock_backup() function.
 */
bool
-lock_backup(pgBackup *backup, bool strict)
+lock_backup(pgBackup *backup, bool strict, bool exclusive)
{
-	char		lock_file[MAXPGPATH];
-	int			fd;
-	char		buffer[MAXPGPATH * 2 + 256];
-	int			ntries;
-	int			len;
-	int			encoded_pid;
-	pid_t		my_pid,
-				my_p_pid;
+	int			rc;
+	char		lock_file[MAXPGPATH];
+	bool		enospc_detected = false;

-	join_path_components(lock_file, backup->root_dir, BACKUP_CATALOG_PID);
+	join_path_components(lock_file, backup->root_dir, BACKUP_LOCK_FILE);
+
+	rc = lock_backup_exclusive(backup, strict);
+
+	if (rc == 1)
+		return false;
+	else if (rc == 2)
+	{
+		enospc_detected = true;
+		if (strict)
+			return false;
+	}
+
+	/*
+	 * We have the exclusive lock; now there are the following scenarios:
+	 *
+	 * 1. If we are here for an exclusive lock, then we must open the RO lock file
+	 *    and check if any of the processes listed there are still alive.
+	 *    If some processes are alive and are not going away within lock_timeout,
+	 *    then return false.
+	 *
+	 * 2. If we are here for a non-exclusive lock, then write the pid
+	 *    into the RO lock list and release the exclusive lock.
+	 */
+
+	if (lock_backup_internal(backup, exclusive))
+	{
+		if (!exclusive)
+		{
+			/* release exclusive lock */
+			if (fio_unlink(lock_file, FIO_BACKUP_HOST) < 0)
+				elog(ERROR, "Could not remove old lock file \"%s\": %s",
+					 lock_file, strerror(errno));
+
+			/* we are done */
+			return true;
+		}
+
+		/* When locking a backup in lax exclusive mode,
+		 * we should wait until all RO lock owners are gone.
+		 */
+		if (!strict && enospc_detected)
+		{
+			/* We are in lax mode and ENOSPC was encountered: once again try to grab the exclusive lock,
+			 * because there is a chance that lock_backup_read_only may have freed some space on the filesystem,
+			 * thanks to the unlinking of BACKUP_RO_LOCK_FILE.
+			 * If somebody concurrently acquired the exclusive lock first, then we should give up.
+			 */
+			if (lock_backup_exclusive(backup, strict) == 1)
+				return false;
+
+			return true;
+		}
+	}
+	else
+		return false;

	/*
	 * Arrange to unlink the lock file(s) at proc_exit.
	 */
	if (!backup_lock_exit_hook_registered)
	{
		atexit(unlink_lock_atexit);
		backup_lock_exit_hook_registered = true;
	}

	/* Use parray so that the lock files are unlinked in a loop */
	if (lock_files == NULL)
		lock_files = parray_new();
	parray_append(lock_files, pgut_strdup(lock_file));

	return true;
}
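In short: every locker first wins the brief exclusive lock, then either keeps it (writers) or registers itself in the RO list and releases it (readers). A comment-only restatement of the control flow above (a summary, not code from the commit):

    /*
     * Locking protocol, as implemented by lock_backup():
     *
     *   1. lock_backup_exclusive()  -- everyone races on BACKUP_LOCK_FILE
     *   2a. exclusive caller: wait_read_only_owners()
     *       - poll BACKUP_RO_LOCK_FILE until every listed PID is dead
     *       - keep BACKUP_LOCK_FILE for the duration of the task
     *   2b. non-exclusive caller: lock_backup_read_only()
     *       - rewrite BACKUP_RO_LOCK_FILE with the live PIDs plus our own
     *       - unlink BACKUP_LOCK_FILE (the brief exclusive lock)
     */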

/* Lock backup in exclusive mode
 * Result codes:
 *  0 Success
 *  1 Failed to acquire lock in lock_timeout time
 *  2 Failed to acquire lock due to ENOSPC
 */
int
lock_backup_exclusive(pgBackup *backup, bool strict)
{
	char		lock_file[MAXPGPATH];
	int			fd = 0;
	char		buffer[MAXPGPATH * 2 + 256];
	int			ntries = LOCK_TIMEOUT;
	int			log_freq = ntries / 5;
	int			len;
	int			encoded_pid;
	pid_t		my_p_pid;

	join_path_components(lock_file, backup->root_dir, BACKUP_LOCK_FILE);

	/*
	 * TODO: is this stuff with ppid below relevant for us?
	 *
	 * If the PID in the lockfile is our own PID or our parent's or
	 * grandparent's PID, then the file must be stale (probably left over from
	 * a previous system boot cycle). We need to check this because of the
@@ -171,7 +287,6 @@ lock_backup(pgBackup *backup, bool strict)
	 * would surely never launch a competing postmaster or pg_ctl process
	 * directly.
	 */
-	my_pid = getpid();
#ifndef WIN32
	my_p_pid = getppid();
#else

@@ -188,8 +303,14 @@ lock_backup(pgBackup *backup, bool strict)
	 * (for example, a non-writable $backup_instance_path directory might cause a failure
	 * that won't go away). 100 tries seems like plenty.
	 */
-	for (ntries = 0;; ntries++)
+	do
	{
+		FILE *fp_out = NULL;
+
+		if (interrupted)
+			elog(ERROR, "Interrupted while locking backup %s",
+				 base36enc(backup->start_time));
+
		/*
		 * Try to create the lock file --- O_EXCL makes this atomic.
		 *
@@ -202,8 +323,11 @@ lock_backup(pgBackup *backup, bool strict)

		/*
		 * Couldn't create the pid file. Probably it already exists.
+		 * If file already exists or we have some permission problem (???),
+		 * then retry;
		 */
-		if ((errno != EEXIST && errno != EACCES) || ntries > 100)
+		// if ((errno != EEXIST && errno != EACCES))
+		if (errno != EEXIST)
			elog(ERROR, "Could not create lock file \"%s\": %s",
				 lock_file, strerror(errno));

@@ -211,28 +335,38 @@ lock_backup(pgBackup *backup, bool strict)
		 * Read the file to get the old owner's PID. Note race condition
		 * here: file might have been deleted since we tried to create it.
		 */
-		fd = fio_open(lock_file, O_RDONLY, FIO_BACKUP_HOST);
-		if (fd < 0)
+		fp_out = fopen(lock_file, "r");
+		if (fp_out == NULL)
		{
			if (errno == ENOENT)
				continue; /* race condition; try again */
-			elog(ERROR, "Could not open lock file \"%s\": %s",
-				 lock_file, strerror(errno));
+			elog(ERROR, "Cannot open lock file \"%s\": %s", lock_file, strerror(errno));
		}
-		if ((len = fio_read(fd, buffer, sizeof(buffer) - 1)) < 0)
-			elog(ERROR, "Could not read lock file \"%s\": %s",
-				 lock_file, strerror(errno));
-		fio_close(fd);
+
+		len = fread(buffer, 1, sizeof(buffer) - 1, fp_out);
+		if (ferror(fp_out))
+			elog(ERROR, "Cannot read from lock file: \"%s\"", lock_file);
+		fclose(fp_out);

+		/*
+		 * It should be possible only as a result of system crash,
+		 * so its hypothetical owner should be dead by now
+		 */
		if (len == 0)
-			elog(ERROR, "Lock file \"%s\" is empty", lock_file);
+		{
+			elog(WARNING, "Lock file \"%s\" is empty", lock_file);
+			goto grab_lock;
+		}

		buffer[len] = '\0';
		encoded_pid = atoi(buffer);

		if (encoded_pid <= 0)
-			elog(ERROR, "Bogus data in lock file \"%s\": \"%s\"",
-				 lock_file, buffer);
+		{
+			elog(WARNING, "Bogus data in lock file \"%s\": \"%s\"",
+				 lock_file, buffer);
+			goto grab_lock;
+		}

		/*
		 * Check to see if the other process still exists
@@ -247,9 +381,19 @@ lock_backup(pgBackup *backup, bool strict)
		{
			if (kill(encoded_pid, 0) == 0)
			{
-				elog(WARNING, "Process %d is using backup %s and still is running",
-					 encoded_pid, base36enc(backup->start_time));
-				return false;
+				/* complain every fifth interval */
+				if ((ntries % log_freq) == 0)
+				{
+					elog(WARNING, "Process %d is using backup %s, and is still running",
+						 encoded_pid, base36enc(backup->start_time));
+
+					elog(WARNING, "Waiting %u seconds on lock for backup %s", ntries, base36enc(backup->start_time));
+				}
+
+				sleep(1);
+
+				/* try again */
+				continue;
			}
			else
			{
@@ -262,15 +406,25 @@ lock_backup(pgBackup *backup, bool strict)
			}
		}

+grab_lock:
		/*
		 * Looks like nobody's home. Unlink the file and try again to create
		 * it. Need a loop because of possible race condition against other
		 * would-be creators.
		 */
		if (fio_unlink(lock_file, FIO_BACKUP_HOST) < 0)
		{
			if (errno == ENOENT)
				continue; /* race condition, again */
			elog(ERROR, "Could not remove old lock file \"%s\": %s",
				 lock_file, strerror(errno));
		}
-	}
+
+	} while (ntries--);
+
+	/* Failed to acquire exclusive lock in time */
+	if (fd <= 0)
+		return 1;

	/*
	 * Successfully created the file, now fill it.
@@ -284,52 +438,215 @@ lock_backup(pgBackup *backup, bool strict)

		fio_close(fd);
		fio_unlink(lock_file, FIO_BACKUP_HOST);
		/* if write didn't set errno, assume problem is no disk space */
-		errno = save_errno ? save_errno : ENOSPC;

		/* In lax mode if we failed to grab lock because of 'out of space error',
		 * then treat backup as locked.
		 * Only delete command should be run in lax mode.
		 */
-		if (!strict && errno == ENOSPC)
-			return true;
-
-		elog(ERROR, "Could not write lock file \"%s\": %s",
-			 lock_file, strerror(errno));
+		if (!strict && save_errno == ENOSPC)
+			return 2;
+		else
+			elog(ERROR, "Could not write lock file \"%s\": %s",
+				 lock_file, strerror(save_errno));
	}

	if (fio_flush(fd) != 0)
	{
		int	save_errno = errno;

		fio_close(fd);
		fio_unlink(lock_file, FIO_BACKUP_HOST);
-		errno = save_errno;
-		elog(ERROR, "Could not write lock file \"%s\": %s",
-			 lock_file, strerror(errno));
+
+		/* In lax mode if we failed to grab lock because of 'out of space error',
+		 * then treat backup as locked.
+		 * Only delete command should be run in lax mode.
+		 */
+		if (!strict && save_errno == ENOSPC)
+			return 2;
+		else
+			elog(ERROR, "Could not flush lock file \"%s\": %s",
+				 lock_file, strerror(save_errno));
	}

	if (fio_close(fd) != 0)
	{
		int	save_errno = errno;

		fio_unlink(lock_file, FIO_BACKUP_HOST);
-		errno = save_errno;
-		elog(ERROR, "Could not write lock file \"%s\": %s",
-			 lock_file, strerror(errno));
+
+		if (!strict && errno == ENOSPC)
+			return 2;
+		else
+			elog(ERROR, "Could not close lock file \"%s\": %s",
+				 lock_file, strerror(save_errno));
	}

-	/*
-	 * Arrange to unlink the lock file(s) at proc_exit.
-	 */
-	if (!exit_hook_registered)
+	return 0;
}
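The retry loop above relies on O_CREAT|O_EXCL being atomic: among concurrent creators, exactly one open() succeeds. A self-contained sketch of that primitive using plain POSIX open() instead of pg_probackup's fio_* wrappers (the helper name is illustrative, not from the commit):

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    /* Try once to take the lock; returns the fd on success, -1 if it is held. */
    static int
    try_lock_once(const char *path)
    {
        /* O_EXCL guarantees failure with EEXIST if the file already exists,
         * so exactly one of several concurrent creators can win. */
        int fd = open(path, O_CREAT | O_EXCL | O_WRONLY, 0600);

        if (fd >= 0)
        {
            char buf[32];
            int  len = snprintf(buf, sizeof(buf), "%d\n", (int) getpid());

            (void) write(fd, buf, len);   /* record the owner PID, as the diff does */
            return fd;
        }
        return -1;   /* EEXIST: someone else holds the lock; caller may retry */
    }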

/* Wait until all read-only lock owners are gone */
bool
wait_read_only_owners(pgBackup *backup)
{
	FILE	*fp = NULL;
	char	buffer[256];
	pid_t	encoded_pid;
	int		ntries = LOCK_TIMEOUT;
	int		log_freq = ntries / 5;
	char	lock_file[MAXPGPATH];

	join_path_components(lock_file, backup->root_dir, BACKUP_RO_LOCK_FILE);

	fp = fopen(lock_file, "r");
	if (fp == NULL && errno != ENOENT)
		elog(ERROR, "Cannot open lock file \"%s\": %s", lock_file, strerror(errno));

	/* iterate over pids in lock file */
	while (fp && fgets(buffer, sizeof(buffer), fp))
	{
		encoded_pid = atoi(buffer);
		if (encoded_pid <= 0)
		{
			elog(WARNING, "Bogus data in lock file \"%s\": \"%s\"", lock_file, buffer);
			continue;
		}

		/* wait until RO lock owners go away */
		do
		{
			if (interrupted)
				elog(ERROR, "Interrupted while locking backup %s",
					 base36enc(backup->start_time));

			if (encoded_pid != my_pid)
			{
				if (kill(encoded_pid, 0) == 0)
				{
					if ((ntries % log_freq) == 0)
					{
						elog(WARNING, "Process %d is using backup %s in read only mode, and is still running",
							 encoded_pid, base36enc(backup->start_time));

						elog(WARNING, "Waiting %u seconds on lock for backup %s", ntries,
							 base36enc(backup->start_time));
					}

					sleep(1);

					/* try again */
					continue;
				}
				else if (errno != ESRCH)
					elog(ERROR, "Failed to send signal 0 to a process %d: %s",
						 encoded_pid, strerror(errno));
			}

			/* locker is dead */
			break;

		} while (ntries--);

		if (ntries <= 0)
		{
			elog(WARNING, "Cannot lock backup %s in exclusive mode, because process %u owns read-only lock",
				 base36enc(backup->start_time), encoded_pid);
			return false;
		}
	}

	if (fp && ferror(fp))
		elog(ERROR, "Cannot read from lock file: \"%s\"", lock_file);

	if (fp)
		fclose(fp);

	/* unlink RO lock list */
	fio_unlink(lock_file, FIO_BACKUP_HOST);
	return true;
}
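Both this function and lock_backup_exclusive() test locker liveness with kill(pid, 0): signal 0 delivers nothing, but still performs the existence and permission checks. The idiom in isolation (hypothetical helper name):

    #include <errno.h>
    #include <signal.h>
    #include <stdbool.h>
    #include <sys/types.h>

    /* Returns true if a process with this PID (probably) still exists. */
    static bool
    pid_is_alive(pid_t pid)
    {
        if (kill(pid, 0) == 0)
            return true;    /* process exists and we may signal it */
        if (errno == ESRCH)
            return false;   /* no such process: the locker is dead */
        /* EPERM etc.: a process exists but belongs to another user. */
        return true;
    }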

bool
lock_backup_internal(pgBackup *backup, bool exclusive)
{
	if (exclusive)
		return wait_read_only_owners(backup);
	else
		return lock_backup_read_only(backup);
}

bool
lock_backup_read_only(pgBackup *backup)
{
	FILE	*fp_in = NULL;
	FILE	*fp_out = NULL;
	char	buf_in[256];
	pid_t	encoded_pid;
	char	lock_file[MAXPGPATH];

	char	buffer[8192]; /* TODO: should be enough, but maybe malloc+realloc is better ? */
	char	lock_file_tmp[MAXPGPATH];
	int		buffer_len = 0;

	join_path_components(lock_file, backup->root_dir, BACKUP_RO_LOCK_FILE);
	snprintf(lock_file_tmp, MAXPGPATH, "%s%s", lock_file, "tmp");

	/* open already existing lock files */
	fp_in = fopen(lock_file, "r");
	if (fp_in == NULL && errno != ENOENT)
		elog(ERROR, "Cannot open lock file \"%s\": %s", lock_file, strerror(errno));

	/* read PIDs of owners */
	while (fp_in && fgets(buf_in, sizeof(buf_in), fp_in))
	{
-		atexit(unlink_lock_atexit);
-		exit_hook_registered = true;
+		encoded_pid = atoi(buf_in);
+		if (encoded_pid <= 0)
+		{
+			elog(WARNING, "Bogus data in lock file \"%s\": \"%s\"", lock_file, buf_in);
+			continue;
+		}

		if (encoded_pid != my_pid)
		{
			if (kill(encoded_pid, 0) == 0)
			{
				/*
				 * Somebody is still using this backup in RO mode,
				 * copy this pid into a new file.
				 */
				buffer_len += snprintf(buffer+buffer_len, 4096, "%u\n", encoded_pid);
			}
			else if (errno != ESRCH)
				elog(ERROR, "Failed to send signal 0 to a process %d: %s",
					 encoded_pid, strerror(errno));
		}
	}

	if (fp_in)
	{
		if (ferror(fp_in))
			elog(ERROR, "Cannot read from lock file: \"%s\"", lock_file);
		fclose(fp_in);
	}

-	/* Use parray so that the lock files are unlinked in a loop */
-	if (lock_files == NULL)
-		lock_files = parray_new();
-	parray_append(lock_files, pgut_strdup(lock_file));
+	fp_out = fopen(lock_file_tmp, "w");
+	if (fp_out == NULL)
+		elog(ERROR, "Cannot open temp lock file \"%s\": %s", lock_file_tmp, strerror(errno));

	/* add my own pid */
	buffer_len += snprintf(buffer+buffer_len, sizeof(buffer), "%u\n", my_pid);

	/* write out the collected PIDs to temp lock file */
	fwrite(buffer, 1, buffer_len, fp_out);

	if (ferror(fp_out))
		elog(ERROR, "Cannot write to lock file: \"%s\"", lock_file_tmp);

	if (fclose(fp_out) != 0)
		elog(ERROR, "Cannot close temp lock file \"%s\": %s", lock_file_tmp, strerror(errno));

	if (rename(lock_file_tmp, lock_file) < 0)
		elog(ERROR, "Cannot rename file \"%s\" to \"%s\": %s",
			 lock_file_tmp, lock_file, strerror(errno));

	return true;
}
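Note the write-temp-then-rename() pattern at the end of lock_backup_read_only(): concurrent readers of BACKUP_RO_LOCK_FILE never see a half-written PID list, because rename() within one filesystem is atomic on POSIX. The bare pattern (hypothetical helper, no fio_* wrappers):

    #include <stdio.h>

    /* Replace `path` atomically: write a sibling temp file, then rename()
     * it over the original, so readers never observe a partial file. */
    static int
    replace_file_atomically(const char *path, const char *tmp_path,
                            const char *data, size_t len)
    {
        FILE   *out = fopen(tmp_path, "w");
        size_t  written;

        if (out == NULL)
            return -1;
        written = fwrite(data, 1, len, out);
        if (fclose(out) != 0 || written != len)
            return -1;
        /* rename() within a single filesystem is atomic on POSIX. */
        return rename(tmp_path, path);
    }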

@@ -570,7 +887,7 @@ get_backup_filelist(pgBackup *backup, bool strict)
 * Lock list of backups. Function goes in backward direction.
 */
void
-catalog_lock_backup_list(parray *backup_list, int from_idx, int to_idx, bool strict)
+catalog_lock_backup_list(parray *backup_list, int from_idx, int to_idx, bool strict, bool exclusive)
{
	int		start_idx,
			end_idx;
@@ -585,7 +902,7 @@ catalog_lock_backup_list(parray *backup_list, int from_idx, int to_idx, bool str
	for (i = start_idx; i >= end_idx; i--)
	{
		pgBackup *backup = (pgBackup *) parray_get(backup_list, i);
-		if (!lock_backup(backup, strict))
+		if (!lock_backup(backup, strict, exclusive))
			elog(ERROR, "Cannot lock backup %s directory",
				 base36enc(backup->start_time));
	}

@@ -1785,21 +2102,23 @@ pgBackupWriteControl(FILE *out, pgBackup *backup)

/*
 * Save the backup content into BACKUP_CONTROL_FILE.
- * TODO: honor the strict flag
+ * Flag strict allows to ignore "out of space" error
+ * when attempting to lock backup. Only delete is allowed
+ * to use this functionality.
 */
void
write_backup(pgBackup *backup, bool strict)
{
-	FILE	*fp = NULL;
+	FILE	*fp_out = NULL;
	char	path[MAXPGPATH];
	char	path_temp[MAXPGPATH];
-	char	buf[4096];
+	char	buf[8192];

	join_path_components(path, backup->root_dir, BACKUP_CONTROL_FILE);
	snprintf(path_temp, sizeof(path_temp), "%s.tmp", path);

-	fp = fopen(path_temp, PG_BINARY_W);
-	if (fp == NULL)
+	fp_out = fopen(path_temp, PG_BINARY_W);
+	if (fp_out == NULL)
		elog(ERROR, "Cannot open control file \"%s\": %s",
			 path_temp, strerror(errno));

@@ -1807,19 +2126,34 @@ write_backup(pgBackup *backup, bool strict)
		elog(ERROR, "Cannot change mode of \"%s\": %s", path_temp,
			 strerror(errno));

-	setvbuf(fp, buf, _IOFBF, sizeof(buf));
+	setvbuf(fp_out, buf, _IOFBF, sizeof(buf));

-	pgBackupWriteControl(fp, backup);
+	pgBackupWriteControl(fp_out, backup);

-	if (fflush(fp) != 0)
-		elog(ERROR, "Cannot flush control file \"%s\": %s",
-			 path_temp, strerror(errno));
+	/* Ignore 'out of space' error in lax mode */
+	if (fflush(fp_out) != 0)
+	{
+		int elevel = ERROR;
+		int save_errno = errno;
+
+		if (!strict && (errno == ENOSPC))
+			elevel = WARNING;
+
+		elog(elevel, "Cannot flush control file \"%s\": %s",
+			 path_temp, strerror(save_errno));
+
+		if (!strict && (save_errno == ENOSPC))
+		{
+			fclose(fp_out);
+			return;
+		}
+	}

-	if (fsync(fileno(fp)) < 0)
+	if (fsync(fileno(fp_out)) < 0)
		elog(ERROR, "Cannot sync control file \"%s\": %s",
			 path_temp, strerror(errno));

-	if (fclose(fp) != 0)
+	if (fclose(fp_out) != 0)
		elog(ERROR, "Cannot close control file \"%s\": %s",
			 path_temp, strerror(errno));
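One recurring detail in the error paths above: errno is copied into save_errno before cleanup (fio_close, fio_unlink, fclose), because cleanup calls may themselves overwrite errno. The pattern in miniature (illustrative helper name, not from the commit):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>

    /* Report a write failure without letting cleanup clobber errno. */
    static void
    report_write_failure(FILE *fp, const char *path)
    {
        int save_errno = errno;    /* capture before any cleanup call */

        fclose(fp);                /* cleanup may overwrite errno... */
        remove(path);              /* ...so report the saved value below */

        fprintf(stderr, "Could not write \"%s\": %s\n",
                path, strerror(save_errno));
    }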
src/delete.c (10 lines changed)
@@ -89,7 +89,7 @@ do_delete(time_t backup_id)
	if (!dry_run)
	{
		/* Lock marked for delete backups */
-		catalog_lock_backup_list(delete_list, parray_num(delete_list) - 1, 0, false);
+		catalog_lock_backup_list(delete_list, parray_num(delete_list) - 1, 0, false, true);

		/* Delete backups from the end of list */
		for (i = (int) parray_num(delete_list) - 1; i >= 0; i--)
@@ -513,7 +513,7 @@ do_retention_merge(parray *backup_list, parray *to_keep_list, parray *to_purge_l
	parray_rm(to_purge_list, full_backup, pgBackupCompareId);

	/* Lock merge chain */
-	catalog_lock_backup_list(merge_list, parray_num(merge_list) - 1, 0, true);
+	catalog_lock_backup_list(merge_list, parray_num(merge_list) - 1, 0, true, true);

	/* Consider this extreme case */
	// PAGEa1 PAGEb1 both valid
@@ -630,7 +630,7 @@ do_retention_purge(parray *to_keep_list, parray *to_purge_list)
			continue;

		/* Actual purge */
-		if (!lock_backup(delete_backup, false))
+		if (!lock_backup(delete_backup, false, true))
		{
			/* If the backup still is used, do not interrupt and go to the next */
			elog(WARNING, "Cannot lock backup %s directory, skip purging",
@@ -975,7 +975,7 @@ do_delete_instance(void)
	/* Delete all backups. */
	backup_list = catalog_get_backup_list(instance_name, INVALID_BACKUP_ID);

-	catalog_lock_backup_list(backup_list, 0, parray_num(backup_list) - 1, true);
+	catalog_lock_backup_list(backup_list, 0, parray_num(backup_list) - 1, true, true);

	for (i = 0; i < parray_num(backup_list); i++)
	{
@@ -1081,7 +1081,7 @@ do_delete_status(InstanceConfig *instance_config, const char *status)
			if (backup->stream)
				size_to_delete += backup->wal_bytes;

-			if (!dry_run && lock_backup(backup, false))
+			if (!dry_run && lock_backup(backup, false, true))
				delete_backup_files(backup);

			n_deleted++;
@@ -400,7 +400,7 @@ do_merge(time_t backup_id)
	parray_append(merge_list, full_backup);

	/* Lock merge chain */
-	catalog_lock_backup_list(merge_list, parray_num(merge_list) - 1, 0, true);
+	catalog_lock_backup_list(merge_list, parray_num(merge_list) - 1, 0, true, true);

	/* do actual merge */
	merge_chain(merge_list, full_backup, dest_backup);
@@ -66,7 +66,8 @@ extern const char *PROGRAM_EMAIL;
#define PG_GLOBAL_DIR			"global"
#define BACKUP_CONTROL_FILE		"backup.control"
#define BACKUP_CATALOG_CONF_FILE	"pg_probackup.conf"
-#define BACKUP_CATALOG_PID		"backup.pid"
+#define BACKUP_LOCK_FILE		"backup.pid"
+#define BACKUP_RO_LOCK_FILE		"backup_ro.pid"
#define DATABASE_FILE_LIST		"backup_content.control"
#define PG_BACKUP_LABEL_FILE		"backup_label"
#define PG_TABLESPACE_MAP_FILE		"tablespace_map"
@@ -78,6 +79,7 @@ extern const char *PROGRAM_EMAIL;
/* Timeout defaults */
#define ARCHIVE_TIMEOUT_DEFAULT		300
#define REPLICA_TIMEOUT_DEFAULT		300
+#define LOCK_TIMEOUT			30

/* Directory/File permission */
#define DIR_PERMISSION			(0700)
@@ -869,14 +871,14 @@ extern void write_backup(pgBackup *backup, bool strict);
extern void write_backup_status(pgBackup *backup, BackupStatus status,
								const char *instance_name, bool strict);
extern void write_backup_data_bytes(pgBackup *backup);
-extern bool lock_backup(pgBackup *backup, bool strict);
+extern bool lock_backup(pgBackup *backup, bool strict, bool exclusive);

extern const char *pgBackupGetBackupMode(pgBackup *backup);

extern parray *catalog_get_instance_list(void);
extern parray *catalog_get_backup_list(const char *instance_name, time_t requested_backup_id);
extern void catalog_lock_backup_list(parray *backup_list, int from_idx,
-									 int to_idx, bool strict);
+									 int to_idx, bool strict, bool exclusive);
extern pgBackup *catalog_get_last_data_backup(parray *backup_list,
											  TimeLineID tli,
											  time_t current_start_time);
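Given these constants, a backup directory under concurrent access carries both lock files side by side. A hypothetical snapshot of the on-disk layout (the backup ID and PIDs are invented for illustration):

    backups/node/RT3I2Y/
        backup.control      # BACKUP_CONTROL_FILE: backup metadata
        backup.pid          # BACKUP_LOCK_FILE: "12345\n" - the exclusive owner
        backup_ro.pid       # BACKUP_RO_LOCK_FILE: "12346\n12347\n" - live read-only lockers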
@@ -496,18 +496,11 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
	{
		tmp_backup = (pgBackup *) parray_get(parent_chain, i);

-		/* Do not interrupt, validate the next backup */
-		if (!lock_backup(tmp_backup, true))
+		/* lock every backup in chain in read-only mode */
+		if (!lock_backup(tmp_backup, true, false))
		{
-			if (params->is_restore)
-				elog(ERROR, "Cannot lock backup %s directory",
-					 base36enc(tmp_backup->start_time));
-			else
-			{
-				elog(WARNING, "Cannot lock backup %s directory, skip validation",
-					 base36enc(tmp_backup->start_time));
-				continue;
-			}
+			elog(ERROR, "Cannot lock backup %s directory",
+				 base36enc(tmp_backup->start_time));
		}

		/* validate datafiles only */
@@ -660,7 +653,7 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain,
	{
		pgBackup *backup = (pgBackup *) parray_get(parent_chain, i);

-		if (!lock_backup(backup, true))
+		if (!lock_backup(backup, true, false))
			elog(ERROR, "Cannot lock backup %s", base36enc(backup->start_time));

		if (backup->status != BACKUP_STATUS_OK &&

@@ -192,7 +192,7 @@ pgBackupValidate(pgBackup *backup, pgRestoreParams *params)
		backup->status = BACKUP_STATUS_CORRUPT;

	write_backup_status(backup, corrupted ? BACKUP_STATUS_CORRUPT :
-						BACKUP_STATUS_OK, instance_name, true);
+							BACKUP_STATUS_OK, instance_name, true);

	if (corrupted)
		elog(WARNING, "Backup %s data files are corrupted", base36enc(backup->start_time));
@@ -570,7 +570,7 @@ do_validate_instance(void)
			base_full_backup = current_backup;

		/* Do not interrupt, validate the next backup */
-		if (!lock_backup(current_backup, true))
+		if (!lock_backup(current_backup, true, false))
		{
			elog(WARNING, "Cannot lock backup %s directory, skip validation",
				 base36enc(current_backup->start_time));
@@ -665,7 +665,7 @@ do_validate_instance(void)
				if (backup->status == BACKUP_STATUS_ORPHAN)
				{
					/* Do not interrupt, validate the next backup */
-					if (!lock_backup(backup, true))
+					if (!lock_backup(backup, true, false))
					{
						elog(WARNING, "Cannot lock backup %s directory, skip validation",
							 base36enc(backup->start_time));

@@ -212,9 +212,7 @@ class BackupTest(ProbackupTest, unittest.TestCase):
        except ProbackupException as e:
            self.assertTrue(
                "INFO: Validate backups of the instance 'node'" in e.message and
-                "WARNING: Backup file".format(
-                    file) in e.message and
-                "is not found".format(file) in e.message and
+                "WARNING: Backup file" in e.message and "is not found" in e.message and
                "WARNING: Backup {0} data files are corrupted".format(
                    backup_id) in e.message and
                "WARNING: Some backups are not valid" in e.message,

@@ -210,6 +210,7 @@ class CheckdbTest(ProbackupTest, unittest.TestCase):
            log_file_content)

        # Clean after yourself
+        gdb.kill()
        self.del_test_dir(module_name, fname)

    # @unittest.skip("skip")
@@ -495,6 +496,7 @@ class CheckdbTest(ProbackupTest, unittest.TestCase):
        self.assertNotIn('connection to client lost', output)

        # Clean after yourself
+        gdb.kill()
        self.del_test_dir(module_name, fname)

    # @unittest.skip("skip")
@@ -588,7 +590,7 @@ class CheckdbTest(ProbackupTest, unittest.TestCase):
            'GRANT EXECUTE ON FUNCTION pg_catalog.charne("char", "char") TO backup; '
            'GRANT EXECUTE ON FUNCTION pg_catalog.pg_is_in_recovery() TO backup; '
            'GRANT EXECUTE ON FUNCTION pg_catalog.pg_control_system() TO backup; '
-            'GRANT EXECUTE ON FUNCTION bt_index_check(regclass) TO backup; '
+            # 'GRANT EXECUTE ON FUNCTION bt_index_check(regclass) TO backup; '
            'GRANT EXECUTE ON FUNCTION bt_index_check(regclass, bool) TO backup;'
        )
        # >= 10
@@ -655,3 +657,6 @@ class CheckdbTest(ProbackupTest, unittest.TestCase):
                e.message,
                "\n Unexpected Error Message: {0}\n CMD: {1}".format(
                    repr(e.message), self.cmd))
+
+        # Clean after yourself
+        self.del_test_dir(module_name, fname)

@@ -433,11 +433,11 @@ class DeltaTest(ProbackupTest, unittest.TestCase):
        node.safe_psql("postgres", "checkpoint")

        # GET LOGICAL CONTENT FROM NODE
-        result = node.safe_psql("postgres", "select * from pgbench_accounts")
+        result = node.safe_psql("postgres", "select count(*) from pgbench_accounts")
        # delta BACKUP
        self.backup_node(
-            backup_dir, 'node', node, backup_type='delta',
-            options=['--stream'])
+            backup_dir, 'node', node,
+            backup_type='delta', options=['--stream'])
        # GET PHYSICAL CONTENT FROM NODE
        pgdata = self.pgdata_content(node.data_dir)

@@ -450,8 +450,10 @@ class DeltaTest(ProbackupTest, unittest.TestCase):
            restored_node, 'somedata_restored')

        self.restore_node(
-            backup_dir, 'node', restored_node, options=[
-                "-j", "4", "-T", "{0}={1}".format(tblspc_path, tblspc_path_new)])
+            backup_dir, 'node', restored_node,
+            options=[
+                "-j", "4", "-T", "{0}={1}".format(
+                    tblspc_path, tblspc_path_new)])

        # GET PHYSICAL CONTENT FROM NODE_RESTORED
        pgdata_restored = self.pgdata_content(restored_node.data_dir)
@@ -461,7 +463,8 @@ class DeltaTest(ProbackupTest, unittest.TestCase):
        restored_node.slow_start()

        result_new = restored_node.safe_psql(
-            "postgres", "select * from pgbench_accounts")
+            "postgres",
+            "select count(*) from pgbench_accounts")

        # COMPARE RESTORED FILES
        self.assertEqual(result, result_new, 'data is lost')

@@ -1102,7 +1102,7 @@ class ProbackupTest(object):

    def delete_pb(
            self, backup_dir, instance,
-            backup_id=None, options=[], old_binary=False):
+            backup_id=None, options=[], old_binary=False, gdb=False):
        cmd_list = [
            'delete',
            '-B', backup_dir
@@ -1112,7 +1112,7 @@ class ProbackupTest(object):
        if backup_id:
            cmd_list += ['-i', backup_id]

-        return self.run_pb(cmd_list + options, old_binary=old_binary)
+        return self.run_pb(cmd_list + options, old_binary=old_binary, gdb=gdb)

    def delete_expired(
            self, backup_dir, instance, options=[], old_binary=False):

@@ -142,6 +142,7 @@ class IncrRestoreTest(ProbackupTest, unittest.TestCase):
        fname = self.id().split('.')[3]
        node = self.make_simple_node(
            base_dir=os.path.join(module_name, fname, 'node'),
+            set_replication=True,
            initdb_params=['--data-checksums'])

        backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
@@ -184,6 +185,7 @@ class IncrRestoreTest(ProbackupTest, unittest.TestCase):
        node = self.make_simple_node(
            base_dir=os.path.join(module_name, fname, 'node'),
            initdb_params=['--data-checksums'],
+            set_replication=True,
            pg_options={'autovacuum': 'off'})

        backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
@@ -245,6 +247,7 @@ class IncrRestoreTest(ProbackupTest, unittest.TestCase):
        node = self.make_simple_node(
            base_dir=os.path.join(module_name, fname, 'node'),
            initdb_params=['--data-checksums'],
+            set_replication=True,
            pg_options={'autovacuum': 'off'})

        backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
@@ -1320,7 +1323,7 @@ class IncrRestoreTest(ProbackupTest, unittest.TestCase):
        self.assertEqual(
            node.safe_psql(
                'postgres',
-                'select count(*) from t1').rstrip(),
+                'select count(*) from t1').decode('utf-8').rstrip(),
            '1')

        # Clean after yourself
@@ -1488,7 +1491,7 @@ class IncrRestoreTest(ProbackupTest, unittest.TestCase):
        self.assertEqual(
            node.safe_psql(
                'postgres',
-                'select count(*) from t1').rstrip(),
+                'select count(*) from t1').decode('utf-8').rstrip(),
            '1')

        # Clean after yourself

@@ -50,7 +50,7 @@ class LockingTest(ProbackupTest, unittest.TestCase):
        backup_id = self.show_pb(backup_dir, 'node')[1]['id']

        self.assertIn(
-            "is using backup {0} and still is running".format(backup_id),
+            "is using backup {0}, and is still running".format(backup_id),
            validate_output,
            '\n Unexpected Validate Output: {0}\n'.format(repr(validate_output)))

@@ -61,7 +61,8 @@ class LockingTest(ProbackupTest, unittest.TestCase):
            'RUNNING', self.show_pb(backup_dir, 'node')[1]['status'])

        # Clean after yourself
-        # self.del_test_dir(module_name, fname)
+        gdb.kill()
+        self.del_test_dir(module_name, fname)

    def test_locking_running_validate_2(self):
        """
@@ -129,6 +130,7 @@ class LockingTest(ProbackupTest, unittest.TestCase):
            'ERROR', self.show_pb(backup_dir, 'node')[1]['status'])

        # Clean after yourself
+        gdb.kill()
        self.del_test_dir(module_name, fname)

    def test_locking_running_validate_2_specific_id(self):
@@ -227,6 +229,7 @@ class LockingTest(ProbackupTest, unittest.TestCase):
                    repr(e.message), self.cmd))

        # Clean after yourself
+        gdb.kill()
        self.del_test_dir(module_name, fname)

    def test_locking_running_3(self):
@@ -296,6 +299,7 @@ class LockingTest(ProbackupTest, unittest.TestCase):
            'ERROR', self.show_pb(backup_dir, 'node')[1]['status'])

        # Clean after yourself
+        gdb.kill()
        self.del_test_dir(module_name, fname)

    def test_locking_restore_locked(self):
@@ -303,8 +307,8 @@ class LockingTest(ProbackupTest, unittest.TestCase):
        make node, take full backup, take two page backups,
        launch validate on PAGE1 and stop it in the middle,
        launch restore of PAGE2.
-        Expect restore to fail because validation of
-        intermediate backup is impossible
+        Expect restore to succeed because read-only locks
+        do not conflict
        """
        fname = self.id().split('.')[3]
        node = self.make_simple_node(
@@ -334,24 +338,13 @@ class LockingTest(ProbackupTest, unittest.TestCase):

        node.cleanup()

-        try:
-            self.restore_node(backup_dir, 'node', node)
-            self.assertEqual(
-                1, 0,
-                "Expecting Error because restore without whole chain validation "
-                "is prohibited unless --no-validate provided.\n "
-                "Output: {0} \n CMD: {1}".format(
-                    repr(self.output), self.cmd))
-        except ProbackupException as e:
-            self.assertTrue(
-                "ERROR: Cannot lock backup {0} directory\n".format(full_id) in e.message,
-                '\n Unexpected Error Message: {0}\n CMD: {1}'.format(
-                    repr(e.message), self.cmd))
+        self.restore_node(backup_dir, 'node', node)

        # Clean after yourself
+        gdb.kill()
        self.del_test_dir(module_name, fname)

-    def test_locking_restore_locked_without_validation(self):
+    def test_concurrent_delete_and_restore(self):
        """
        make node, take full backup, take page backup,
        launch validate on FULL and stop it in the middle,
@@ -376,10 +369,11 @@ class LockingTest(ProbackupTest, unittest.TestCase):
        # PAGE1
        restore_id = self.backup_node(backup_dir, 'node', node, backup_type='page')

-        gdb = self.validate_pb(
+        gdb = self.delete_pb(
            backup_dir, 'node', backup_id=backup_id, gdb=True)

-        gdb.set_breakpoint('pgBackupValidate')
+        # gdb.set_breakpoint('pgFileDelete')
+        gdb.set_breakpoint('delete_backup_files')
        gdb.run_until_break()

        node.cleanup()
@@ -397,13 +391,14 @@ class LockingTest(ProbackupTest, unittest.TestCase):
            self.assertTrue(
                "Backup {0} is used without validation".format(
                    restore_id) in e.message and
-                'is using backup {0} and still is running'.format(
+                'is using backup {0}, and is still running'.format(
                    backup_id) in e.message and
                'ERROR: Cannot lock backup' in e.message,
                '\n Unexpected Error Message: {0}\n CMD: {1}'.format(
                    repr(e.message), self.cmd))

        # Clean after yourself
+        gdb.kill()
        self.del_test_dir(module_name, fname)

    def test_locking_concurrent_validate_and_backup(self):
@@ -439,6 +434,7 @@ class LockingTest(ProbackupTest, unittest.TestCase):
        self.backup_node(backup_dir, 'node', node, backup_type='page')

        # Clean after yourself
+        gdb.kill()
        self.del_test_dir(module_name, fname)

    def test_locking_concurren_restore_and_delete(self):
@@ -467,7 +463,6 @@ class LockingTest(ProbackupTest, unittest.TestCase):
        gdb.set_breakpoint('create_data_directories')
        gdb.run_until_break()

-        # This PAGE backup is expected to be successfull
        try:
            self.delete_pb(backup_dir, 'node', full_id)
            self.assertEqual(
@@ -483,6 +478,7 @@ class LockingTest(ProbackupTest, unittest.TestCase):
                    repr(e.message), self.cmd))

        # Clean after yourself
+        gdb.kill()
        self.del_test_dir(module_name, fname)

    def test_backup_directory_name(self):
@@ -538,3 +534,7 @@ class LockingTest(ProbackupTest, unittest.TestCase):

        # Clean after yourself
        self.del_test_dir(module_name, fname)
+
+# TODO:
+# test that concurrent validation and restore are not locking each other
+# check that quick exclusive lock, when taking RO-lock, is really quick

@@ -393,7 +393,7 @@ class PageTest(ProbackupTest, unittest.TestCase):
        pgbench.wait()

        # GET LOGICAL CONTENT FROM NODE
-        result = node.safe_psql("postgres", "select * from pgbench_accounts")
+        result = node.safe_psql("postgres", "select count(*) from pgbench_accounts")
        # PAGE BACKUP
        self.backup_node(backup_dir, 'node', node, backup_type='page')

@@ -422,7 +422,7 @@ class PageTest(ProbackupTest, unittest.TestCase):
        restored_node.slow_start()

        result_new = restored_node.safe_psql(
-            "postgres", "select * from pgbench_accounts")
+            "postgres", "select count(*) from pgbench_accounts")

        # COMPARE RESTORED FILES
        self.assertEqual(result, result_new, 'data is lost')

@@ -3093,6 +3093,7 @@ class RestoreTest(ProbackupTest, unittest.TestCase):
            "GRANT CONNECT ON DATABASE backupdb to backup; "
            "GRANT USAGE ON SCHEMA pg_catalog TO backup; "
            "GRANT SELECT ON TABLE pg_catalog.pg_proc TO backup; "
+            "GRANT SELECT ON TABLE pg_catalog.pg_extension TO backup; "
            "GRANT SELECT ON TABLE pg_catalog.pg_database TO backup; " # for partial restore, checkdb and ptrack
            "GRANT EXECUTE ON FUNCTION pg_catalog.nameeq(name, name) TO backup; "
            "GRANT EXECUTE ON FUNCTION pg_catalog.textout(text) TO backup; "

@@ -41,7 +41,7 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
        output_before = self.show_archive(backup_dir, 'node', tli=1)

        # Purge backups
-        log = self.delete_expired(
+        self.delete_expired(
            backup_dir, 'node', options=['--expired', '--wal'])
        self.assertEqual(len(self.show_pb(backup_dir, 'node')), 2)

@@ -142,13 +142,13 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
        node.slow_start()

        # take FULL BACKUP
-        backup_id_1 = self.backup_node(backup_dir, 'node', node)
+        self.backup_node(backup_dir, 'node', node)

        # Take second FULL BACKUP
-        backup_id_2 = self.backup_node(backup_dir, 'node', node)
+        self.backup_node(backup_dir, 'node', node)

        # Take third FULL BACKUP
-        backup_id_3 = self.backup_node(backup_dir, 'node', node)
+        self.backup_node(backup_dir, 'node', node)

        backups = os.path.join(backup_dir, 'backups', 'node')
        for backup in os.listdir(backups):
@@ -189,7 +189,7 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
        node.slow_start()

        # take FULL BACKUPs
-        backup_id_1 = self.backup_node(backup_dir, 'node', node)
+        self.backup_node(backup_dir, 'node', node)

        backup_id_2 = self.backup_node(backup_dir, 'node', node)

@@ -444,8 +444,7 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
        # PAGEa1 OK
        # FULLb  OK
        # FULLa  ERROR
-        page_id_b2 = self.backup_node(
-            backup_dir, 'node', node, backup_type='page')
+        self.backup_node(backup_dir, 'node', node, backup_type='page')

        # Change PAGEa2 and FULLa status to OK
        self.change_backup_status(backup_dir, 'node', page_id_a2, 'OK')
@@ -632,7 +631,7 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
        node.pgbench_init(scale=5)

        # Take FULL BACKUPs
-        backup_id_a = self.backup_node(backup_dir, 'node', node)
+        self.backup_node(backup_dir, 'node', node)
        pgbench = node.pgbench(options=['-t', '20', '-c', '1'])
        pgbench.wait()

@@ -663,13 +662,13 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
        # PAGEa1 ERROR
        # FULLb  OK
        # FULLa  OK
-        page_id_b1 = self.backup_node(
+        self.backup_node(
            backup_dir, 'node', node, backup_type='page')

        pgbench = node.pgbench(options=['-t', '20', '-c', '1'])
        pgbench.wait()

-        page_id_b2 = self.backup_node(
+        self.backup_node(
            backup_dir, 'node', node, backup_type='page')

        pgbench = node.pgbench(options=['-t', '20', '-c', '1'])
@@ -711,7 +710,7 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
            conf.write("recovery_time='{:%Y-%m-%d %H:%M:%S}'\n".format(
                datetime.now() - timedelta(days=3)))

-        output = self.delete_expired(
+        self.delete_expired(
            backup_dir, 'node',
            options=['--retention-window=1', '--expired', '--merge-expired'])
@@ -1305,26 +1304,26 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
        node.pgbench_init(scale=3)

        # Chain A
-        backup_id_a = self.backup_node(backup_dir, 'node', node)
-        page_id_a1 = self.backup_node(
+        self.backup_node(backup_dir, 'node', node)
+        self.backup_node(
            backup_dir, 'node', node, backup_type='page')

-        page_id_a2 = self.backup_node(
+        self.backup_node(
            backup_dir, 'node', node, backup_type='page')

        # Chain B
-        backup_id_b = self.backup_node(backup_dir, 'node', node)
+        self.backup_node(backup_dir, 'node', node)

        pgbench = node.pgbench(options=['-T', '10', '-c', '2'])
        pgbench.wait()

-        page_id_b1 = self.backup_node(
+        self.backup_node(
            backup_dir, 'node', node, backup_type='delta')

        pgbench = node.pgbench(options=['-T', '10', '-c', '2'])
        pgbench.wait()

-        page_id_b2 = self.backup_node(
+        self.backup_node(
            backup_dir, 'node', node, backup_type='page')

        pgbench = node.pgbench(options=['-T', '10', '-c', '2'])
@@ -1347,7 +1346,7 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
            conf.write("recovery_time='{:%Y-%m-%d %H:%M:%S}'\n".format(
                datetime.now() - timedelta(days=3)))

-        output = self.delete_expired(
+        self.delete_expired(
            backup_dir, 'node',
            options=[
                '--retention-window=1', '--expired',
@@ -1391,26 +1390,26 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
        node.pgbench_init(scale=3)

        # Chain A
-        backup_id_a = self.backup_node(backup_dir, 'node', node)
-        page_id_a1 = self.backup_node(
+        self.backup_node(backup_dir, 'node', node)
+        self.backup_node(
            backup_dir, 'node', node, backup_type='page')

-        page_id_a2 = self.backup_node(
+        self.backup_node(
            backup_dir, 'node', node, backup_type='page')

        # Chain B
-        backup_id_b = self.backup_node(backup_dir, 'node', node)
+        self.backup_node(backup_dir, 'node', node)

-        page_id_b1 = self.backup_node(
+        self.backup_node(
            backup_dir, 'node', node, backup_type='delta')

-        page_id_b2 = self.backup_node(
+        self.backup_node(
            backup_dir, 'node', node, backup_type='page')

        page_id_b3 = self.backup_node(
            backup_dir, 'node', node, backup_type='delta')

-        pgdata = self.pgdata_content(node.data_dir)
+        self.pgdata_content(node.data_dir)

        # Purge backups
        backups = os.path.join(backup_dir, 'backups', 'node')
@@ -1483,15 +1482,15 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
        node.slow_start()

        # Take FULL BACKUPs
-        backup_id_a1 = self.backup_node(backup_dir, 'node', node)
-        gdb = self.backup_node(
-            backup_dir, 'node', node, backup_type='page', gdb=True)
+        self.backup_node(backup_dir, 'node', node)
+        self.backup_node(
+            backup_dir, 'node', node, backup_type='page')

-        page_id_a3 = self.backup_node(
+        self.backup_node(
            backup_dir, 'node', node, backup_type='page')

        # Change FULLb backup status to ERROR
-        self.change_backup_status(backup_dir, 'node', backup_id_b, 'ERROR')
+        # self.change_backup_status(backup_dir, 'node', backup_id_b, 'ERROR')

        # Clean after yourself
        self.del_test_dir(module_name, fname)
@@ -1516,7 +1515,7 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
        node.slow_start()

        # Take FULL BACKUP
-        full_id = self.backup_node(backup_dir, 'node', node)
+        self.backup_node(backup_dir, 'node', node)

        # Take PAGE BACKUP
        gdb = self.backup_node(
@@ -1528,15 +1527,15 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
        gdb._execute('signal SIGINT')
        gdb.continue_execution_until_error()

-        page_id = self.show_pb(backup_dir, 'node')[1]['id']
+        self.show_pb(backup_dir, 'node')[1]['id']

        # Take DELTA backup
-        delta_id = self.backup_node(
+        self.backup_node(
            backup_dir, 'node', node, backup_type='delta',
            options=['--retention-window=2', '--delete-expired'])

        # Take FULL BACKUP
-        full2_id = self.backup_node(backup_dir, 'node', node)
+        self.backup_node(backup_dir, 'node', node)

        self.assertEqual(len(self.show_pb(backup_dir, 'node')), 4)

@@ -1563,7 +1562,7 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
        node.slow_start()

        # Take FULL BACKUP
-        full_id = self.backup_node(backup_dir, 'node', node)
+        self.backup_node(backup_dir, 'node', node)

        # Take PAGE BACKUP
        gdb = self.backup_node(
@@ -1574,7 +1573,7 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
        gdb._execute('signal SIGKILL')
        gdb.continue_execution_until_error()

-        page_id = self.show_pb(backup_dir, 'node')[1]['id']
+        self.show_pb(backup_dir, 'node')[1]['id']

        if self.get_version(node) < 90600:
            node.safe_psql(
@@ -1582,7 +1581,7 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
                'SELECT pg_catalog.pg_stop_backup()')

        # Take DELTA backup
-        delta_id = self.backup_node(
+        self.backup_node(
            backup_dir, 'node', node, backup_type='delta',
            options=['--retention-window=2', '--delete-expired'])

@@ -1630,7 +1629,7 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
        self.backup_node(backup_dir, 'node', node, backup_type="page")

        # Purge backups
-        log = self.delete_expired(
+        self.delete_expired(
            backup_dir, 'node', options=['--expired', '--wal'])
        self.assertEqual(len(self.show_pb(backup_dir, 'node')), 2)

@@ -1639,7 +1638,7 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
        # Clean after yourself
        self.del_test_dir(module_name, fname)

-    def test_retention_redundancy_overlapping_chains(self):
+    def test_retention_redundancy_overlapping_chains_1(self):
        """"""
        fname = self.id().split('.')[3]
        node = self.make_simple_node(
@@ -1678,7 +1677,7 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
        self.backup_node(backup_dir, 'node', node, backup_type="page")

        # Purge backups
-        log = self.delete_expired(
+        self.delete_expired(
            backup_dir, 'node', options=['--expired', '--wal'])
        self.assertEqual(len(self.show_pb(backup_dir, 'node')), 2)

@@ -1869,7 +1868,7 @@ class RetentionTest(ProbackupTest, unittest.TestCase):

        # FULL
        node.pgbench_init(scale=1)
-        B1 = self.backup_node(backup_dir, 'node', node)
+        self.backup_node(backup_dir, 'node', node)

        # PAGE
        node.pgbench_init(scale=1)
@@ -1885,12 +1884,12 @@ class RetentionTest(ProbackupTest, unittest.TestCase):

        node.pgbench_init(scale=1)

-        B3 = self.backup_node(
+        self.backup_node(
            backup_dir, 'node', node, backup_type='page')

        node.pgbench_init(scale=1)

-        B4 = self.backup_node(
+        self.backup_node(
            backup_dir, 'node', node, backup_type='page')

        # Timeline 2
@@ -1940,11 +1939,11 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
        node_restored.slow_start()

        node_restored.pgbench_init(scale=1)
-        B5 = self.backup_node(
+        self.backup_node(
            backup_dir, 'node', node_restored, data_dir=node_restored.data_dir)

        node.pgbench_init(scale=1)
-        B6 = self.backup_node(backup_dir, 'node', node)
+        self.backup_node(backup_dir, 'node', node)

        lsn = self.show_archive(backup_dir, 'node', tli=2)['switchpoint']

@@ -2007,9 +2006,9 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
        node.pgbench_init(scale=5)

        # B2 FULL on TLI1
-        B2 = self.backup_node(backup_dir, 'node', node)
+        self.backup_node(backup_dir, 'node', node)
        node.pgbench_init(scale=4)
-        B3 = self.backup_node(backup_dir, 'node', node)
+        self.backup_node(backup_dir, 'node', node)
        node.pgbench_init(scale=4)

        self.delete_pb(backup_dir, 'node', options=['--delete-wal'])
@@ -2023,7 +2022,7 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
            backup_dir, 'node', node_tli2,
            options=[
                '--recovery-target-xid={0}'.format(target_xid),
-                '--recovery-target-timeline=1'.format(target_xid),
+                '--recovery-target-timeline=1',
                '--recovery-target-action=promote'])

        self.assertIn(
@@ -2039,11 +2038,11 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
            "select txid_current()").decode('utf-8').rstrip()
        node_tli2.pgbench_init(scale=1)

-        B4 = self.backup_node(
+        self.backup_node(
            backup_dir, 'node', node_tli2, data_dir=node_tli2.data_dir)
        node_tli2.pgbench_init(scale=3)

-        B5 = self.backup_node(
+        self.backup_node(
            backup_dir, 'node', node_tli2, data_dir=node_tli2.data_dir)
        node_tli2.pgbench_init(scale=1)
        node_tli2.cleanup()
@@ -2086,7 +2085,7 @@ class RetentionTest(ProbackupTest, unittest.TestCase):

        node_tli4.pgbench_init(scale=5)

-        B6 = self.backup_node(
+        self.backup_node(
            backup_dir, 'node', node_tli4, data_dir=node_tli4.data_dir)
        node_tli4.pgbench_init(scale=5)
        node_tli4.cleanup()
@@ -2232,7 +2231,7 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
        # B2 FULL on TLI1
        B2 = self.backup_node(backup_dir, 'node', node)
        node.pgbench_init(scale=4)
-        B3 = self.backup_node(backup_dir, 'node', node)
+        self.backup_node(backup_dir, 'node', node)
        node.pgbench_init(scale=4)

        # TLI 2
@@ -2244,7 +2243,7 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
            backup_dir, 'node', node_tli2,
            options=[
                '--recovery-target-xid={0}'.format(target_xid),
-                '--recovery-target-timeline=1'.format(target_xid),
+                '--recovery-target-timeline=1',
                '--recovery-target-action=promote'])

        self.assertIn(
@@ -2264,7 +2263,7 @@ class RetentionTest(ProbackupTest, unittest.TestCase):
            backup_dir, 'node', node_tli2, data_dir=node_tli2.data_dir)
        node_tli2.pgbench_init(scale=3)

-        B5 = self.backup_node(
+        self.backup_node(
            backup_dir, 'node', node_tli2, data_dir=node_tli2.data_dir)
        node_tli2.pgbench_init(scale=1)
        node_tli2.cleanup()
@@ -2307,7 +2306,7 @@ class RetentionTest(ProbackupTest, unittest.TestCase):

        node_tli4.pgbench_init(scale=5)

-        B6 = self.backup_node(
+        self.backup_node(
            backup_dir, 'node', node_tli4, data_dir=node_tli4.data_dir)
        node_tli4.pgbench_init(scale=5)
        node_tli4.cleanup()