Mirror of https://github.com/postgrespro/pg_probackup.git

Merge branch 'master' into pgpro_2370

commit 0220ae5146
Anastasia, 2019-02-11 18:23:51 +03:00
8 changed files with 379 additions and 43 deletions

View File: src/backup.c

@@ -763,6 +763,19 @@ do_backup_instance(void)
 	else
 		elog(ERROR, "Data files transferring failed");
 
+	/* Remove files that disappeared during backup from backup_files_list */
+	for (i = 0; i < parray_num(backup_files_list); i++)
+	{
+		pgFile	   *tmp_file = (pgFile *) parray_get(backup_files_list, i);
+
+		if (tmp_file->write_size == FILE_NOT_FOUND)
+		{
+			pg_atomic_clear_flag(&tmp_file->lock);
+			pgFileFree(tmp_file);
+			parray_remove(backup_files_list, i);
+		}
+	}
+
 	/* clean previous backup file list */
 	if (prev_backup_filelist)
 	{
@@ -2241,7 +2254,7 @@ backup_files(void *arg)
 			 * If the file is not found, this is not an error.
 			 * It could have been deleted by a concurrent postgres transaction.
 			 */
-			file->write_size = BYTES_INVALID;
+			file->write_size = FILE_NOT_FOUND;
 			elog(LOG, "File \"%s\" is not found", file->path);
 			continue;
 		}
@@ -2291,7 +2304,9 @@ backup_files(void *arg)
 									 instance_config.compress_alg,
 									 instance_config.compress_level))
 			{
-				file->write_size = BYTES_INVALID;
+				/* a disappeared file must not be confused with 'not changed' */
+				if (file->write_size != FILE_NOT_FOUND)
+					file->write_size = BYTES_INVALID;
 				elog(VERBOSE, "File \"%s\" was not copied to backup", file->path);
 				continue;
 			}
@@ -2315,7 +2330,9 @@ backup_files(void *arg)
 			if (skip ||
 				!copy_file(arguments->from_root, arguments->to_root, file))
 			{
-				file->write_size = BYTES_INVALID;
+				/* a disappeared file must not be confused with 'not changed' */
+				if (file->write_size != FILE_NOT_FOUND)
+					file->write_size = BYTES_INVALID;
 				elog(VERBOSE, "File \"%s\" was not copied to backup",
 					 file->path);
 				continue;

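With these hunks, write_size carries three outcomes that a boolean return could not: copied (size > 0), unchanged (BYTES_INVALID), or vanished mid-backup (FILE_NOT_FOUND). Below is a minimal sketch of the pruning step, using the parray helpers the diff itself calls and assuming parray_remove() compacts the array; the while-form sidesteps the remove-during-iteration pitfall, where an i++ sweep skips the element that shifts into slot i after a removal (prune_missing is an illustrative name, not pg_probackup API):

    /* Sketch: drop entries marked FILE_NOT_FOUND from the file list. */
    static void
    prune_missing(parray *files)
    {
        size_t  i = 0;

        while (i < parray_num(files))
        {
            pgFile *f = (pgFile *) parray_get(files, i);

            if (f->write_size == FILE_NOT_FOUND)
            {
                pg_atomic_clear_flag(&f->lock);
                pgFileFree(f);
                /* the next element shifts into slot i, so do not advance */
                parray_remove(files, i);
            }
            else
                i++;
        }
    }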
View File: src/catalog.c

@@ -981,6 +981,9 @@ is_parent(time_t parent_backup_time, pgBackup *child_backup, bool inclusive)
 	if (!child_backup)
 		elog(ERROR, "Target backup cannot be NULL");
 
+	if (inclusive && child_backup->start_time == parent_backup_time)
+		return true;
+
 	while (child_backup->parent_backup_link &&
 		   child_backup->parent_backup != parent_backup_time)
 	{
@@ -990,8 +993,8 @@ is_parent(time_t parent_backup_time, pgBackup *child_backup, bool inclusive)
 	if (child_backup->parent_backup == parent_backup_time)
 		return true;
 
-	if (inclusive && child_backup->start_time == parent_backup_time)
-		return true;
+	//if (inclusive && child_backup->start_time == parent_backup_time)
+	//	return true;
 
 	return false;
 }

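The inclusive check moves above the loop because the walk reassigns child_backup to successive ancestors; after the loop, start_time no longer belongs to the backup the caller asked about, so the old post-walk check (now commented out) could misfire. A condensed sketch of the resulting control flow, simplified from the hunks above:

    /* Sketch: is 'parent_time' an ancestor of 'child' (or 'child'
     * itself, when inclusive)?  Simplified, not the exact project code. */
    static bool
    is_parent_sketch(time_t parent_time, pgBackup *child, bool inclusive)
    {
        /* test the original child before the walk mutates 'child' */
        if (inclusive && child->start_time == parent_time)
            return true;

        while (child->parent_backup_link &&
               child->parent_backup != parent_time)
            child = child->parent_backup_link;

        return child->parent_backup == parent_time;
    }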
View File: src/data.c

@@ -564,6 +564,7 @@ backup_data_file(backup_files_arg* arguments,
 		if (errno == ENOENT)
 		{
 			elog(LOG, "File \"%s\" is not found", file->path);
+			file->write_size = FILE_NOT_FOUND;
 			return false;
 		}
@@ -946,7 +947,11 @@ copy_file(const char *from_root, const char *to_root, pgFile *file)
 		/* maybe deleted, it's not an error */
 		if (errno == ENOENT)
+		{
+			elog(LOG, "File \"%s\" is not found", file->path);
+			file->write_size = FILE_NOT_FOUND;
 			return false;
+		}
 
 		elog(ERROR, "cannot open source file \"%s\": %s", file->path,
 			 strerror(errno));

View File: src/dir.c

@@ -1055,11 +1055,11 @@ create_data_directories(const char *data_dir, const char *backup_dir,
 		}
 
 		if (link_sep)
-			elog(LOG, "create directory \"%s\" and symbolic link \"%.*s\"",
+			elog(VERBOSE, "create directory \"%s\" and symbolic link \"%.*s\"",
 				 linked_path,
 				 (int) (link_sep - relative_ptr), relative_ptr);
 		else
-			elog(LOG, "create directory \"%s\" and symbolic link \"%s\"",
+			elog(VERBOSE, "create directory \"%s\" and symbolic link \"%s\"",
 				 linked_path, relative_ptr);
 
 		/* Firstly, create linked directory */
@@ -1090,7 +1090,7 @@ create_data_directories(const char *data_dir, const char *backup_dir,
 	}
 
 create_directory:
-	elog(LOG, "create directory \"%s\"", relative_ptr);
+	elog(VERBOSE, "create directory \"%s\"", relative_ptr);
 
 	/* This is not a symlink, create a directory */
 	join_path_components(to_path, data_dir, relative_ptr);

View File: src/parsexlog.c

@@ -104,6 +104,9 @@ typedef struct XLogPageReadPrivate
 #ifdef HAVE_LIBZ
 	gzFile		gz_xlogfile;
 	char		gz_xlogpath[MAXPGPATH];
+
+	char		gz_buf[XLOG_BLCKSZ];
+	uint32		gz_prev_off;
 #endif
 } XLogPageReadPrivate;
@@ -1057,22 +1060,30 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
 #ifdef HAVE_LIBZ
 	else
 	{
-		if (gzseek(private_data->gz_xlogfile, (z_off_t) targetPageOff, SEEK_SET) == -1)
-		{
-			elog(WARNING, "Thread [%d]: Could not seek in compressed WAL segment \"%s\": %s",
-				 private_data->thread_num,
-				 private_data->gz_xlogpath,
-				 get_gz_error(private_data->gz_xlogfile));
-			return -1;
-		}
-
-		if (gzread(private_data->gz_xlogfile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
-		{
-			elog(WARNING, "Thread [%d]: Could not read from compressed WAL segment \"%s\": %s",
-				 private_data->thread_num,
-				 private_data->gz_xlogpath,
-				 get_gz_error(private_data->gz_xlogfile));
-			return -1;
-		}
+		if (private_data->gz_prev_off != 0 &&
+			private_data->gz_prev_off == targetPageOff)
+			memcpy(readBuf, private_data->gz_buf, XLOG_BLCKSZ);
+		else
+		{
+			if (gzseek(private_data->gz_xlogfile, (z_off_t) targetPageOff, SEEK_SET) == -1)
+			{
+				elog(WARNING, "Thread [%d]: Could not seek in compressed WAL segment \"%s\": %s",
+					 private_data->thread_num,
+					 private_data->gz_xlogpath,
+					 get_gz_error(private_data->gz_xlogfile));
+				return -1;
+			}
+
+			if (gzread(private_data->gz_xlogfile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+			{
+				elog(WARNING, "Thread [%d]: Could not read from compressed WAL segment \"%s\": %s",
+					 private_data->thread_num,
+					 private_data->gz_xlogpath,
+					 get_gz_error(private_data->gz_xlogfile));
+				return -1;
+			}
+			private_data->gz_prev_off = targetPageOff;
+			memcpy(private_data->gz_buf, readBuf, XLOG_BLCKSZ);
+		}
 	}
 #endif
@@ -1131,6 +1142,7 @@ CleanupXLogPageRead(XLogReaderState *xlogreader)
 	{
 		gzclose(private_data->gz_xlogfile);
 		private_data->gz_xlogfile = NULL;
+		private_data->gz_prev_off = 0;
 	}
 #endif
 	private_data->xlogexists = false;

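The new gz_buf/gz_prev_off pair is a one-page read cache: WAL records often straddle page boundaries, so the reader requests the same page twice in a row, and gzseek() on a compressed segment may have to re-decompress from the start of the file. A self-contained sketch of the same idea against plain zlib is below; GzBlockReader and gz_read_block are illustrative names, not pg_probackup code, and like the patch it treats offset 0 as "cache empty", so the first block is never served from the cache:

    #include <string.h>
    #include <zlib.h>

    #define BLCKSZ 8192

    typedef struct GzBlockReader
    {
        gzFile  f;
        long    prev_off;       /* offset of the cached block; 0 = empty */
        char    buf[BLCKSZ];    /* contents of the last block read */
    } GzBlockReader;

    /* Read one fixed-size block at 'off', serving a repeat read of the
     * same offset from the cache. Returns 0 on success, -1 on error. */
    static int
    gz_read_block(GzBlockReader *r, long off, char *out)
    {
        if (r->prev_off != 0 && r->prev_off == off)
        {
            memcpy(out, r->buf, BLCKSZ);    /* cache hit: no seek, no read */
            return 0;
        }

        if (gzseek(r->f, (z_off_t) off, SEEK_SET) == -1)
            return -1;
        if (gzread(r->f, out, BLCKSZ) != BLCKSZ)
            return -1;

        r->prev_off = off;                  /* remember the page just read */
        memcpy(r->buf, out, BLCKSZ);
        return 0;
    }

Resetting gz_prev_off to 0 in CleanupXLogPageRead() (last hunk) matters because the same private struct is reused for the next segment, where the cached page would be stale.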
View File: src/pg_probackup.h

@@ -165,7 +165,8 @@ typedef enum ShowFormat
 
 /* special values of pgBackup fields */
 #define INVALID_BACKUP_ID	0	/* backup ID is not provided by the user */
-#define BYTES_INVALID		(-1)
+#define BYTES_INVALID		(-1)	/* file didn't change since the previous backup; DELTA backups do not rely on it */
+#define FILE_NOT_FOUND		(-2)	/* file disappeared during backup */
 #define BLOCKNUM_INVALID	(-1)
 
 /*

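With the second sentinel, write_size distinguishes three cases instead of two. The summary below restates how the hunks in this commit interpret it; the helper names are hypothetical, for illustration only:

    /* write_size after this patch:
     *   > 0             bytes actually written into the backup
     *   BYTES_INVALID   not copied this time (e.g. unchanged);
     *                   the entry stays in the file list
     *   FILE_NOT_FOUND  vanished while the backup ran;
     *                   the entry is pruned from backup_files_list
     */
    if (file->write_size == FILE_NOT_FOUND)
        drop_from_list(file);           /* hypothetical helper */
    else if (file->write_size == BYTES_INVALID)
        keep_entry_without_data(file);  /* hypothetical helper */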
View File: src/restore.c

@@ -57,6 +57,7 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
 	int			base_full_backup_index = 0;
 	int			corrupted_backup_index = 0;
 	char	   *action = is_restore ? "Restore":"Validate";
+	parray	   *parent_chain = NULL;
 
 	if (is_restore)
 	{
@@ -285,6 +286,27 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
 	if (is_restore)
 		check_tablespace_mapping(dest_backup);
 
+	/* At this point we are sure that the parent chain is intact,
+	 * so we can build a separate array containing all needed backups,
+	 * to simplify validation and restore.
+	 */
+	parent_chain = parray_new();
+
+	/* Take every backup that is a child of base_full_backup AND a parent
+	 * of dest_backup, including base_full_backup and dest_backup themselves.
+	 */
+	for (i = base_full_backup_index; i >= dest_backup_index; i--)
+	{
+		tmp_backup = (pgBackup *) parray_get(backups, i);
+
+		if (is_parent(base_full_backup->start_time, tmp_backup, true) &&
+			is_parent(tmp_backup->start_time, dest_backup, true))
+		{
+			parray_append(parent_chain, tmp_backup);
+		}
+	}
+
 	/* for validation, or for restore with validation enabled */
 	if (!is_restore || !rt->restore_no_validate)
 	{
 		if (dest_backup->backup_mode != BACKUP_MODE_FULL)
@@ -292,27 +314,25 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
 		/*
 		 * Validate backups from base_full_backup to dest_backup.
 		 * At this point we are sure that the parent chain is intact.
 		 */
-		for (i = base_full_backup_index; i >= dest_backup_index; i--)
+		for (i = 0; i < parray_num(parent_chain); i++)
 		{
-			tmp_backup = (pgBackup *) parray_get(backups, i);
+			tmp_backup = (pgBackup *) parray_get(parent_chain, i);
 
-			if (is_parent(base_full_backup->start_time, tmp_backup, true))
-			{
-				pgBackupValidate(tmp_backup);
-				/* Maybe we should be more paranoid and check for !BACKUP_STATUS_OK? */
-				if (tmp_backup->status == BACKUP_STATUS_CORRUPT)
-				{
-					corrupted_backup = tmp_backup;
-					corrupted_backup_index = i;
-					break;
-				}
-				/* We do not validate WAL files of intermediate backups.
-				 * It's done to speed up restore.
-				 */
-			}
+			pgBackupValidate(tmp_backup);
+			/* Maybe we should be more paranoid and check for !BACKUP_STATUS_OK? */
+			if (tmp_backup->status == BACKUP_STATUS_CORRUPT)
+			{
+				corrupted_backup = tmp_backup;
+				/* we need the corrupted backup's index in 'backups', not in
+				 * parent_chain, so we can properly orphanize all its descendants
+				 */
+				corrupted_backup_index = get_backup_index_number(backups, corrupted_backup);
+				break;
+			}
+			/* We do not validate WAL files of intermediate backups.
+			 * It's done to speed up restore.
+			 */
 		}
 
 		/* There is no point in WAL validation of corrupted backups */
@@ -355,7 +375,6 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
 		}
 	}
 
-	// TODO: rewrite restore to use parent_chain
 	/*
 	 * If the dest backup is corrupted or was orphaned in the previous check,
 	 * produce the corresponding error message
@@ -376,13 +395,12 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
 			base36enc(dest_backup->start_time), status2str(dest_backup->status));
 
 	/* We ensured that all backups are valid, now restore if required
-	 * TODO: use parent_link
 	 */
 	if (is_restore)
 	{
-		for (i = base_full_backup_index; i >= dest_backup_index; i--)
+		for (i = 0; i < parray_num(parent_chain); i++)
 		{
-			pgBackup   *backup = (pgBackup *) parray_get(backups, i);
+			pgBackup   *backup = (pgBackup *) parray_get(parent_chain, i);
 
 			if (rt->lsn_specified && parse_server_version(backup->server_version) < 100000)
 				elog(ERROR, "Backup %s was created for version %s which doesn't support recovery_target_lsn",
@@ -405,6 +423,7 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
 	/* cleanup */
 	parray_walk(backups, pgBackupFree);
 	parray_free(backups);
+	parray_free(parent_chain);
 
 	elog(INFO, "%s of backup %s completed.",
 		 action, base36enc(dest_backup->start_time));
@@ -480,6 +499,7 @@ restore_backup(pgBackup *backup)
 		/* By default there is some error */
 		threads_args[i].ret = 1;
 
+		/* Useless message TODO: rewrite */
 		elog(LOG, "Start thread for num:%zu", parray_num(files));
 
 		pthread_create(&threads[i], NULL, restore_files, arg);

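Both rewritten loops now walk parent_chain forward. Assuming the 'backups' array is sorted newest-first (which the downward index walk from base_full_backup_index to dest_backup_index implies), the chain is appended oldest-first, so index 0 is the FULL backup and the last element is the destination. A condensed sketch of the pattern, simplified from the hunks above; process() merely stands in for pgBackupValidate()/restore_backup():

    /* Sketch: collect base_full_backup .. dest_backup once, then walk it
     * forward so the FULL backup is handled first and dest_backup last. */
    parray     *chain = parray_new();

    for (i = base_full_backup_index; i >= dest_backup_index; i--)
    {
        pgBackup   *b = (pgBackup *) parray_get(backups, i);

        if (is_parent(base_full_backup->start_time, b, true) &&
            is_parent(b->start_time, dest_backup, true))
            parray_append(chain, b);
    }

    for (i = 0; i < parray_num(chain); i++)
        process((pgBackup *) parray_get(chain, i));  /* validate or restore */

    parray_free(chain);     /* elements are still owned by 'backups' */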
View File: tests/restore.py

@@ -4,7 +4,7 @@ from .helpers.ptrack_helpers import ProbackupTest, ProbackupException
 import subprocess
 from datetime import datetime
 import sys
-import time
+from time import sleep
 
 module_name = 'restore'
@@ -1441,3 +1441,281 @@ class RestoreTest(ProbackupTest, unittest.TestCase):
                 True,
                 'Failed to start pg_wal_dump: {0}'.format(
                     pg_receivexlog.communicate()[1]))
+    # @unittest.skip("skip")
+    def test_restore_chain(self):
+        """
+        make node, take full backup, take several
+        ERROR delta backups, take valid delta backup,
+        restore must be successful
+        """
+        fname = self.id().split('.')[3]
+        node = self.make_simple_node(
+            base_dir=os.path.join(module_name, fname, 'node'),
+            set_replication=True,
+            initdb_params=['--data-checksums'],
+            pg_options={
+                'wal_level': 'replica',
+                'max_wal_senders': '2'})
+
+        backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
+        self.init_pb(backup_dir)
+        self.add_instance(backup_dir, 'node', node)
+        self.set_archiving(backup_dir, 'node', node)
+        node.slow_start()
+
+        # Take FULL
+        self.backup_node(
+            backup_dir, 'node', node)
+
+        # Take DELTA
+        self.backup_node(
+            backup_dir, 'node', node, backup_type='delta')
+
+        # Take ERROR DELTA
+        try:
+            self.backup_node(
+                backup_dir, 'node', node,
+                backup_type='delta', options=['--archive-timeout=0s'])
+        except ProbackupException as e:
+            pass
+
+        # Take ERROR DELTA
+        try:
+            self.backup_node(
+                backup_dir, 'node', node,
+                backup_type='delta', options=['--archive-timeout=0s'])
+        except ProbackupException as e:
+            pass
+
+        # Take DELTA
+        self.backup_node(
+            backup_dir, 'node', node, backup_type='delta')
+
+        # Take ERROR DELTA
+        try:
+            self.backup_node(
+                backup_dir, 'node', node,
+                backup_type='delta', options=['--archive-timeout=0s'])
+        except ProbackupException as e:
+            pass
+
+        self.assertEqual(
+            'OK',
+            self.show_pb(backup_dir, 'node')[0]['status'],
+            'Backup STATUS should be "OK"')
+
+        self.assertEqual(
+            'OK',
+            self.show_pb(backup_dir, 'node')[1]['status'],
+            'Backup STATUS should be "OK"')
+
+        self.assertEqual(
+            'ERROR',
+            self.show_pb(backup_dir, 'node')[2]['status'],
+            'Backup STATUS should be "ERROR"')
+
+        self.assertEqual(
+            'ERROR',
+            self.show_pb(backup_dir, 'node')[3]['status'],
+            'Backup STATUS should be "ERROR"')
+
+        self.assertEqual(
+            'OK',
+            self.show_pb(backup_dir, 'node')[4]['status'],
+            'Backup STATUS should be "OK"')
+
+        self.assertEqual(
+            'ERROR',
+            self.show_pb(backup_dir, 'node')[5]['status'],
+            'Backup STATUS should be "ERROR"')
+
+        node.cleanup()
+
+        self.restore_node(backup_dir, 'node', node)
+
+        # Clean after yourself
+        self.del_test_dir(module_name, fname)
+    # @unittest.skip("skip")
+    def test_restore_chain_with_corrupted_backup(self):
+        """a more complex variant of test_restore_chain()"""
+        fname = self.id().split('.')[3]
+        node = self.make_simple_node(
+            base_dir=os.path.join(module_name, fname, 'node'),
+            set_replication=True,
+            initdb_params=['--data-checksums'],
+            pg_options={
+                'wal_level': 'replica',
+                'max_wal_senders': '2'})
+
+        backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
+        self.init_pb(backup_dir)
+        self.add_instance(backup_dir, 'node', node)
+        self.set_archiving(backup_dir, 'node', node)
+        node.slow_start()
+
+        # Take FULL
+        self.backup_node(
+            backup_dir, 'node', node)
+
+        # Take PAGE
+        self.backup_node(
+            backup_dir, 'node', node, backup_type='page')
+
+        # Take ERROR PAGE
+        try:
+            self.backup_node(
+                backup_dir, 'node', node,
+                backup_type='page', options=['--archive-timeout=0s'])
+        except ProbackupException as e:
+            pass
+
+        # Take 1 DELTA
+        self.backup_node(
+            backup_dir, 'node', node, backup_type='delta')
+
+        # Take ERROR DELTA
+        try:
+            self.backup_node(
+                backup_dir, 'node', node,
+                backup_type='delta', options=['--archive-timeout=0s'])
+        except ProbackupException as e:
+            pass
+
+        # Take 2 DELTA
+        self.backup_node(
+            backup_dir, 'node', node, backup_type='delta')
+
+        # Take ERROR DELTA
+        try:
+            self.backup_node(
+                backup_dir, 'node', node,
+                backup_type='delta', options=['--archive-timeout=0s'])
+        except ProbackupException as e:
+            pass
+
+        # Take 3 DELTA
+        self.backup_node(
+            backup_dir, 'node', node, backup_type='delta')
+
+        # Take 4 DELTA, to be corrupted below
+        corrupt_id = self.backup_node(
+            backup_dir, 'node', node, backup_type='delta')
+
+        # Take 5 DELTA, to become ORPHAN
+        restore_target_id = self.backup_node(
+            backup_dir, 'node', node, backup_type='delta')
+
+        # Take 6 DELTA, to become ORPHAN
+        self.backup_node(
+            backup_dir, 'node', node, backup_type='delta')
+
+        # Take next FULL
+        self.backup_node(
+            backup_dir, 'node', node, backup_type='full')
+
+        # Take next DELTA
+        self.backup_node(
+            backup_dir, 'node', node, backup_type='delta')
+
+        # Corrupt 4 DELTA by moving its pg_control out of the backup
+        file = os.path.join(
+            backup_dir, 'backups', 'node',
+            corrupt_id, 'database', 'global', 'pg_control')
+        file_new = os.path.join(backup_dir, 'pg_control')
+        os.rename(file, file_new)
+
+        # RESTORE BACKUP
+        node.cleanup()
+
+        try:
+            self.restore_node(
+                backup_dir, 'node', node, backup_id=restore_target_id)
+            self.assertEqual(
+                1, 0,
+                "Expecting Error because restore target backup is corrupted.\n "
+                "Output: {0} \n CMD: {1}".format(
+                    repr(self.output), self.cmd))
+        except ProbackupException as e:
+            self.assertIn(
+                'ERROR: Backup {0} is orphan'.format(restore_target_id),
+                e.message,
+                '\n Unexpected Error Message: {0}\n CMD: {1}'.format(
+                    repr(e.message), self.cmd))
+
+        self.assertEqual(
+            'OK',
+            self.show_pb(backup_dir, 'node')[0]['status'],
+            'Backup STATUS should be "OK"')
+
+        self.assertEqual(
+            'OK',
+            self.show_pb(backup_dir, 'node')[1]['status'],
+            'Backup STATUS should be "OK"')
+
+        self.assertEqual(
+            'ERROR',
+            self.show_pb(backup_dir, 'node')[2]['status'],
+            'Backup STATUS should be "ERROR"')
+
+        self.assertEqual(
+            'OK',
+            self.show_pb(backup_dir, 'node')[3]['status'],
+            'Backup STATUS should be "OK"')
+
+        self.assertEqual(
+            'ERROR',
+            self.show_pb(backup_dir, 'node')[4]['status'],
+            'Backup STATUS should be "ERROR"')
+
+        self.assertEqual(
+            'OK',
+            self.show_pb(backup_dir, 'node')[5]['status'],
+            'Backup STATUS should be "OK"')
+
+        self.assertEqual(
+            'ERROR',
+            self.show_pb(backup_dir, 'node')[6]['status'],
+            'Backup STATUS should be "ERROR"')
+
+        self.assertEqual(
+            'OK',
+            self.show_pb(backup_dir, 'node')[7]['status'],
+            'Backup STATUS should be "OK"')
+
+        # corruption victim
+        self.assertEqual(
+            'CORRUPT',
+            self.show_pb(backup_dir, 'node')[8]['status'],
+            'Backup STATUS should be "CORRUPT"')
+
+        # orphaned child
+        self.assertEqual(
+            'ORPHAN',
+            self.show_pb(backup_dir, 'node')[9]['status'],
+            'Backup STATUS should be "ORPHAN"')
+
+        # orphaned child
+        self.assertEqual(
+            'ORPHAN',
+            self.show_pb(backup_dir, 'node')[10]['status'],
+            'Backup STATUS should be "ORPHAN"')
+
+        # next FULL
+        self.assertEqual(
+            'OK',
+            self.show_pb(backup_dir, 'node')[11]['status'],
+            'Backup STATUS should be "OK"')
+
+        # next DELTA
+        self.assertEqual(
+            'OK',
+            self.show_pb(backup_dir, 'node')[12]['status'],
+            'Backup STATUS should be "OK"')
+
+        node.cleanup()
+
+        # Clean after yourself
+        self.del_test_dir(module_name, fname)