mirror of
https://github.com/postgrespro/pg_probackup.git
synced 2025-01-20 11:34:51 +02:00
PGPRO-1248: new status ORPHAN for backup with corrupted parent
This commit is contained in:
parent
269812aafc
commit
b6f1dd8fbe
@ -556,6 +556,8 @@ readBackupControlFile(const char *path)
|
|||||||
backup->status = BACKUP_STATUS_DELETED;
|
backup->status = BACKUP_STATUS_DELETED;
|
||||||
else if (strcmp(status, "DONE") == 0)
|
else if (strcmp(status, "DONE") == 0)
|
||||||
backup->status = BACKUP_STATUS_DONE;
|
backup->status = BACKUP_STATUS_DONE;
|
||||||
|
else if (strcmp(status, "ORPHAN") == 0)
|
||||||
|
backup->status = BACKUP_STATUS_ORPHAN;
|
||||||
else if (strcmp(status, "CORRUPT") == 0)
|
else if (strcmp(status, "CORRUPT") == 0)
|
||||||
backup->status = BACKUP_STATUS_CORRUPT;
|
backup->status = BACKUP_STATUS_CORRUPT;
|
||||||
else
|
else
|
||||||
|
@ -258,7 +258,9 @@ validate_backup_wal_from_start_to_stop(pgBackup *backup,
|
|||||||
*/
|
*/
|
||||||
backup->status = BACKUP_STATUS_CORRUPT;
|
backup->status = BACKUP_STATUS_CORRUPT;
|
||||||
pgBackupWriteBackupControlFile(backup);
|
pgBackupWriteBackupControlFile(backup);
|
||||||
elog(ERROR, "there are not enough WAL records to restore from %X/%X to %X/%X",
|
elog(WARNING, "There are not enough WAL records to consistenly restore "
|
||||||
|
"backup %s from START LSN: %X/%X to STOP LSN: %X/%X",
|
||||||
|
base36enc(backup->start_time),
|
||||||
(uint32) (backup->start_lsn >> 32),
|
(uint32) (backup->start_lsn >> 32),
|
||||||
(uint32) (backup->start_lsn),
|
(uint32) (backup->start_lsn),
|
||||||
(uint32) (backup->stop_lsn >> 32),
|
(uint32) (backup->stop_lsn >> 32),
|
||||||
@ -329,14 +331,19 @@ validate_wal(pgBackup *backup,
|
|||||||
|
|
||||||
free(backup_id);
|
free(backup_id);
|
||||||
|
|
||||||
|
if (backup->status == BACKUP_STATUS_CORRUPT)
|
||||||
|
{
|
||||||
|
elog(WARNING, "Backup %s WAL segments are corrupted", base36enc(backup->start_time));
|
||||||
|
return;
|
||||||
|
}
|
||||||
/*
|
/*
|
||||||
* If recovery target is provided check that we can restore backup to a
|
* If recovery target is provided check that we can restore backup to a
|
||||||
* recoverty target time or xid.
|
* recovery target time or xid.
|
||||||
*/
|
*/
|
||||||
if (!TransactionIdIsValid(target_xid) && target_time == 0)
|
if (!TransactionIdIsValid(target_xid) && target_time == 0)
|
||||||
{
|
{
|
||||||
/* Recoverty target is not given so exit */
|
/* Recovery target is not given so exit */
|
||||||
elog(INFO, "backup validation completed successfully");
|
elog(INFO, "Backup %s WAL segments are valid", base36enc(backup->start_time));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -118,6 +118,7 @@ typedef enum BackupStatus
|
|||||||
BACKUP_STATUS_DELETING, /* data files are being deleted */
|
BACKUP_STATUS_DELETING, /* data files are being deleted */
|
||||||
BACKUP_STATUS_DELETED, /* data files have been deleted */
|
BACKUP_STATUS_DELETED, /* data files have been deleted */
|
||||||
BACKUP_STATUS_DONE, /* completed but not validated yet */
|
BACKUP_STATUS_DONE, /* completed but not validated yet */
|
||||||
|
BACKUP_STATUS_ORPHAN, /* backup validity is unknown but at least one parent backup is corrupted */
|
||||||
BACKUP_STATUS_CORRUPT /* files are corrupted, not available */
|
BACKUP_STATUS_CORRUPT /* files are corrupted, not available */
|
||||||
} BackupStatus;
|
} BackupStatus;
|
||||||
|
|
||||||
|
@ -90,8 +90,10 @@ do_restore_or_validate(time_t target_backup_id,
|
|||||||
pgBackup *current_backup = NULL;
|
pgBackup *current_backup = NULL;
|
||||||
pgBackup *dest_backup = NULL;
|
pgBackup *dest_backup = NULL;
|
||||||
pgBackup *base_full_backup = NULL;
|
pgBackup *base_full_backup = NULL;
|
||||||
|
pgBackup *corrupted_backup = NULL;
|
||||||
int dest_backup_index = 0;
|
int dest_backup_index = 0;
|
||||||
int base_full_backup_index = 0;
|
int base_full_backup_index = 0;
|
||||||
|
int corrupted_backup_index = 0;
|
||||||
char *action = is_restore ? "Restore":"Validate";
|
char *action = is_restore ? "Restore":"Validate";
|
||||||
|
|
||||||
if (is_restore)
|
if (is_restore)
|
||||||
@ -205,7 +207,7 @@ do_restore_or_validate(time_t target_backup_id,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
/* Skip differential backups are ok */
|
/* It`s ok to skip incremental backup */
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -220,6 +222,9 @@ do_restore_or_validate(time_t target_backup_id,
|
|||||||
if (is_restore)
|
if (is_restore)
|
||||||
check_tablespace_mapping(dest_backup);
|
check_tablespace_mapping(dest_backup);
|
||||||
|
|
||||||
|
if (dest_backup->backup_mode != BACKUP_MODE_FULL)
|
||||||
|
elog(INFO, "Validating parents for backup %s", base36enc(dest_backup->start_time));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Validate backups from base_full_backup to dest_backup.
|
* Validate backups from base_full_backup to dest_backup.
|
||||||
*/
|
*/
|
||||||
@ -227,8 +232,17 @@ do_restore_or_validate(time_t target_backup_id,
|
|||||||
{
|
{
|
||||||
pgBackup *backup = (pgBackup *) parray_get(backups, i);
|
pgBackup *backup = (pgBackup *) parray_get(backups, i);
|
||||||
pgBackupValidate(backup);
|
pgBackupValidate(backup);
|
||||||
|
if (backup->status == BACKUP_STATUS_CORRUPT)
|
||||||
|
{
|
||||||
|
corrupted_backup = backup;
|
||||||
|
corrupted_backup_index = i;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
/* There is no point in wal validation
|
||||||
|
* if there is corrupted backup between base_backup and dest_backup
|
||||||
|
*/
|
||||||
|
if (!corrupted_backup)
|
||||||
/*
|
/*
|
||||||
* Validate corresponding WAL files.
|
* Validate corresponding WAL files.
|
||||||
* We pass base_full_backup timeline as last argument to this function,
|
* We pass base_full_backup timeline as last argument to this function,
|
||||||
@ -237,19 +251,48 @@ do_restore_or_validate(time_t target_backup_id,
|
|||||||
validate_wal(dest_backup, arclog_path, rt->recovery_target_time,
|
validate_wal(dest_backup, arclog_path, rt->recovery_target_time,
|
||||||
rt->recovery_target_xid, base_full_backup->tli);
|
rt->recovery_target_xid, base_full_backup->tli);
|
||||||
|
|
||||||
|
/* Set every incremental backup between corrupted backup and nearest FULL backup as orphans */
|
||||||
|
if (corrupted_backup)
|
||||||
|
{
|
||||||
|
for (i = corrupted_backup_index - 1; i >= 0; i--)
|
||||||
|
{
|
||||||
|
pgBackup *backup = (pgBackup *) parray_get(backups, i);
|
||||||
|
/* Mark incremental OK backup as orphan */
|
||||||
|
if (backup->backup_mode == BACKUP_MODE_FULL)
|
||||||
|
break;
|
||||||
|
if (backup->status != BACKUP_STATUS_OK)
|
||||||
|
continue;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
backup->status = BACKUP_STATUS_ORPHAN;
|
||||||
|
pgBackupWriteBackupControlFile(backup);
|
||||||
|
elog(WARNING, "Backup %s is orphaned because his parent %s is corrupted",
|
||||||
|
base36enc(backup->start_time), base36enc(corrupted_backup->start_time));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If dest backup is corrupted or was orphaned in previous check
|
||||||
|
* produce corresponding error message
|
||||||
|
*/
|
||||||
|
if (dest_backup->status == BACKUP_STATUS_OK)
|
||||||
|
elog(INFO, "Backup %s is valid.", base36enc(dest_backup->start_time));
|
||||||
|
else if (dest_backup->status == BACKUP_STATUS_CORRUPT)
|
||||||
|
elog(ERROR, "Backup %s is corrupt.", base36enc(dest_backup->start_time));
|
||||||
|
else if (dest_backup->status == BACKUP_STATUS_ORPHAN)
|
||||||
|
elog(ERROR, "Backup %s is orphan.", base36enc(dest_backup->start_time));
|
||||||
|
else
|
||||||
|
elog(ERROR, "Backup %s has status: %s",
|
||||||
|
base36enc(dest_backup->start_time), status2str(dest_backup->status));
|
||||||
|
|
||||||
/* We ensured that all backups are valid, now restore if required */
|
/* We ensured that all backups are valid, now restore if required */
|
||||||
if (is_restore)
|
if (is_restore)
|
||||||
{
|
{
|
||||||
pgBackup *backup;
|
|
||||||
for (i = base_full_backup_index; i >= dest_backup_index; i--)
|
for (i = base_full_backup_index; i >= dest_backup_index; i--)
|
||||||
{
|
{
|
||||||
backup = (pgBackup *) parray_get(backups, i);
|
pgBackup *backup = (pgBackup *) parray_get(backups, i);
|
||||||
if (backup->status == BACKUP_STATUS_OK)
|
|
||||||
restore_backup(backup);
|
restore_backup(backup);
|
||||||
else
|
|
||||||
elog(ERROR, "backup %s is not valid",
|
|
||||||
base36enc(backup->start_time));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -230,9 +230,9 @@ status2str(BackupStatus status)
|
|||||||
"DELETING",
|
"DELETING",
|
||||||
"DELETED",
|
"DELETED",
|
||||||
"DONE",
|
"DONE",
|
||||||
|
"ORPHAN",
|
||||||
"CORRUPT"
|
"CORRUPT"
|
||||||
};
|
};
|
||||||
|
|
||||||
if (status < BACKUP_STATUS_INVALID || BACKUP_STATUS_CORRUPT < status)
|
if (status < BACKUP_STATUS_INVALID || BACKUP_STATUS_CORRUPT < status)
|
||||||
return "UNKNOWN";
|
return "UNKNOWN";
|
||||||
|
|
||||||
|
@ -97,9 +97,9 @@ pgBackupValidate(pgBackup *backup)
|
|||||||
pgBackupWriteBackupControlFile(backup);
|
pgBackupWriteBackupControlFile(backup);
|
||||||
|
|
||||||
if (corrupted)
|
if (corrupted)
|
||||||
elog(WARNING, "Backup %s is corrupted", backup_id_string);
|
elog(WARNING, "Backup %s data files are corrupted", backup_id_string);
|
||||||
else
|
else
|
||||||
elog(INFO, "Backup %s is valid", backup_id_string);
|
elog(INFO, "Backup %s data files are valid", backup_id_string);
|
||||||
free(backup_id_string);
|
free(backup_id_string);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -269,8 +269,6 @@ do_validate_instance(void)
|
|||||||
current_backup = (pgBackup *) parray_get(backups, i);
|
current_backup = (pgBackup *) parray_get(backups, i);
|
||||||
backup_id = base36enc(current_backup->start_time);
|
backup_id = base36enc(current_backup->start_time);
|
||||||
|
|
||||||
elog(INFO, "Validate backup %s", backup_id);
|
|
||||||
|
|
||||||
if (current_backup->backup_mode != BACKUP_MODE_FULL)
|
if (current_backup->backup_mode != BACKUP_MODE_FULL)
|
||||||
{
|
{
|
||||||
int j;
|
int j;
|
||||||
@ -301,9 +299,27 @@ do_validate_instance(void)
|
|||||||
validate_wal(current_backup, arclog_path, 0,
|
validate_wal(current_backup, arclog_path, 0,
|
||||||
0, base_full_backup->tli);
|
0, base_full_backup->tli);
|
||||||
}
|
}
|
||||||
|
/* Set every incremental backup between corrupted backup and nearest FULL backup as orphans */
|
||||||
if (current_backup->status != BACKUP_STATUS_OK)
|
if (current_backup->status != BACKUP_STATUS_OK)
|
||||||
|
{
|
||||||
|
int j;
|
||||||
corrupted_backup_found = true;
|
corrupted_backup_found = true;
|
||||||
|
for (j = i - 1; j >= 0; j--)
|
||||||
|
{
|
||||||
|
pgBackup *backup = (pgBackup *) parray_get(backups, j);
|
||||||
|
if (backup->backup_mode == BACKUP_MODE_FULL)
|
||||||
|
break;
|
||||||
|
if (backup->status != BACKUP_STATUS_OK)
|
||||||
|
continue;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
backup->status = BACKUP_STATUS_ORPHAN;
|
||||||
|
pgBackupWriteBackupControlFile(backup);
|
||||||
|
elog(WARNING, "Backup %s is orphaned because his parent %s is corrupted",
|
||||||
|
base36enc(backup->start_time), base36enc(current_backup->start_time));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
free(backup_id);
|
free(backup_id);
|
||||||
}
|
}
|
||||||
|
@ -94,7 +94,19 @@ class ValidateTest(ProbackupTest, unittest.TestCase):
|
|||||||
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
|
||||||
# Validate with backup ID
|
# Validate with backup ID
|
||||||
self.assertIn("INFO: backup validation completed successfully",
|
self.assertIn("INFO: Validating backup {0}".format(backup_id),
|
||||||
|
self.validate_pb(backup_dir, 'node', backup_id),
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(self.output), self.cmd))
|
||||||
|
self.assertIn("INFO: Backup {0} data files are valid".format(backup_id),
|
||||||
|
self.validate_pb(backup_dir, 'node', backup_id),
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(self.output), self.cmd))
|
||||||
|
self.assertIn("INFO: Backup {0} WAL segments are valid".format(backup_id),
|
||||||
|
self.validate_pb(backup_dir, 'node', backup_id),
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(self.output), self.cmd))
|
||||||
|
self.assertIn("INFO: Backup {0} is valid".format(backup_id),
|
||||||
|
self.validate_pb(backup_dir, 'node', backup_id),
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(self.output), self.cmd))
|
||||||
|
self.assertIn("INFO: Validate of backup {0} completed".format(backup_id),
|
||||||
self.validate_pb(backup_dir, 'node', backup_id),
|
self.validate_pb(backup_dir, 'node', backup_id),
|
||||||
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(self.output), self.cmd))
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(self.output), self.cmd))
|
||||||
|
|
||||||
@ -102,8 +114,9 @@ class ValidateTest(ProbackupTest, unittest.TestCase):
|
|||||||
self.del_test_dir(module_name, fname)
|
self.del_test_dir(module_name, fname)
|
||||||
|
|
||||||
# @unittest.skip("skip")
|
# @unittest.skip("skip")
|
||||||
def test_validate_corrupt_wal_1(self):
|
def test_validate_corrupted_intermediate_backup(self):
|
||||||
"""make archive node, make archive backup, corrupt all wal files, run validate, expect errors"""
|
"""make archive node, take FULL, PAGE1, PAGE2 backups, corrupt file in PAGE1 backup,
|
||||||
|
run validate on PAGE1, expect PAGE1 to gain status CORRUPT and PAGE2 get status INVALID"""
|
||||||
fname = self.id().split('.')[3]
|
fname = self.id().split('.')[3]
|
||||||
node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname),
|
node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname),
|
||||||
initdb_params=['--data-checksums'],
|
initdb_params=['--data-checksums'],
|
||||||
@ -115,13 +128,587 @@ class ValidateTest(ProbackupTest, unittest.TestCase):
|
|||||||
self.set_archiving(backup_dir, 'node', node)
|
self.set_archiving(backup_dir, 'node', node)
|
||||||
node.start()
|
node.start()
|
||||||
|
|
||||||
backup_id = self.backup_node(backup_dir, 'node', node)
|
# FULL
|
||||||
|
backup_id_1 = self.backup_node(backup_dir, 'node', node)
|
||||||
|
|
||||||
|
node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"create table t_heap as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i")
|
||||||
|
file_path = node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"select pg_relation_filepath('t_heap')").rstrip()
|
||||||
|
# PAGE1
|
||||||
|
backup_id_2 = self.backup_node(backup_dir, 'node', node, backup_type='page')
|
||||||
|
|
||||||
|
node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(10000,20000) i")
|
||||||
|
# PAGE2
|
||||||
|
backup_id_3 = self.backup_node(backup_dir, 'node', node, backup_type='page')
|
||||||
|
|
||||||
|
# Corrupt some file
|
||||||
|
file = os.path.join(backup_dir, 'backups/node', backup_id_2, 'database', file_path)
|
||||||
|
with open(file, "rb+", 0) as f:
|
||||||
|
f.seek(42)
|
||||||
|
f.write(b"blah")
|
||||||
|
f.flush()
|
||||||
|
f.close
|
||||||
|
|
||||||
|
# Simple validate
|
||||||
|
try:
|
||||||
|
self.validate_pb(backup_dir, 'node', backup_id=backup_id_2,
|
||||||
|
options=['--log-level-file=verbose'])
|
||||||
|
self.assertEqual(1, 0, "Expecting Error because of data files corruption.\n Output: {0} \n CMD: {1}".format(
|
||||||
|
repr(self.output), self.cmd))
|
||||||
|
except ProbackupException as e:
|
||||||
|
self.assertTrue(
|
||||||
|
'INFO: Validating parents for backup {0}'.format(backup_id_2) in e.message
|
||||||
|
and 'ERROR: Backup {0} is corrupt'.format(backup_id_2) in e.message,
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
|
||||||
|
self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id_2)['status'], 'Backup STATUS should be "CORRUPT"')
|
||||||
|
self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_3)['status'], 'Backup STATUS should be "ORPHAN"')
|
||||||
|
|
||||||
|
# Clean after yourself
|
||||||
|
# self.del_test_dir(module_name, fname)
|
||||||
|
|
||||||
|
# @unittest.skip("skip")
|
||||||
|
def test_validate_corrupted_intermediate_backups(self):
|
||||||
|
"""make archive node, take FULL, PAGE1, PAGE2 backups,
|
||||||
|
corrupt file in FULL and PAGE1 backupd, run validate on PAGE1,
|
||||||
|
expect FULL and PAGE1 to gain status CORRUPT and PAGE2 get status ORPHAN"""
|
||||||
|
fname = self.id().split('.')[3]
|
||||||
|
node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname),
|
||||||
|
initdb_params=['--data-checksums'],
|
||||||
|
pg_options={'wal_level': 'replica'}
|
||||||
|
)
|
||||||
|
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
|
||||||
|
self.init_pb(backup_dir)
|
||||||
|
self.add_instance(backup_dir, 'node', node)
|
||||||
|
self.set_archiving(backup_dir, 'node', node)
|
||||||
|
node.start()
|
||||||
|
|
||||||
|
node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"create table t_heap as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i")
|
||||||
|
file_path_t_heap = node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"select pg_relation_filepath('t_heap')").rstrip()
|
||||||
|
# FULL
|
||||||
|
backup_id_1 = self.backup_node(backup_dir, 'node', node)
|
||||||
|
|
||||||
|
node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"create table t_heap_1 as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i")
|
||||||
|
file_path_t_heap_1 = node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"select pg_relation_filepath('t_heap_1')").rstrip()
|
||||||
|
# PAGE1
|
||||||
|
backup_id_2 = self.backup_node(backup_dir, 'node', node, backup_type='page')
|
||||||
|
|
||||||
|
node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(20000,30000) i")
|
||||||
|
# PAGE2
|
||||||
|
backup_id_3 = self.backup_node(backup_dir, 'node', node, backup_type='page')
|
||||||
|
|
||||||
|
# Corrupt some file in FULL backup
|
||||||
|
file_full = os.path.join(backup_dir, 'backups/node', backup_id_1, 'database', file_path_t_heap)
|
||||||
|
with open(file_full, "rb+", 0) as f:
|
||||||
|
f.seek(84)
|
||||||
|
f.write(b"blah")
|
||||||
|
f.flush()
|
||||||
|
f.close
|
||||||
|
|
||||||
|
# Corrupt some file in PAGE1 backup
|
||||||
|
file_page1 = os.path.join(backup_dir, 'backups/node', backup_id_2, 'database', file_path_t_heap_1)
|
||||||
|
with open(file_page1, "rb+", 0) as f:
|
||||||
|
f.seek(42)
|
||||||
|
f.write(b"blah")
|
||||||
|
f.flush()
|
||||||
|
f.close
|
||||||
|
|
||||||
|
# Validate PAGE1
|
||||||
|
try:
|
||||||
|
self.validate_pb(backup_dir, 'node', backup_id=backup_id_2,
|
||||||
|
options=['--log-level-file=verbose'])
|
||||||
|
self.assertEqual(1, 0, "Expecting Error because of data files corruption.\n Output: {0} \n CMD: {1}".format(
|
||||||
|
repr(self.output), self.cmd))
|
||||||
|
except ProbackupException as e:
|
||||||
|
self.assertTrue('INFO: Validating parents for backup {0}'.format(backup_id_2) in e.message,
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
self.assertTrue(
|
||||||
|
'INFO: Validating backup {0}'.format(backup_id_1) in e.message
|
||||||
|
and 'WARNING: Invalid CRC of backup file "{0}"'.format(file_full) in e.message
|
||||||
|
and 'WARNING: Backup {0} data files are corrupted'.format(backup_id_1) in e.message,
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
self.assertTrue(
|
||||||
|
'WARNING: Backup {0} is orphaned because his parent'.format(backup_id_2) in e.message
|
||||||
|
and 'WARNING: Backup {0} is orphaned because his parent'.format(backup_id_3) in e.message
|
||||||
|
and 'ERROR: Backup {0} is orphan.'.format(backup_id_2) in e.message,
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
|
||||||
|
self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id_1)['status'], 'Backup STATUS should be "CORRUPT"')
|
||||||
|
self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_2)['status'], 'Backup STATUS should be "ORPHAN"')
|
||||||
|
self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_3)['status'], 'Backup STATUS should be "ORPHAN"')
|
||||||
|
|
||||||
|
# Clean after yourself
|
||||||
|
# self.del_test_dir(module_name, fname)
|
||||||
|
|
||||||
|
# @unittest.skip("skip")
|
||||||
|
def test_validate_corrupted_intermediate_backups_1(self):
|
||||||
|
"""make archive node, take FULL1, PAGE1, PAGE2, PAGE3, PAGE4, PAGE5, FULL2 backups,
|
||||||
|
corrupt file in PAGE1 and PAGE4, run validate on PAGE3,
|
||||||
|
expect PAGE1 to gain status CORRUPT, PAGE2, PAGE3, PAGE4 and PAGE5 to gain status ORPHAN"""
|
||||||
|
fname = self.id().split('.')[3]
|
||||||
|
node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname),
|
||||||
|
initdb_params=['--data-checksums'],
|
||||||
|
pg_options={'wal_level': 'replica'}
|
||||||
|
)
|
||||||
|
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
|
||||||
|
self.init_pb(backup_dir)
|
||||||
|
self.add_instance(backup_dir, 'node', node)
|
||||||
|
self.set_archiving(backup_dir, 'node', node)
|
||||||
|
node.start()
|
||||||
|
|
||||||
|
# FULL1
|
||||||
|
backup_id_1 = self.backup_node(backup_dir, 'node', node)
|
||||||
|
|
||||||
|
# PAGE1
|
||||||
|
node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"create table t_heap as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i")
|
||||||
|
backup_id_2 = self.backup_node(backup_dir, 'node', node, backup_type='page')
|
||||||
|
|
||||||
|
# PAGE2
|
||||||
|
node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i")
|
||||||
|
file_page_2 = node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"select pg_relation_filepath('t_heap')").rstrip()
|
||||||
|
backup_id_3 = self.backup_node(backup_dir, 'node', node, backup_type='page')
|
||||||
|
|
||||||
|
# PAGE3
|
||||||
|
node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(10000,20000) i")
|
||||||
|
backup_id_4 = self.backup_node(backup_dir, 'node', node, backup_type='page')
|
||||||
|
|
||||||
|
# PAGE4
|
||||||
|
node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(20000,30000) i")
|
||||||
|
backup_id_5 = self.backup_node(backup_dir, 'node', node, backup_type='page')
|
||||||
|
|
||||||
|
# PAGE5
|
||||||
|
node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"create table t_heap1 as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i")
|
||||||
|
file_page_5 = node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"select pg_relation_filepath('t_heap1')").rstrip()
|
||||||
|
backup_id_6 = self.backup_node(backup_dir, 'node', node, backup_type='page')
|
||||||
|
|
||||||
|
# PAGE6
|
||||||
|
node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(30000,40000) i")
|
||||||
|
backup_id_7 = self.backup_node(backup_dir, 'node', node, backup_type='page')
|
||||||
|
|
||||||
|
# FULL2
|
||||||
|
backup_id_8 = self.backup_node(backup_dir, 'node', node)
|
||||||
|
|
||||||
|
# Corrupt some file in PAGE2 and PAGE5 backups
|
||||||
|
file_page1 = os.path.join(backup_dir, 'backups/node', backup_id_3, 'database', file_page_2)
|
||||||
|
with open(file_page1, "rb+", 0) as f:
|
||||||
|
f.seek(84)
|
||||||
|
f.write(b"blah")
|
||||||
|
f.flush()
|
||||||
|
f.close
|
||||||
|
|
||||||
|
file_page4 = os.path.join(backup_dir, 'backups/node', backup_id_6, 'database', file_page_5)
|
||||||
|
with open(file_page4, "rb+", 0) as f:
|
||||||
|
f.seek(42)
|
||||||
|
f.write(b"blah")
|
||||||
|
f.flush()
|
||||||
|
f.close
|
||||||
|
|
||||||
|
# Validate PAGE3
|
||||||
|
try:
|
||||||
|
self.validate_pb(backup_dir, 'node', backup_id=backup_id_4,
|
||||||
|
options=['--log-level-file=verbose'])
|
||||||
|
self.assertEqual(1, 0, "Expecting Error because of data files corruption.\n Output: {0} \n CMD: {1}".format(
|
||||||
|
repr(self.output), self.cmd))
|
||||||
|
except ProbackupException as e:
|
||||||
|
self.assertTrue(
|
||||||
|
'INFO: Validating parents for backup {0}'.format(backup_id_4) in e.message,
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
self.assertTrue(
|
||||||
|
'INFO: Validating backup {0}'.format(backup_id_1) in e.message
|
||||||
|
and 'INFO: Backup {0} data files are valid'.format(backup_id_1) in e.message,
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
self.assertTrue(
|
||||||
|
'INFO: Validating backup {0}'.format(backup_id_2) in e.message
|
||||||
|
and 'INFO: Backup {0} data files are valid'.format(backup_id_2) in e.message,
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
self.assertTrue(
|
||||||
|
'INFO: Validating backup {0}'.format(backup_id_3) in e.message
|
||||||
|
and 'WARNING: Invalid CRC of backup file "{0}"'.format(file_page1) in e.message
|
||||||
|
and 'WARNING: Backup {0} data files are corrupted'.format(backup_id_3) in e.message,
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
self.assertTrue(
|
||||||
|
'WARNING: Backup {0} is orphaned because his parent {1} is corrupted'.format(backup_id_4, backup_id_3) in e.message,
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
self.assertTrue(
|
||||||
|
'WARNING: Backup {0} is orphaned because his parent {1} is corrupted'.format(backup_id_5, backup_id_3) in e.message,
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
self.assertTrue(
|
||||||
|
'WARNING: Backup {0} is orphaned because his parent {1} is corrupted'.format(backup_id_6, backup_id_3) in e.message,
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
self.assertTrue(
|
||||||
|
'WARNING: Backup {0} is orphaned because his parent {1} is corrupted'.format(backup_id_7, backup_id_3) in e.message,
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
self.assertTrue(
|
||||||
|
'ERROR: Backup {0} is orphan'.format(backup_id_4) in e.message,
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
|
||||||
|
self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_1)['status'], 'Backup STATUS should be "OK"')
|
||||||
|
self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_2)['status'], 'Backup STATUS should be "OK"')
|
||||||
|
self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id_3)['status'], 'Backup STATUS should be "CORRUPT"')
|
||||||
|
self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_4)['status'], 'Backup STATUS should be "ORPHAN"')
|
||||||
|
self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_5)['status'], 'Backup STATUS should be "ORPHAN"')
|
||||||
|
self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_6)['status'], 'Backup STATUS should be "ORPHAN"')
|
||||||
|
self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_7)['status'], 'Backup STATUS should be "ORPHAN"')
|
||||||
|
self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_8)['status'], 'Backup STATUS should be "OK"')
|
||||||
|
|
||||||
|
# Clean after yourself
|
||||||
|
# self.del_test_dir(module_name, fname)
|
||||||
|
|
||||||
|
# @unittest.skip("skip")
|
||||||
|
def test_validate_specific_target_corrupted_intermediate_backups(self):
|
||||||
|
"""make archive node, take FULL1, PAGE1, PAGE2, PAGE3, PAGE4, PAGE5, FULL2 backups,
|
||||||
|
corrupt file in PAGE1 and PAGE4, run validate on PAGE3 to specific xid,
|
||||||
|
expect PAGE1 to gain status CORRUPT, PAGE2, PAGE3, PAGE4 and PAGE5 to gain status ORPHAN"""
|
||||||
|
fname = self.id().split('.')[3]
|
||||||
|
node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname),
|
||||||
|
initdb_params=['--data-checksums'],
|
||||||
|
pg_options={'wal_level': 'replica'}
|
||||||
|
)
|
||||||
|
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
|
||||||
|
self.init_pb(backup_dir)
|
||||||
|
self.add_instance(backup_dir, 'node', node)
|
||||||
|
self.set_archiving(backup_dir, 'node', node)
|
||||||
|
node.start()
|
||||||
|
|
||||||
|
# FULL1
|
||||||
|
backup_id_1 = self.backup_node(backup_dir, 'node', node)
|
||||||
|
|
||||||
|
# PAGE1
|
||||||
|
node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"create table t_heap as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i")
|
||||||
|
backup_id_2 = self.backup_node(backup_dir, 'node', node, backup_type='page')
|
||||||
|
|
||||||
|
# PAGE2
|
||||||
|
node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i")
|
||||||
|
file_page_2 = node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"select pg_relation_filepath('t_heap')").rstrip()
|
||||||
|
backup_id_3 = self.backup_node(backup_dir, 'node', node, backup_type='page')
|
||||||
|
|
||||||
|
# PAGE3
|
||||||
|
node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(10000,20000) i")
|
||||||
|
backup_id_4 = self.backup_node(backup_dir, 'node', node, backup_type='page')
|
||||||
|
|
||||||
|
# PAGE4
|
||||||
|
target_xid = node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(20000,30000) i RETURNING (xmin)")[0][0]
|
||||||
|
backup_id_5 = self.backup_node(backup_dir, 'node', node, backup_type='page')
|
||||||
|
|
||||||
|
# PAGE5
|
||||||
|
node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"create table t_heap1 as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i")
|
||||||
|
file_page_5 = node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"select pg_relation_filepath('t_heap1')").rstrip()
|
||||||
|
backup_id_6 = self.backup_node(backup_dir, 'node', node, backup_type='page')
|
||||||
|
|
||||||
|
# PAGE6
|
||||||
|
node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(30000,40000) i")
|
||||||
|
backup_id_7 = self.backup_node(backup_dir, 'node', node, backup_type='page')
|
||||||
|
|
||||||
|
# FULL2
|
||||||
|
backup_id_8 = self.backup_node(backup_dir, 'node', node)
|
||||||
|
|
||||||
|
# Corrupt some file in PAGE2 and PAGE5 backups
|
||||||
|
file_page1 = os.path.join(backup_dir, 'backups/node', backup_id_3, 'database', file_page_2)
|
||||||
|
with open(file_page1, "rb+", 0) as f:
|
||||||
|
f.seek(84)
|
||||||
|
f.write(b"blah")
|
||||||
|
f.flush()
|
||||||
|
f.close
|
||||||
|
|
||||||
|
file_page4 = os.path.join(backup_dir, 'backups/node', backup_id_6, 'database', file_page_5)
|
||||||
|
with open(file_page4, "rb+", 0) as f:
|
||||||
|
f.seek(42)
|
||||||
|
f.write(b"blah")
|
||||||
|
f.flush()
|
||||||
|
f.close
|
||||||
|
|
||||||
|
# Validate PAGE3
|
||||||
|
try:
|
||||||
|
self.validate_pb(backup_dir, 'node',
|
||||||
|
options=['--log-level-file=verbose', '-i', backup_id_4, '--xid={0}'.format(target_xid)])
|
||||||
|
self.assertEqual(1, 0, "Expecting Error because of data files corruption.\n Output: {0} \n CMD: {1}".format(
|
||||||
|
repr(self.output), self.cmd))
|
||||||
|
except ProbackupException as e:
|
||||||
|
self.assertTrue(
|
||||||
|
'INFO: Validating parents for backup {0}'.format(backup_id_4) in e.message,
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
self.assertTrue(
|
||||||
|
'INFO: Validating backup {0}'.format(backup_id_1) in e.message
|
||||||
|
and 'INFO: Backup {0} data files are valid'.format(backup_id_1) in e.message,
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
self.assertTrue(
|
||||||
|
'INFO: Validating backup {0}'.format(backup_id_2) in e.message
|
||||||
|
and 'INFO: Backup {0} data files are valid'.format(backup_id_2) in e.message,
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
self.assertTrue(
|
||||||
|
'INFO: Validating backup {0}'.format(backup_id_3) in e.message
|
||||||
|
and 'WARNING: Invalid CRC of backup file "{0}"'.format(file_page1) in e.message
|
||||||
|
and 'WARNING: Backup {0} data files are corrupted'.format(backup_id_3) in e.message,
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
self.assertTrue(
|
||||||
|
'WARNING: Backup {0} is orphaned because his parent {1} is corrupted'.format(backup_id_4, backup_id_3) in e.message,
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
self.assertTrue(
|
||||||
|
'WARNING: Backup {0} is orphaned because his parent {1} is corrupted'.format(backup_id_5, backup_id_3) in e.message,
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
self.assertTrue(
|
||||||
|
'WARNING: Backup {0} is orphaned because his parent {1} is corrupted'.format(backup_id_6, backup_id_3) in e.message,
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
self.assertTrue(
|
||||||
|
'WARNING: Backup {0} is orphaned because his parent {1} is corrupted'.format(backup_id_7, backup_id_3) in e.message,
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
self.assertTrue(
|
||||||
|
'ERROR: Backup {0} is orphan'.format(backup_id_4) in e.message,
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
|
||||||
|
self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_1)['status'], 'Backup STATUS should be "OK"')
|
||||||
|
self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_2)['status'], 'Backup STATUS should be "OK"')
|
||||||
|
self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id_3)['status'], 'Backup STATUS should be "CORRUPT"')
|
||||||
|
self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_4)['status'], 'Backup STATUS should be "ORPHAN"')
|
||||||
|
self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_5)['status'], 'Backup STATUS should be "ORPHAN"')
|
||||||
|
self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_6)['status'], 'Backup STATUS should be "ORPHAN"')
|
||||||
|
self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_7)['status'], 'Backup STATUS should be "ORPHAN"')
|
||||||
|
self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_8)['status'], 'Backup STATUS should be "OK"')
|
||||||
|
|
||||||
|
# Clean after yourself
|
||||||
|
# self.del_test_dir(module_name, fname)
|
||||||
|
|
||||||
|
# @unittest.skip("skip")
|
||||||
|
def test_validate_instance_with_corrupted_page(self):
|
||||||
|
"""make archive node, take FULL, PAGE1, PAGE2, FULL2, PAGE3 backups,
|
||||||
|
corrupt file in PAGE1 backup and run validate on instance,
|
||||||
|
expect PAGE1 to gain status CORRUPT, PAGE2 to gain status ORPHAN"""
|
||||||
|
fname = self.id().split('.')[3]
|
||||||
|
node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname),
|
||||||
|
initdb_params=['--data-checksums'],
|
||||||
|
pg_options={'wal_level': 'replica'}
|
||||||
|
)
|
||||||
|
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
|
||||||
|
self.init_pb(backup_dir)
|
||||||
|
self.add_instance(backup_dir, 'node', node)
|
||||||
|
self.set_archiving(backup_dir, 'node', node)
|
||||||
|
node.start()
|
||||||
|
|
||||||
|
node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"create table t_heap as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i")
|
||||||
|
# FULL1
|
||||||
|
backup_id_1 = self.backup_node(backup_dir, 'node', node)
|
||||||
|
|
||||||
|
node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"create table t_heap1 as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i")
|
||||||
|
file_path_t_heap1 = node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"select pg_relation_filepath('t_heap1')").rstrip()
|
||||||
|
# PAGE1
|
||||||
|
backup_id_2 = self.backup_node(backup_dir, 'node', node, backup_type='page')
|
||||||
|
|
||||||
|
node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(20000,30000) i")
|
||||||
|
# PAGE2
|
||||||
|
backup_id_3 = self.backup_node(backup_dir, 'node', node, backup_type='page')
|
||||||
|
# FULL1
|
||||||
|
backup_id_4 = self.backup_node(backup_dir, 'node', node)
|
||||||
|
# PAGE3
|
||||||
|
backup_id_5 = self.backup_node(backup_dir, 'node', node, backup_type='page')
|
||||||
|
|
||||||
|
# Corrupt some file in FULL backup
|
||||||
|
file_full = os.path.join(backup_dir, 'backups/node', backup_id_2, 'database', file_path_t_heap1)
|
||||||
|
with open(file_full, "rb+", 0) as f:
|
||||||
|
f.seek(84)
|
||||||
|
f.write(b"blah")
|
||||||
|
f.flush()
|
||||||
|
f.close
|
||||||
|
|
||||||
|
# Validate Instance
|
||||||
|
try:
|
||||||
|
self.validate_pb(backup_dir, 'node', options=['--log-level-file=verbose'])
|
||||||
|
self.assertEqual(1, 0, "Expecting Error because of data files corruption.\n Output: {0} \n CMD: {1}".format(
|
||||||
|
repr(self.output), self.cmd))
|
||||||
|
except ProbackupException as e:
|
||||||
|
self.assertTrue(
|
||||||
|
"INFO: Validate backups of the instance 'node'" in e.message,
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
self.assertTrue(
|
||||||
|
'INFO: Validating backup {0}'.format(backup_id_5) in e.message
|
||||||
|
and 'INFO: Backup {0} data files are valid'.format(backup_id_5) in e.message
|
||||||
|
and 'INFO: Backup {0} WAL segments are valid'.format(backup_id_5) in e.message,
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
self.assertTrue(
|
||||||
|
'INFO: Validating backup {0}'.format(backup_id_4) in e.message
|
||||||
|
and 'INFO: Backup {0} data files are valid'.format(backup_id_4) in e.message
|
||||||
|
and 'INFO: Backup {0} WAL segments are valid'.format(backup_id_4) in e.message,
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
self.assertTrue(
|
||||||
|
'INFO: Validating backup {0}'.format(backup_id_3) in e.message
|
||||||
|
and 'INFO: Backup {0} data files are valid'.format(backup_id_3) in e.message
|
||||||
|
and 'INFO: Backup {0} WAL segments are valid'.format(backup_id_3) in e.message
|
||||||
|
and 'WARNING: Backup {0} is orphaned because his parent {1} is corrupted'.format(backup_id_3, backup_id_2) in e.message,
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
self.assertTrue(
|
||||||
|
'INFO: Validating backup {0}'.format(backup_id_2) in e.message
|
||||||
|
and 'WARNING: Invalid CRC of backup file "{0}"'.format(file_full) in e.message
|
||||||
|
and 'WARNING: Backup {0} data files are corrupted'.format(backup_id_2) in e.message,
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
self.assertTrue(
|
||||||
|
'INFO: Validating backup {0}'.format(backup_id_1) in e.message
|
||||||
|
and 'INFO: Backup {0} data files are valid'.format(backup_id_1) in e.message
|
||||||
|
and 'INFO: Backup {0} WAL segments are valid'.format(backup_id_1) in e.message,
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
self.assertTrue(
|
||||||
|
'INFO: Some backups are not valid' in e.message,
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
|
||||||
|
self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_1)['status'], 'Backup STATUS should be "OK"')
|
||||||
|
self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id_2)['status'], 'Backup STATUS should be "CORRUPT"')
|
||||||
|
self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_3)['status'], 'Backup STATUS should be "ORPHAN"')
|
||||||
|
self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_4)['status'], 'Backup STATUS should be "OK"')
|
||||||
|
self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_5)['status'], 'Backup STATUS should be "OK"')
|
||||||
|
|
||||||
|
# Clean after yourself
|
||||||
|
# self.del_test_dir(module_name, fname)
|
||||||
|
|
||||||
|
# @unittest.skip("skip")
|
||||||
|
def test_validate_instance_with_corrupted_full(self):
|
||||||
|
"""make archive node, take FULL, PAGE1, PAGE2, FULL2, PAGE3 backups,
|
||||||
|
corrupt file in FULL backup and run validate on instance,
|
||||||
|
expect FULL to gain status CORRUPT, PAGE1 and PAGE2 to gain status ORPHAN"""
|
||||||
|
fname = self.id().split('.')[3]
|
||||||
|
node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname),
|
||||||
|
initdb_params=['--data-checksums'],
|
||||||
|
pg_options={'wal_level': 'replica'}
|
||||||
|
)
|
||||||
|
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
|
||||||
|
self.init_pb(backup_dir)
|
||||||
|
self.add_instance(backup_dir, 'node', node)
|
||||||
|
self.set_archiving(backup_dir, 'node', node)
|
||||||
|
node.start()
|
||||||
|
|
||||||
|
node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"create table t_heap as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i")
|
||||||
|
file_path_t_heap = node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"select pg_relation_filepath('t_heap')").rstrip()
|
||||||
|
# FULL1
|
||||||
|
backup_id_1 = self.backup_node(backup_dir, 'node', node)
|
||||||
|
|
||||||
|
node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i")
|
||||||
|
# PAGE1
|
||||||
|
backup_id_2 = self.backup_node(backup_dir, 'node', node, backup_type='page')
|
||||||
|
|
||||||
|
# PAGE2
|
||||||
|
node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(20000,30000) i")
|
||||||
|
backup_id_3 = self.backup_node(backup_dir, 'node', node, backup_type='page')
|
||||||
|
|
||||||
|
# FULL1
|
||||||
|
backup_id_4 = self.backup_node(backup_dir, 'node', node)
|
||||||
|
|
||||||
|
# PAGE3
|
||||||
|
node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(30000,40000) i")
|
||||||
|
backup_id_5 = self.backup_node(backup_dir, 'node', node, backup_type='page')
|
||||||
|
|
||||||
|
# Corrupt some file in FULL backup
|
||||||
|
file_full = os.path.join(backup_dir, 'backups/node', backup_id_1, 'database', file_path_t_heap)
|
||||||
|
with open(file_full, "rb+", 0) as f:
|
||||||
|
f.seek(84)
|
||||||
|
f.write(b"blah")
|
||||||
|
f.flush()
|
||||||
|
f.close
|
||||||
|
|
||||||
|
# Validate Instance
|
||||||
|
try:
|
||||||
|
self.validate_pb(backup_dir, 'node', options=['--log-level-file=verbose'])
|
||||||
|
self.assertEqual(1, 0, "Expecting Error because of data files corruption.\n Output: {0} \n CMD: {1}".format(
|
||||||
|
repr(self.output), self.cmd))
|
||||||
|
except ProbackupException as e:
|
||||||
|
self.assertTrue(
|
||||||
|
'INFO: Validating backup {0}'.format(backup_id_1) in e.message
|
||||||
|
and "INFO: Validate backups of the instance 'node'" in e.message
|
||||||
|
and 'WARNING: Invalid CRC of backup file "{0}"'.format(file_full) in e.message
|
||||||
|
and 'WARNING: Backup {0} data files are corrupted'.format(backup_id_1) in e.message,
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
|
||||||
|
self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id_1)['status'], 'Backup STATUS should be "CORRUPT"')
|
||||||
|
self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_2)['status'], 'Backup STATUS should be "ORPHAN"')
|
||||||
|
self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_3)['status'], 'Backup STATUS should be "ORPHAN"')
|
||||||
|
self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_4)['status'], 'Backup STATUS should be "OK"')
|
||||||
|
self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_5)['status'], 'Backup STATUS should be "OK"')
|
||||||
|
|
||||||
|
# Clean after yourself
|
||||||
|
# self.del_test_dir(module_name, fname)
|
||||||
|
|
||||||
|
# @unittest.skip("skip")
|
||||||
|
def test_validate_corrupt_wal_1(self):
|
||||||
|
"""make archive node, take FULL1, PAGE1,PAGE2,FULL2,PAGE3,PAGE4 backups, corrupt all wal files, run validate, expect errors"""
|
||||||
|
fname = self.id().split('.')[3]
|
||||||
|
node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname),
|
||||||
|
initdb_params=['--data-checksums'],
|
||||||
|
pg_options={'wal_level': 'replica'}
|
||||||
|
)
|
||||||
|
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
|
||||||
|
self.init_pb(backup_dir)
|
||||||
|
self.add_instance(backup_dir, 'node', node)
|
||||||
|
self.set_archiving(backup_dir, 'node', node)
|
||||||
|
node.start()
|
||||||
|
|
||||||
|
backup_id_1 = self.backup_node(backup_dir, 'node', node)
|
||||||
|
|
||||||
with node.connect("postgres") as con:
|
with node.connect("postgres") as con:
|
||||||
con.execute("CREATE TABLE tbl0005 (a text)")
|
con.execute("CREATE TABLE tbl0005 (a text)")
|
||||||
con.commit()
|
con.commit()
|
||||||
|
|
||||||
backup_id = self.backup_node(backup_dir, 'node', node)
|
backup_id_2 = self.backup_node(backup_dir, 'node', node)
|
||||||
|
|
||||||
# Corrupt WAL
|
# Corrupt WAL
|
||||||
wals_dir = os.path.join(backup_dir, 'wal', 'node')
|
wals_dir = os.path.join(backup_dir, 'wal', 'node')
|
||||||
@ -143,7 +730,8 @@ class ValidateTest(ProbackupTest, unittest.TestCase):
|
|||||||
self.assertTrue('Possible WAL CORRUPTION' in e.message,
|
self.assertTrue('Possible WAL CORRUPTION' in e.message,
|
||||||
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
|
||||||
self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id)['status'], 'Backup STATUS should be "CORRUPT"')
|
self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id_1)['status'], 'Backup STATUS should be "CORRUPT"')
|
||||||
|
self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id_2)['status'], 'Backup STATUS should be "CORRUPT"')
|
||||||
|
|
||||||
# Clean after yourself
|
# Clean after yourself
|
||||||
self.del_test_dir(module_name, fname)
|
self.del_test_dir(module_name, fname)
|
||||||
@ -190,8 +778,8 @@ class ValidateTest(ProbackupTest, unittest.TestCase):
|
|||||||
self.assertEqual(1, 0, "Expecting Error because of wal segments corruption.\n Output: {0} \n CMD: {1}".format(
|
self.assertEqual(1, 0, "Expecting Error because of wal segments corruption.\n Output: {0} \n CMD: {1}".format(
|
||||||
repr(self.output), self.cmd))
|
repr(self.output), self.cmd))
|
||||||
except ProbackupException as e:
|
except ProbackupException as e:
|
||||||
self.assertTrue('Possible WAL CORRUPTION' in e.message),
|
self.assertTrue('Possible WAL CORRUPTION' in e.message,
|
||||||
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
|
||||||
self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id)['status'], 'Backup STATUS should be "CORRUPT"')
|
self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id)['status'], 'Backup STATUS should be "CORRUPT"')
|
||||||
|
|
||||||
@ -236,8 +824,14 @@ class ValidateTest(ProbackupTest, unittest.TestCase):
|
|||||||
self.assertEqual(1, 0, "Expecting Error because of wal segment disappearance.\n Output: {0} \n CMD: {1}".format(
|
self.assertEqual(1, 0, "Expecting Error because of wal segment disappearance.\n Output: {0} \n CMD: {1}".format(
|
||||||
repr(self.output), self.cmd))
|
repr(self.output), self.cmd))
|
||||||
except ProbackupException as e:
|
except ProbackupException as e:
|
||||||
self.assertIn('WARNING: WAL segment "{0}" is absent\nERROR: there are not enough WAL records to restore'.format(
|
self.assertIn('WARNING: WAL segment "{0}" is absent'.format(file), e.message,
|
||||||
file), e.message, '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
self.assertIn('WARNING: There are not enough WAL records to consistenly restore backup {0}'.format(backup_id), e.message,
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
self.assertIn('WARNING: Backup {0} WAL segments are corrupted'.format(backup_id), e.message,
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
self.assertIn('INFO: Some backups are not valid', e.message,
|
||||||
|
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
|
||||||
|
|
||||||
self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id)['status'], 'Backup {0} should have STATUS "CORRUPT"')
|
self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id)['status'], 'Backup {0} should have STATUS "CORRUPT"')
|
||||||
|
|
||||||
@ -293,6 +887,12 @@ class ValidateTest(ProbackupTest, unittest.TestCase):
|
|||||||
os.remove(file)
|
os.remove(file)
|
||||||
if self.archive_compress:
|
if self.archive_compress:
|
||||||
file = file[:-3]
|
file = file[:-3]
|
||||||
|
|
||||||
|
# Try to validate
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Try to restore
|
||||||
try:
|
try:
|
||||||
backup_id = self.backup_node(backup_dir, 'node', node, backup_type='page')
|
backup_id = self.backup_node(backup_dir, 'node', node, backup_type='page')
|
||||||
self.assertEqual(1, 0, "Expecting Error because of wal segment disappearance.\n Output: {0} \n CMD: {1}".format(
|
self.assertEqual(1, 0, "Expecting Error because of wal segment disappearance.\n Output: {0} \n CMD: {1}".format(
|
||||||
@ -398,10 +998,76 @@ class ValidateTest(ProbackupTest, unittest.TestCase):
|
|||||||
archive_command_node2 = node2.safe_psql("postgres", "show archive_command")
|
archive_command_node2 = node2.safe_psql("postgres", "show archive_command")
|
||||||
self.assertEqual(archive_command_node1, archive_command_node2, "Archive command on Master and Node should be equal. This is unexpected")
|
self.assertEqual(archive_command_node1, archive_command_node2, "Archive command on Master and Node should be equal. This is unexpected")
|
||||||
|
|
||||||
result = node2.safe_psql("postgres", "select last_failed_wal from pg_stat_get_archiver() where last_failed_wal is not NULL")
|
#result = node2.safe_psql("postgres", "select last_failed_wal from pg_stat_get_archiver() where last_failed_wal is not NULL")
|
||||||
# self.assertEqual(res, six.b(""), 'Restored Node1 failed to archive segment {0} due to having the same archive command as Master'.format(res.rstrip()))
|
## self.assertEqual(res, six.b(""), 'Restored Node1 failed to archive segment {0} due to having the same archive command as Master'.format(res.rstrip()))
|
||||||
if result == "":
|
#if result == "":
|
||||||
self.assertEqual(1, 0, 'Error is expected due to Master and Node1 having the common archive and archive_command')
|
# self.assertEqual(1, 0, 'Error is expected due to Master and Node1 having the common archive and archive_command')
|
||||||
|
|
||||||
|
log_file = os.path.join(node2.logs_dir, 'postgresql.log')
|
||||||
|
with open(log_file, 'r') as f:
|
||||||
|
log_content = f.read()
|
||||||
|
self.assertTrue('LOG: archive command failed with exit code 1' in log_content
|
||||||
|
and 'DETAIL: The failed archive command was:' in log_content
|
||||||
|
and 'INFO: pg_probackup archive-push from' in log_content,
|
||||||
|
'Expecting error messages about failed archive_command'
|
||||||
|
)
|
||||||
|
self.assertFalse('pg_probackup archive-push completed successfully' in log_content)
|
||||||
|
|
||||||
# Clean after yourself
|
# Clean after yourself
|
||||||
self.del_test_dir(module_name, fname)
|
# self.del_test_dir(module_name, fname)
|
||||||
|
|
||||||
|
# @unittest.skip("skip")
|
||||||
|
# @unittest.expectedFailure
|
||||||
|
def test_pgpro561(self):
|
||||||
|
"""make node with archiving, make stream backup, restore it to node1, check that archiving is not successful on node1"""
|
||||||
|
fname = self.id().split('.')[3]
|
||||||
|
node1 = self.make_simple_node(base_dir="{0}/{1}/node1".format(module_name, fname),
|
||||||
|
set_replication=True,
|
||||||
|
initdb_params=['--data-checksums'],
|
||||||
|
pg_options={'wal_level': 'replica', 'max_wal_senders': '2'}
|
||||||
|
)
|
||||||
|
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
|
||||||
|
self.init_pb(backup_dir)
|
||||||
|
self.add_instance(backup_dir, 'node1', node1)
|
||||||
|
self.set_archiving(backup_dir, 'node1', node1)
|
||||||
|
node1.start()
|
||||||
|
|
||||||
|
backup_id = self.backup_node(backup_dir, 'node1', node1, options=["--stream"])
|
||||||
|
|
||||||
|
node2 = self.make_simple_node(base_dir="{0}/{1}/node2".format(module_name, fname))
|
||||||
|
node2.cleanup()
|
||||||
|
|
||||||
|
node1.psql(
|
||||||
|
"postgres",
|
||||||
|
"create table t_heap as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,256) i")
|
||||||
|
|
||||||
|
self.backup_node(backup_dir, 'node1', node1, backup_type='page', options=["--stream"])
|
||||||
|
self.restore_node(backup_dir, 'node1', data_dir=node2.data_dir)
|
||||||
|
node2.append_conf('postgresql.auto.conf', 'port = {0}'.format(node2.port))
|
||||||
|
node2.start()
|
||||||
|
|
||||||
|
timeline_node1 = node1.get_control_data()["Latest checkpoint's TimeLineID"]
|
||||||
|
timeline_node2 = node2.get_control_data()["Latest checkpoint's TimeLineID"]
|
||||||
|
self.assertEqual(timeline_node1, timeline_node2, "Timelines on Master and Node1 should be equal. This is unexpected")
|
||||||
|
|
||||||
|
archive_command_node1 = node1.safe_psql("postgres", "show archive_command")
|
||||||
|
archive_command_node2 = node2.safe_psql("postgres", "show archive_command")
|
||||||
|
self.assertEqual(archive_command_node1, archive_command_node2, "Archive command on Master and Node should be equal. This is unexpected")
|
||||||
|
|
||||||
|
#result = node2.safe_psql("postgres", "select last_failed_wal from pg_stat_get_archiver() where last_failed_wal is not NULL")
|
||||||
|
## self.assertEqual(res, six.b(""), 'Restored Node1 failed to archive segment {0} due to having the same archive command as Master'.format(res.rstrip()))
|
||||||
|
#if result == "":
|
||||||
|
# self.assertEqual(1, 0, 'Error is expected due to Master and Node1 having the common archive and archive_command')
|
||||||
|
|
||||||
|
log_file = os.path.join(node2.logs_dir, 'postgresql.log')
|
||||||
|
with open(log_file, 'r') as f:
|
||||||
|
log_content = f.read()
|
||||||
|
self.assertTrue('LOG: archive command failed with exit code 1' in log_content
|
||||||
|
and 'DETAIL: The failed archive command was:' in log_content
|
||||||
|
and 'INFO: pg_probackup archive-push from' in log_content,
|
||||||
|
'Expecting error messages about failed archive_command'
|
||||||
|
)
|
||||||
|
self.assertFalse('pg_probackup archive-push completed successfully' in log_content)
|
||||||
|
|
||||||
|
# Clean after yourself
|
||||||
|
# self.del_test_dir(module_name, fname)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user