From b6f1dd8fbeb01e8593a1c14474ddfe399a75e4dd Mon Sep 17 00:00:00 2001 From: Grigory Smolkin Date: Tue, 19 Dec 2017 12:56:03 +0300 Subject: [PATCH] PGPRO-1248: new status ORPHAN for backup with corrupted parent --- src/catalog.c | 2 + src/parsexlog.c | 15 +- src/pg_probackup.h | 1 + src/restore.c | 71 ++++- src/util.c | 2 +- src/validate.c | 26 +- tests/validate_test.py | 696 ++++++++++++++++++++++++++++++++++++++++- 7 files changed, 774 insertions(+), 39 deletions(-) diff --git a/src/catalog.c b/src/catalog.c index a5bc8ff0..de61b7b1 100644 --- a/src/catalog.c +++ b/src/catalog.c @@ -556,6 +556,8 @@ readBackupControlFile(const char *path) backup->status = BACKUP_STATUS_DELETED; else if (strcmp(status, "DONE") == 0) backup->status = BACKUP_STATUS_DONE; + else if (strcmp(status, "ORPHAN") == 0) + backup->status = BACKUP_STATUS_ORPHAN; else if (strcmp(status, "CORRUPT") == 0) backup->status = BACKUP_STATUS_CORRUPT; else diff --git a/src/parsexlog.c b/src/parsexlog.c index d767660a..a758e210 100644 --- a/src/parsexlog.c +++ b/src/parsexlog.c @@ -258,7 +258,9 @@ validate_backup_wal_from_start_to_stop(pgBackup *backup, */ backup->status = BACKUP_STATUS_CORRUPT; pgBackupWriteBackupControlFile(backup); - elog(ERROR, "there are not enough WAL records to restore from %X/%X to %X/%X", + elog(WARNING, "There are not enough WAL records to consistenly restore " + "backup %s from START LSN: %X/%X to STOP LSN: %X/%X", + base36enc(backup->start_time), (uint32) (backup->start_lsn >> 32), (uint32) (backup->start_lsn), (uint32) (backup->stop_lsn >> 32), @@ -329,14 +331,19 @@ validate_wal(pgBackup *backup, free(backup_id); + if (backup->status == BACKUP_STATUS_CORRUPT) + { + elog(WARNING, "Backup %s WAL segments are corrupted", base36enc(backup->start_time)); + return; + } /* * If recovery target is provided check that we can restore backup to a - * recoverty target time or xid. + * recovery target time or xid. */ if (!TransactionIdIsValid(target_xid) && target_time == 0) { - /* Recoverty target is not given so exit */ - elog(INFO, "backup validation completed successfully"); + /* Recovery target is not given so exit */ + elog(INFO, "Backup %s WAL segments are valid", base36enc(backup->start_time)); return; } diff --git a/src/pg_probackup.h b/src/pg_probackup.h index 3074f01b..4ddc0fe9 100644 --- a/src/pg_probackup.h +++ b/src/pg_probackup.h @@ -118,6 +118,7 @@ typedef enum BackupStatus BACKUP_STATUS_DELETING, /* data files are being deleted */ BACKUP_STATUS_DELETED, /* data files have been deleted */ BACKUP_STATUS_DONE, /* completed but not validated yet */ + BACKUP_STATUS_ORPHAN, /* backup validity is unknown but at least one parent backup is corrupted */ BACKUP_STATUS_CORRUPT /* files are corrupted, not available */ } BackupStatus; diff --git a/src/restore.c b/src/restore.c index b30f4dc8..0c7fa073 100644 --- a/src/restore.c +++ b/src/restore.c @@ -90,8 +90,10 @@ do_restore_or_validate(time_t target_backup_id, pgBackup *current_backup = NULL; pgBackup *dest_backup = NULL; pgBackup *base_full_backup = NULL; + pgBackup *corrupted_backup = NULL; int dest_backup_index = 0; int base_full_backup_index = 0; + int corrupted_backup_index = 0; char *action = is_restore ? "Restore":"Validate"; if (is_restore) @@ -205,7 +207,7 @@ do_restore_or_validate(time_t target_backup_id, } } else - /* Skip differential backups are ok */ + /* It`s ok to skip incremental backup */ continue; } } @@ -220,6 +222,9 @@ do_restore_or_validate(time_t target_backup_id, if (is_restore) check_tablespace_mapping(dest_backup); + if (dest_backup->backup_mode != BACKUP_MODE_FULL) + elog(INFO, "Validating parents for backup %s", base36enc(dest_backup->start_time)); + /* * Validate backups from base_full_backup to dest_backup. */ @@ -227,29 +232,67 @@ do_restore_or_validate(time_t target_backup_id, { pgBackup *backup = (pgBackup *) parray_get(backups, i); pgBackupValidate(backup); + if (backup->status == BACKUP_STATUS_CORRUPT) + { + corrupted_backup = backup; + corrupted_backup_index = i; + break; + } + } + /* There is no point in wal validation + * if there is corrupted backup between base_backup and dest_backup + */ + if (!corrupted_backup) + /* + * Validate corresponding WAL files. + * We pass base_full_backup timeline as last argument to this function, + * because it's needed to form the name of xlog file. + */ + validate_wal(dest_backup, arclog_path, rt->recovery_target_time, + rt->recovery_target_xid, base_full_backup->tli); + + /* Set every incremental backup between corrupted backup and nearest FULL backup as orphans */ + if (corrupted_backup) + { + for (i = corrupted_backup_index - 1; i >= 0; i--) + { + pgBackup *backup = (pgBackup *) parray_get(backups, i); + /* Mark incremental OK backup as orphan */ + if (backup->backup_mode == BACKUP_MODE_FULL) + break; + if (backup->status != BACKUP_STATUS_OK) + continue; + else + { + backup->status = BACKUP_STATUS_ORPHAN; + pgBackupWriteBackupControlFile(backup); + elog(WARNING, "Backup %s is orphaned because his parent %s is corrupted", + base36enc(backup->start_time), base36enc(corrupted_backup->start_time)); + } + } } /* - * Validate corresponding WAL files. - * We pass base_full_backup timeline as last argument to this function, - * because it's needed to form the name of xlog file. + * If dest backup is corrupted or was orphaned in previous check + * produce corresponding error message */ - validate_wal(dest_backup, arclog_path, rt->recovery_target_time, - rt->recovery_target_xid, base_full_backup->tli); - + if (dest_backup->status == BACKUP_STATUS_OK) + elog(INFO, "Backup %s is valid.", base36enc(dest_backup->start_time)); + else if (dest_backup->status == BACKUP_STATUS_CORRUPT) + elog(ERROR, "Backup %s is corrupt.", base36enc(dest_backup->start_time)); + else if (dest_backup->status == BACKUP_STATUS_ORPHAN) + elog(ERROR, "Backup %s is orphan.", base36enc(dest_backup->start_time)); + else + elog(ERROR, "Backup %s has status: %s", + base36enc(dest_backup->start_time), status2str(dest_backup->status)); /* We ensured that all backups are valid, now restore if required */ if (is_restore) { - pgBackup *backup; for (i = base_full_backup_index; i >= dest_backup_index; i--) { - backup = (pgBackup *) parray_get(backups, i); - if (backup->status == BACKUP_STATUS_OK) - restore_backup(backup); - else - elog(ERROR, "backup %s is not valid", - base36enc(backup->start_time)); + pgBackup *backup = (pgBackup *) parray_get(backups, i); + restore_backup(backup); } /* diff --git a/src/util.c b/src/util.c index 998035ee..b4cfdd6b 100644 --- a/src/util.c +++ b/src/util.c @@ -230,9 +230,9 @@ status2str(BackupStatus status) "DELETING", "DELETED", "DONE", + "ORPHAN", "CORRUPT" }; - if (status < BACKUP_STATUS_INVALID || BACKUP_STATUS_CORRUPT < status) return "UNKNOWN"; diff --git a/src/validate.c b/src/validate.c index b3437265..3a60efd5 100644 --- a/src/validate.c +++ b/src/validate.c @@ -97,9 +97,9 @@ pgBackupValidate(pgBackup *backup) pgBackupWriteBackupControlFile(backup); if (corrupted) - elog(WARNING, "Backup %s is corrupted", backup_id_string); + elog(WARNING, "Backup %s data files are corrupted", backup_id_string); else - elog(INFO, "Backup %s is valid", backup_id_string); + elog(INFO, "Backup %s data files are valid", backup_id_string); free(backup_id_string); } @@ -269,8 +269,6 @@ do_validate_instance(void) current_backup = (pgBackup *) parray_get(backups, i); backup_id = base36enc(current_backup->start_time); - elog(INFO, "Validate backup %s", backup_id); - if (current_backup->backup_mode != BACKUP_MODE_FULL) { int j; @@ -301,9 +299,27 @@ do_validate_instance(void) validate_wal(current_backup, arclog_path, 0, 0, base_full_backup->tli); } - + /* Set every incremental backup between corrupted backup and nearest FULL backup as orphans */ if (current_backup->status != BACKUP_STATUS_OK) + { + int j; corrupted_backup_found = true; + for (j = i - 1; j >= 0; j--) + { + pgBackup *backup = (pgBackup *) parray_get(backups, j); + if (backup->backup_mode == BACKUP_MODE_FULL) + break; + if (backup->status != BACKUP_STATUS_OK) + continue; + else + { + backup->status = BACKUP_STATUS_ORPHAN; + pgBackupWriteBackupControlFile(backup); + elog(WARNING, "Backup %s is orphaned because his parent %s is corrupted", + base36enc(backup->start_time), base36enc(current_backup->start_time)); + } + } + } free(backup_id); } diff --git a/tests/validate_test.py b/tests/validate_test.py index 84494d85..587a453c 100644 --- a/tests/validate_test.py +++ b/tests/validate_test.py @@ -94,7 +94,19 @@ class ValidateTest(ProbackupTest, unittest.TestCase): '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) # Validate with backup ID - self.assertIn("INFO: backup validation completed successfully", + self.assertIn("INFO: Validating backup {0}".format(backup_id), + self.validate_pb(backup_dir, 'node', backup_id), + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(self.output), self.cmd)) + self.assertIn("INFO: Backup {0} data files are valid".format(backup_id), + self.validate_pb(backup_dir, 'node', backup_id), + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(self.output), self.cmd)) + self.assertIn("INFO: Backup {0} WAL segments are valid".format(backup_id), + self.validate_pb(backup_dir, 'node', backup_id), + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(self.output), self.cmd)) + self.assertIn("INFO: Backup {0} is valid".format(backup_id), + self.validate_pb(backup_dir, 'node', backup_id), + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(self.output), self.cmd)) + self.assertIn("INFO: Validate of backup {0} completed".format(backup_id), self.validate_pb(backup_dir, 'node', backup_id), '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(self.output), self.cmd)) @@ -102,8 +114,9 @@ class ValidateTest(ProbackupTest, unittest.TestCase): self.del_test_dir(module_name, fname) # @unittest.skip("skip") - def test_validate_corrupt_wal_1(self): - """make archive node, make archive backup, corrupt all wal files, run validate, expect errors""" + def test_validate_corrupted_intermediate_backup(self): + """make archive node, take FULL, PAGE1, PAGE2 backups, corrupt file in PAGE1 backup, + run validate on PAGE1, expect PAGE1 to gain status CORRUPT and PAGE2 get status INVALID""" fname = self.id().split('.')[3] node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname), initdb_params=['--data-checksums'], @@ -115,13 +128,587 @@ class ValidateTest(ProbackupTest, unittest.TestCase): self.set_archiving(backup_dir, 'node', node) node.start() - backup_id = self.backup_node(backup_dir, 'node', node) + # FULL + backup_id_1 = self.backup_node(backup_dir, 'node', node) + + node.safe_psql( + "postgres", + "create table t_heap as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i") + file_path = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap')").rstrip() + # PAGE1 + backup_id_2 = self.backup_node(backup_dir, 'node', node, backup_type='page') + + node.safe_psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(10000,20000) i") + # PAGE2 + backup_id_3 = self.backup_node(backup_dir, 'node', node, backup_type='page') + + # Corrupt some file + file = os.path.join(backup_dir, 'backups/node', backup_id_2, 'database', file_path) + with open(file, "rb+", 0) as f: + f.seek(42) + f.write(b"blah") + f.flush() + f.close + + # Simple validate + try: + self.validate_pb(backup_dir, 'node', backup_id=backup_id_2, + options=['--log-level-file=verbose']) + self.assertEqual(1, 0, "Expecting Error because of data files corruption.\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'INFO: Validating parents for backup {0}'.format(backup_id_2) in e.message + and 'ERROR: Backup {0} is corrupt'.format(backup_id_2) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + + self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id_2)['status'], 'Backup STATUS should be "CORRUPT"') + self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_3)['status'], 'Backup STATUS should be "ORPHAN"') + + # Clean after yourself + # self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_validate_corrupted_intermediate_backups(self): + """make archive node, take FULL, PAGE1, PAGE2 backups, + corrupt file in FULL and PAGE1 backupd, run validate on PAGE1, + expect FULL and PAGE1 to gain status CORRUPT and PAGE2 get status ORPHAN""" + fname = self.id().split('.')[3] + node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname), + initdb_params=['--data-checksums'], + pg_options={'wal_level': 'replica'} + ) + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.start() + + node.safe_psql( + "postgres", + "create table t_heap as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i") + file_path_t_heap = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap')").rstrip() + # FULL + backup_id_1 = self.backup_node(backup_dir, 'node', node) + + node.safe_psql( + "postgres", + "create table t_heap_1 as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i") + file_path_t_heap_1 = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap_1')").rstrip() + # PAGE1 + backup_id_2 = self.backup_node(backup_dir, 'node', node, backup_type='page') + + node.safe_psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(20000,30000) i") + # PAGE2 + backup_id_3 = self.backup_node(backup_dir, 'node', node, backup_type='page') + + # Corrupt some file in FULL backup + file_full = os.path.join(backup_dir, 'backups/node', backup_id_1, 'database', file_path_t_heap) + with open(file_full, "rb+", 0) as f: + f.seek(84) + f.write(b"blah") + f.flush() + f.close + + # Corrupt some file in PAGE1 backup + file_page1 = os.path.join(backup_dir, 'backups/node', backup_id_2, 'database', file_path_t_heap_1) + with open(file_page1, "rb+", 0) as f: + f.seek(42) + f.write(b"blah") + f.flush() + f.close + + # Validate PAGE1 + try: + self.validate_pb(backup_dir, 'node', backup_id=backup_id_2, + options=['--log-level-file=verbose']) + self.assertEqual(1, 0, "Expecting Error because of data files corruption.\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue('INFO: Validating parents for backup {0}'.format(backup_id_2) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + self.assertTrue( + 'INFO: Validating backup {0}'.format(backup_id_1) in e.message + and 'WARNING: Invalid CRC of backup file "{0}"'.format(file_full) in e.message + and 'WARNING: Backup {0} data files are corrupted'.format(backup_id_1) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + self.assertTrue( + 'WARNING: Backup {0} is orphaned because his parent'.format(backup_id_2) in e.message + and 'WARNING: Backup {0} is orphaned because his parent'.format(backup_id_3) in e.message + and 'ERROR: Backup {0} is orphan.'.format(backup_id_2) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + + self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id_1)['status'], 'Backup STATUS should be "CORRUPT"') + self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_2)['status'], 'Backup STATUS should be "ORPHAN"') + self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_3)['status'], 'Backup STATUS should be "ORPHAN"') + + # Clean after yourself + # self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_validate_corrupted_intermediate_backups_1(self): + """make archive node, take FULL1, PAGE1, PAGE2, PAGE3, PAGE4, PAGE5, FULL2 backups, + corrupt file in PAGE1 and PAGE4, run validate on PAGE3, + expect PAGE1 to gain status CORRUPT, PAGE2, PAGE3, PAGE4 and PAGE5 to gain status ORPHAN""" + fname = self.id().split('.')[3] + node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname), + initdb_params=['--data-checksums'], + pg_options={'wal_level': 'replica'} + ) + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.start() + + # FULL1 + backup_id_1 = self.backup_node(backup_dir, 'node', node) + + # PAGE1 + node.safe_psql( + "postgres", + "create table t_heap as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i") + backup_id_2 = self.backup_node(backup_dir, 'node', node, backup_type='page') + + # PAGE2 + node.safe_psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i") + file_page_2 = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap')").rstrip() + backup_id_3 = self.backup_node(backup_dir, 'node', node, backup_type='page') + + # PAGE3 + node.safe_psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(10000,20000) i") + backup_id_4 = self.backup_node(backup_dir, 'node', node, backup_type='page') + + # PAGE4 + node.safe_psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(20000,30000) i") + backup_id_5 = self.backup_node(backup_dir, 'node', node, backup_type='page') + + # PAGE5 + node.safe_psql( + "postgres", + "create table t_heap1 as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i") + file_page_5 = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap1')").rstrip() + backup_id_6 = self.backup_node(backup_dir, 'node', node, backup_type='page') + + # PAGE6 + node.safe_psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(30000,40000) i") + backup_id_7 = self.backup_node(backup_dir, 'node', node, backup_type='page') + + # FULL2 + backup_id_8 = self.backup_node(backup_dir, 'node', node) + + # Corrupt some file in PAGE2 and PAGE5 backups + file_page1 = os.path.join(backup_dir, 'backups/node', backup_id_3, 'database', file_page_2) + with open(file_page1, "rb+", 0) as f: + f.seek(84) + f.write(b"blah") + f.flush() + f.close + + file_page4 = os.path.join(backup_dir, 'backups/node', backup_id_6, 'database', file_page_5) + with open(file_page4, "rb+", 0) as f: + f.seek(42) + f.write(b"blah") + f.flush() + f.close + + # Validate PAGE3 + try: + self.validate_pb(backup_dir, 'node', backup_id=backup_id_4, + options=['--log-level-file=verbose']) + self.assertEqual(1, 0, "Expecting Error because of data files corruption.\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'INFO: Validating parents for backup {0}'.format(backup_id_4) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + self.assertTrue( + 'INFO: Validating backup {0}'.format(backup_id_1) in e.message + and 'INFO: Backup {0} data files are valid'.format(backup_id_1) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + self.assertTrue( + 'INFO: Validating backup {0}'.format(backup_id_2) in e.message + and 'INFO: Backup {0} data files are valid'.format(backup_id_2) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + self.assertTrue( + 'INFO: Validating backup {0}'.format(backup_id_3) in e.message + and 'WARNING: Invalid CRC of backup file "{0}"'.format(file_page1) in e.message + and 'WARNING: Backup {0} data files are corrupted'.format(backup_id_3) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + self.assertTrue( + 'WARNING: Backup {0} is orphaned because his parent {1} is corrupted'.format(backup_id_4, backup_id_3) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + self.assertTrue( + 'WARNING: Backup {0} is orphaned because his parent {1} is corrupted'.format(backup_id_5, backup_id_3) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + self.assertTrue( + 'WARNING: Backup {0} is orphaned because his parent {1} is corrupted'.format(backup_id_6, backup_id_3) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + self.assertTrue( + 'WARNING: Backup {0} is orphaned because his parent {1} is corrupted'.format(backup_id_7, backup_id_3) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + self.assertTrue( + 'ERROR: Backup {0} is orphan'.format(backup_id_4) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + + self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_1)['status'], 'Backup STATUS should be "OK"') + self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_2)['status'], 'Backup STATUS should be "OK"') + self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id_3)['status'], 'Backup STATUS should be "CORRUPT"') + self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_4)['status'], 'Backup STATUS should be "ORPHAN"') + self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_5)['status'], 'Backup STATUS should be "ORPHAN"') + self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_6)['status'], 'Backup STATUS should be "ORPHAN"') + self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_7)['status'], 'Backup STATUS should be "ORPHAN"') + self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_8)['status'], 'Backup STATUS should be "OK"') + + # Clean after yourself + # self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_validate_specific_target_corrupted_intermediate_backups(self): + """make archive node, take FULL1, PAGE1, PAGE2, PAGE3, PAGE4, PAGE5, FULL2 backups, + corrupt file in PAGE1 and PAGE4, run validate on PAGE3 to specific xid, + expect PAGE1 to gain status CORRUPT, PAGE2, PAGE3, PAGE4 and PAGE5 to gain status ORPHAN""" + fname = self.id().split('.')[3] + node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname), + initdb_params=['--data-checksums'], + pg_options={'wal_level': 'replica'} + ) + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.start() + + # FULL1 + backup_id_1 = self.backup_node(backup_dir, 'node', node) + + # PAGE1 + node.safe_psql( + "postgres", + "create table t_heap as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i") + backup_id_2 = self.backup_node(backup_dir, 'node', node, backup_type='page') + + # PAGE2 + node.safe_psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i") + file_page_2 = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap')").rstrip() + backup_id_3 = self.backup_node(backup_dir, 'node', node, backup_type='page') + + # PAGE3 + node.safe_psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(10000,20000) i") + backup_id_4 = self.backup_node(backup_dir, 'node', node, backup_type='page') + + # PAGE4 + target_xid = node.safe_psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(20000,30000) i RETURNING (xmin)")[0][0] + backup_id_5 = self.backup_node(backup_dir, 'node', node, backup_type='page') + + # PAGE5 + node.safe_psql( + "postgres", + "create table t_heap1 as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i") + file_page_5 = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap1')").rstrip() + backup_id_6 = self.backup_node(backup_dir, 'node', node, backup_type='page') + + # PAGE6 + node.safe_psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(30000,40000) i") + backup_id_7 = self.backup_node(backup_dir, 'node', node, backup_type='page') + + # FULL2 + backup_id_8 = self.backup_node(backup_dir, 'node', node) + + # Corrupt some file in PAGE2 and PAGE5 backups + file_page1 = os.path.join(backup_dir, 'backups/node', backup_id_3, 'database', file_page_2) + with open(file_page1, "rb+", 0) as f: + f.seek(84) + f.write(b"blah") + f.flush() + f.close + + file_page4 = os.path.join(backup_dir, 'backups/node', backup_id_6, 'database', file_page_5) + with open(file_page4, "rb+", 0) as f: + f.seek(42) + f.write(b"blah") + f.flush() + f.close + + # Validate PAGE3 + try: + self.validate_pb(backup_dir, 'node', + options=['--log-level-file=verbose', '-i', backup_id_4, '--xid={0}'.format(target_xid)]) + self.assertEqual(1, 0, "Expecting Error because of data files corruption.\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'INFO: Validating parents for backup {0}'.format(backup_id_4) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + self.assertTrue( + 'INFO: Validating backup {0}'.format(backup_id_1) in e.message + and 'INFO: Backup {0} data files are valid'.format(backup_id_1) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + self.assertTrue( + 'INFO: Validating backup {0}'.format(backup_id_2) in e.message + and 'INFO: Backup {0} data files are valid'.format(backup_id_2) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + self.assertTrue( + 'INFO: Validating backup {0}'.format(backup_id_3) in e.message + and 'WARNING: Invalid CRC of backup file "{0}"'.format(file_page1) in e.message + and 'WARNING: Backup {0} data files are corrupted'.format(backup_id_3) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + self.assertTrue( + 'WARNING: Backup {0} is orphaned because his parent {1} is corrupted'.format(backup_id_4, backup_id_3) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + self.assertTrue( + 'WARNING: Backup {0} is orphaned because his parent {1} is corrupted'.format(backup_id_5, backup_id_3) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + self.assertTrue( + 'WARNING: Backup {0} is orphaned because his parent {1} is corrupted'.format(backup_id_6, backup_id_3) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + self.assertTrue( + 'WARNING: Backup {0} is orphaned because his parent {1} is corrupted'.format(backup_id_7, backup_id_3) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + self.assertTrue( + 'ERROR: Backup {0} is orphan'.format(backup_id_4) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + + self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_1)['status'], 'Backup STATUS should be "OK"') + self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_2)['status'], 'Backup STATUS should be "OK"') + self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id_3)['status'], 'Backup STATUS should be "CORRUPT"') + self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_4)['status'], 'Backup STATUS should be "ORPHAN"') + self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_5)['status'], 'Backup STATUS should be "ORPHAN"') + self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_6)['status'], 'Backup STATUS should be "ORPHAN"') + self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_7)['status'], 'Backup STATUS should be "ORPHAN"') + self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_8)['status'], 'Backup STATUS should be "OK"') + + # Clean after yourself + # self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_validate_instance_with_corrupted_page(self): + """make archive node, take FULL, PAGE1, PAGE2, FULL2, PAGE3 backups, + corrupt file in PAGE1 backup and run validate on instance, + expect PAGE1 to gain status CORRUPT, PAGE2 to gain status ORPHAN""" + fname = self.id().split('.')[3] + node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname), + initdb_params=['--data-checksums'], + pg_options={'wal_level': 'replica'} + ) + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.start() + + node.safe_psql( + "postgres", + "create table t_heap as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i") + # FULL1 + backup_id_1 = self.backup_node(backup_dir, 'node', node) + + node.safe_psql( + "postgres", + "create table t_heap1 as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i") + file_path_t_heap1 = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap1')").rstrip() + # PAGE1 + backup_id_2 = self.backup_node(backup_dir, 'node', node, backup_type='page') + + node.safe_psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(20000,30000) i") + # PAGE2 + backup_id_3 = self.backup_node(backup_dir, 'node', node, backup_type='page') + # FULL1 + backup_id_4 = self.backup_node(backup_dir, 'node', node) + # PAGE3 + backup_id_5 = self.backup_node(backup_dir, 'node', node, backup_type='page') + + # Corrupt some file in FULL backup + file_full = os.path.join(backup_dir, 'backups/node', backup_id_2, 'database', file_path_t_heap1) + with open(file_full, "rb+", 0) as f: + f.seek(84) + f.write(b"blah") + f.flush() + f.close + + # Validate Instance + try: + self.validate_pb(backup_dir, 'node', options=['--log-level-file=verbose']) + self.assertEqual(1, 0, "Expecting Error because of data files corruption.\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + "INFO: Validate backups of the instance 'node'" in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + self.assertTrue( + 'INFO: Validating backup {0}'.format(backup_id_5) in e.message + and 'INFO: Backup {0} data files are valid'.format(backup_id_5) in e.message + and 'INFO: Backup {0} WAL segments are valid'.format(backup_id_5) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + self.assertTrue( + 'INFO: Validating backup {0}'.format(backup_id_4) in e.message + and 'INFO: Backup {0} data files are valid'.format(backup_id_4) in e.message + and 'INFO: Backup {0} WAL segments are valid'.format(backup_id_4) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + self.assertTrue( + 'INFO: Validating backup {0}'.format(backup_id_3) in e.message + and 'INFO: Backup {0} data files are valid'.format(backup_id_3) in e.message + and 'INFO: Backup {0} WAL segments are valid'.format(backup_id_3) in e.message + and 'WARNING: Backup {0} is orphaned because his parent {1} is corrupted'.format(backup_id_3, backup_id_2) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + self.assertTrue( + 'INFO: Validating backup {0}'.format(backup_id_2) in e.message + and 'WARNING: Invalid CRC of backup file "{0}"'.format(file_full) in e.message + and 'WARNING: Backup {0} data files are corrupted'.format(backup_id_2) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + self.assertTrue( + 'INFO: Validating backup {0}'.format(backup_id_1) in e.message + and 'INFO: Backup {0} data files are valid'.format(backup_id_1) in e.message + and 'INFO: Backup {0} WAL segments are valid'.format(backup_id_1) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + self.assertTrue( + 'INFO: Some backups are not valid' in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + + self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_1)['status'], 'Backup STATUS should be "OK"') + self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id_2)['status'], 'Backup STATUS should be "CORRUPT"') + self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_3)['status'], 'Backup STATUS should be "ORPHAN"') + self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_4)['status'], 'Backup STATUS should be "OK"') + self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_5)['status'], 'Backup STATUS should be "OK"') + + # Clean after yourself + # self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_validate_instance_with_corrupted_full(self): + """make archive node, take FULL, PAGE1, PAGE2, FULL2, PAGE3 backups, + corrupt file in FULL backup and run validate on instance, + expect FULL to gain status CORRUPT, PAGE1 and PAGE2 to gain status ORPHAN""" + fname = self.id().split('.')[3] + node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname), + initdb_params=['--data-checksums'], + pg_options={'wal_level': 'replica'} + ) + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.start() + + node.safe_psql( + "postgres", + "create table t_heap as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i") + file_path_t_heap = node.safe_psql( + "postgres", + "select pg_relation_filepath('t_heap')").rstrip() + # FULL1 + backup_id_1 = self.backup_node(backup_dir, 'node', node) + + node.safe_psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i") + # PAGE1 + backup_id_2 = self.backup_node(backup_dir, 'node', node, backup_type='page') + + # PAGE2 + node.safe_psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(20000,30000) i") + backup_id_3 = self.backup_node(backup_dir, 'node', node, backup_type='page') + + # FULL1 + backup_id_4 = self.backup_node(backup_dir, 'node', node) + + # PAGE3 + node.safe_psql( + "postgres", + "insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(30000,40000) i") + backup_id_5 = self.backup_node(backup_dir, 'node', node, backup_type='page') + + # Corrupt some file in FULL backup + file_full = os.path.join(backup_dir, 'backups/node', backup_id_1, 'database', file_path_t_heap) + with open(file_full, "rb+", 0) as f: + f.seek(84) + f.write(b"blah") + f.flush() + f.close + + # Validate Instance + try: + self.validate_pb(backup_dir, 'node', options=['--log-level-file=verbose']) + self.assertEqual(1, 0, "Expecting Error because of data files corruption.\n Output: {0} \n CMD: {1}".format( + repr(self.output), self.cmd)) + except ProbackupException as e: + self.assertTrue( + 'INFO: Validating backup {0}'.format(backup_id_1) in e.message + and "INFO: Validate backups of the instance 'node'" in e.message + and 'WARNING: Invalid CRC of backup file "{0}"'.format(file_full) in e.message + and 'WARNING: Backup {0} data files are corrupted'.format(backup_id_1) in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + + self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id_1)['status'], 'Backup STATUS should be "CORRUPT"') + self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_2)['status'], 'Backup STATUS should be "ORPHAN"') + self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_3)['status'], 'Backup STATUS should be "ORPHAN"') + self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_4)['status'], 'Backup STATUS should be "OK"') + self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_5)['status'], 'Backup STATUS should be "OK"') + + # Clean after yourself + # self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + def test_validate_corrupt_wal_1(self): + """make archive node, take FULL1, PAGE1,PAGE2,FULL2,PAGE3,PAGE4 backups, corrupt all wal files, run validate, expect errors""" + fname = self.id().split('.')[3] + node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname), + initdb_params=['--data-checksums'], + pg_options={'wal_level': 'replica'} + ) + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node', node) + self.set_archiving(backup_dir, 'node', node) + node.start() + + backup_id_1 = self.backup_node(backup_dir, 'node', node) with node.connect("postgres") as con: con.execute("CREATE TABLE tbl0005 (a text)") con.commit() - backup_id = self.backup_node(backup_dir, 'node', node) + backup_id_2 = self.backup_node(backup_dir, 'node', node) # Corrupt WAL wals_dir = os.path.join(backup_dir, 'wal', 'node') @@ -143,7 +730,8 @@ class ValidateTest(ProbackupTest, unittest.TestCase): self.assertTrue('Possible WAL CORRUPTION' in e.message, '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) - self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id)['status'], 'Backup STATUS should be "CORRUPT"') + self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id_1)['status'], 'Backup STATUS should be "CORRUPT"') + self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id_2)['status'], 'Backup STATUS should be "CORRUPT"') # Clean after yourself self.del_test_dir(module_name, fname) @@ -190,8 +778,8 @@ class ValidateTest(ProbackupTest, unittest.TestCase): self.assertEqual(1, 0, "Expecting Error because of wal segments corruption.\n Output: {0} \n CMD: {1}".format( repr(self.output), self.cmd)) except ProbackupException as e: - self.assertTrue('Possible WAL CORRUPTION' in e.message), - '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd) + self.assertTrue('Possible WAL CORRUPTION' in e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id)['status'], 'Backup STATUS should be "CORRUPT"') @@ -236,8 +824,14 @@ class ValidateTest(ProbackupTest, unittest.TestCase): self.assertEqual(1, 0, "Expecting Error because of wal segment disappearance.\n Output: {0} \n CMD: {1}".format( repr(self.output), self.cmd)) except ProbackupException as e: - self.assertIn('WARNING: WAL segment "{0}" is absent\nERROR: there are not enough WAL records to restore'.format( - file), e.message, '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + self.assertIn('WARNING: WAL segment "{0}" is absent'.format(file), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + self.assertIn('WARNING: There are not enough WAL records to consistenly restore backup {0}'.format(backup_id), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + self.assertIn('WARNING: Backup {0} WAL segments are corrupted'.format(backup_id), e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) + self.assertIn('INFO: Some backups are not valid', e.message, + '\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd)) self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id)['status'], 'Backup {0} should have STATUS "CORRUPT"') @@ -293,6 +887,12 @@ class ValidateTest(ProbackupTest, unittest.TestCase): os.remove(file) if self.archive_compress: file = file[:-3] + + # Try to validate + + + + # Try to restore try: backup_id = self.backup_node(backup_dir, 'node', node, backup_type='page') self.assertEqual(1, 0, "Expecting Error because of wal segment disappearance.\n Output: {0} \n CMD: {1}".format( @@ -398,10 +998,76 @@ class ValidateTest(ProbackupTest, unittest.TestCase): archive_command_node2 = node2.safe_psql("postgres", "show archive_command") self.assertEqual(archive_command_node1, archive_command_node2, "Archive command on Master and Node should be equal. This is unexpected") - result = node2.safe_psql("postgres", "select last_failed_wal from pg_stat_get_archiver() where last_failed_wal is not NULL") - # self.assertEqual(res, six.b(""), 'Restored Node1 failed to archive segment {0} due to having the same archive command as Master'.format(res.rstrip())) - if result == "": - self.assertEqual(1, 0, 'Error is expected due to Master and Node1 having the common archive and archive_command') + #result = node2.safe_psql("postgres", "select last_failed_wal from pg_stat_get_archiver() where last_failed_wal is not NULL") + ## self.assertEqual(res, six.b(""), 'Restored Node1 failed to archive segment {0} due to having the same archive command as Master'.format(res.rstrip())) + #if result == "": + # self.assertEqual(1, 0, 'Error is expected due to Master and Node1 having the common archive and archive_command') + + log_file = os.path.join(node2.logs_dir, 'postgresql.log') + with open(log_file, 'r') as f: + log_content = f.read() + self.assertTrue('LOG: archive command failed with exit code 1' in log_content + and 'DETAIL: The failed archive command was:' in log_content + and 'INFO: pg_probackup archive-push from' in log_content, + 'Expecting error messages about failed archive_command' + ) + self.assertFalse('pg_probackup archive-push completed successfully' in log_content) # Clean after yourself - self.del_test_dir(module_name, fname) + # self.del_test_dir(module_name, fname) + + # @unittest.skip("skip") + # @unittest.expectedFailure + def test_pgpro561(self): + """make node with archiving, make stream backup, restore it to node1, check that archiving is not successful on node1""" + fname = self.id().split('.')[3] + node1 = self.make_simple_node(base_dir="{0}/{1}/node1".format(module_name, fname), + set_replication=True, + initdb_params=['--data-checksums'], + pg_options={'wal_level': 'replica', 'max_wal_senders': '2'} + ) + backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup') + self.init_pb(backup_dir) + self.add_instance(backup_dir, 'node1', node1) + self.set_archiving(backup_dir, 'node1', node1) + node1.start() + + backup_id = self.backup_node(backup_dir, 'node1', node1, options=["--stream"]) + + node2 = self.make_simple_node(base_dir="{0}/{1}/node2".format(module_name, fname)) + node2.cleanup() + + node1.psql( + "postgres", + "create table t_heap as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,256) i") + + self.backup_node(backup_dir, 'node1', node1, backup_type='page', options=["--stream"]) + self.restore_node(backup_dir, 'node1', data_dir=node2.data_dir) + node2.append_conf('postgresql.auto.conf', 'port = {0}'.format(node2.port)) + node2.start() + + timeline_node1 = node1.get_control_data()["Latest checkpoint's TimeLineID"] + timeline_node2 = node2.get_control_data()["Latest checkpoint's TimeLineID"] + self.assertEqual(timeline_node1, timeline_node2, "Timelines on Master and Node1 should be equal. This is unexpected") + + archive_command_node1 = node1.safe_psql("postgres", "show archive_command") + archive_command_node2 = node2.safe_psql("postgres", "show archive_command") + self.assertEqual(archive_command_node1, archive_command_node2, "Archive command on Master and Node should be equal. This is unexpected") + + #result = node2.safe_psql("postgres", "select last_failed_wal from pg_stat_get_archiver() where last_failed_wal is not NULL") + ## self.assertEqual(res, six.b(""), 'Restored Node1 failed to archive segment {0} due to having the same archive command as Master'.format(res.rstrip())) + #if result == "": + # self.assertEqual(1, 0, 'Error is expected due to Master and Node1 having the common archive and archive_command') + + log_file = os.path.join(node2.logs_dir, 'postgresql.log') + with open(log_file, 'r') as f: + log_content = f.read() + self.assertTrue('LOG: archive command failed with exit code 1' in log_content + and 'DETAIL: The failed archive command was:' in log_content + and 'INFO: pg_probackup archive-push from' in log_content, + 'Expecting error messages about failed archive_command' + ) + self.assertFalse('pg_probackup archive-push completed successfully' in log_content) + + # Clean after yourself + # self.del_test_dir(module_name, fname)