1
0
mirror of https://github.com/postgrespro/pg_probackup.git synced 2025-03-17 21:18:00 +02:00

Merge branch 'pgpro-1286-no-validation-restore'

This commit is contained in:
Grigory Smolkin 2018-07-18 08:44:13 +03:00
commit 0076462f65
5 changed files with 220 additions and 55 deletions

View File

@ -268,7 +268,7 @@ help_restore(void)
printf(_(" [--timeline=timeline] [-T OLDDIR=NEWDIR]\n"));
printf(_(" [--immediate] [--recovery-target-name=target-name]\n"));
printf(_(" [--recovery-target-action=pause|promote|shutdown]\n"));
printf(_(" [--restore-as-replica]\n\n"));
printf(_(" [--restore-as-replica] [--no-validate]\n\n"));
printf(_(" -B, --backup-path=backup-path location of the backup storage area\n"));
printf(_(" --instance=instance_name name of the instance\n"));
@ -285,6 +285,7 @@ help_restore(void)
printf(_(" relocate the tablespace from directory OLDDIR to NEWDIR\n"));
printf(_(" --immediate end recovery as soon as a consistent state is reached\n"));
printf(_(" --no-validate disable backup validation during recovery\n"));
printf(_(" --recovery-target-name=target-name\n"));
printf(_(" the named restore point to which recovery will proceed\n"));
printf(_(" --recovery-target-action=pause|promote|shutdown\n"));

View File

@ -70,6 +70,7 @@ static char *target_action = NULL;
static pgRecoveryTarget *recovery_target_options = NULL;
bool restore_as_replica = false;
bool restore_no_validate = false;
/* delete options */
bool delete_wal = false;
@ -148,6 +149,7 @@ static pgut_option options[] =
{ 's', 25, "recovery-target-name", &target_name, SOURCE_CMDLINE },
{ 's', 26, "recovery-target-action", &target_action, SOURCE_CMDLINE },
{ 'b', 'R', "restore-as-replica", &restore_as_replica, SOURCE_CMDLINE },
{ 'b', 27, "no-validate", &restore_no_validate, SOURCE_CMDLINE },
/* delete options */
{ 'b', 130, "wal", &delete_wal, SOURCE_CMDLINE },
{ 'b', 131, "expired", &delete_expired, SOURCE_CMDLINE },
@ -435,7 +437,7 @@ main(int argc, char *argv[])
/* parse all recovery target options into recovery_target_options structure */
recovery_target_options = parseRecoveryTargetOptions(target_time, target_xid,
target_inclusive, target_tli, target_immediate,
target_name, target_action);
target_name, target_action, restore_no_validate);
}
if (num_threads < 1)

View File

@ -264,6 +264,7 @@ typedef struct pgRecoveryTarget
bool recovery_target_immediate;
const char *recovery_target_name;
const char *recovery_target_action;
bool restore_no_validate;
} pgRecoveryTarget;
/* Union to ease operations on relation pages */
@ -397,7 +398,7 @@ extern parray * readTimeLineHistory_probackup(TimeLineID targetTLI);
extern pgRecoveryTarget *parseRecoveryTargetOptions(
const char *target_time, const char *target_xid,
const char *target_inclusive, TimeLineID target_tli, bool target_immediate,
const char *target_name, const char *target_action);
const char *target_name, const char *target_action, bool restore_no_validate);
extern void opt_tablespace_map(pgut_option *opt, const char *arg);

View File

@ -243,66 +243,69 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
if (is_restore)
check_tablespace_mapping(dest_backup);
if (dest_backup->backup_mode != BACKUP_MODE_FULL)
elog(INFO, "Validating parents for backup %s", base36enc(dest_backup->start_time));
/*
* Validate backups from base_full_backup to dest_backup.
*/
for (i = base_full_backup_index; i >= dest_backup_index; i--)
if (!is_restore || !rt->restore_no_validate)
{
pgBackup *backup = (pgBackup *) parray_get(backups, i);
pgBackupValidate(backup);
/* Maybe we should be more paranoid and check for !BACKUP_STATUS_OK? */
if (backup->status == BACKUP_STATUS_CORRUPT)
{
corrupted_backup = backup;
corrupted_backup_index = i;
break;
}
/* We do not validate WAL files of intermediate backups
* It`s done to speed up restore
*/
}
/* There is no point in wal validation
* if there is corrupted backup between base_backup and dest_backup
*/
if (!corrupted_backup)
if (dest_backup->backup_mode != BACKUP_MODE_FULL)
elog(INFO, "Validating parents for backup %s", base36enc(dest_backup->start_time));
/*
* Validate corresponding WAL files.
* We pass base_full_backup timeline as last argument to this function,
* because it's needed to form the name of xlog file.
* Validate backups from base_full_backup to dest_backup.
*/
validate_wal(dest_backup, arclog_path, rt->recovery_target_time,
rt->recovery_target_xid, base_full_backup->tli);
/* Set every incremental backup between corrupted backup and nearest FULL backup as orphans */
if (corrupted_backup)
{
for (i = corrupted_backup_index - 1; i >= 0; i--)
for (i = base_full_backup_index; i >= dest_backup_index; i--)
{
pgBackup *backup = (pgBackup *) parray_get(backups, i);
/* Mark incremental OK backup as orphan */
if (backup->backup_mode == BACKUP_MODE_FULL)
break;
if (backup->status != BACKUP_STATUS_OK)
continue;
else
pgBackupValidate(backup);
/* Maybe we should be more paranoid and check for !BACKUP_STATUS_OK? */
if (backup->status == BACKUP_STATUS_CORRUPT)
{
char *backup_id,
*corrupted_backup_id;
corrupted_backup = backup;
corrupted_backup_index = i;
break;
}
/* We do not validate WAL files of intermediate backups
* It`s done to speed up restore
*/
}
/* There is no point in wal validation
* if there is corrupted backup between base_backup and dest_backup
*/
if (!corrupted_backup)
/*
* Validate corresponding WAL files.
* We pass base_full_backup timeline as last argument to this function,
* because it's needed to form the name of xlog file.
*/
validate_wal(dest_backup, arclog_path, rt->recovery_target_time,
rt->recovery_target_xid, base_full_backup->tli);
backup->status = BACKUP_STATUS_ORPHAN;
pgBackupWriteBackupControlFile(backup);
/* Set every incremental backup between corrupted backup and nearest FULL backup as orphans */
if (corrupted_backup)
{
for (i = corrupted_backup_index - 1; i >= 0; i--)
{
pgBackup *backup = (pgBackup *) parray_get(backups, i);
/* Mark incremental OK backup as orphan */
if (backup->backup_mode == BACKUP_MODE_FULL)
break;
if (backup->status != BACKUP_STATUS_OK)
continue;
else
{
char *backup_id,
*corrupted_backup_id;
backup_id = base36enc_dup(backup->start_time);
corrupted_backup_id = base36enc_dup(corrupted_backup->start_time);
backup->status = BACKUP_STATUS_ORPHAN;
pgBackupWriteBackupControlFile(backup);
elog(WARNING, "Backup %s is orphaned because his parent %s is corrupted",
backup_id, corrupted_backup_id);
backup_id = base36enc_dup(backup->start_time);
corrupted_backup_id = base36enc_dup(corrupted_backup->start_time);
free(backup_id);
free(corrupted_backup_id);
elog(WARNING, "Backup %s is orphaned because his parent %s is corrupted",
backup_id, corrupted_backup_id);
free(backup_id);
free(corrupted_backup_id);
}
}
}
}
@ -312,7 +315,12 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
* produce corresponding error message
*/
if (dest_backup->status == BACKUP_STATUS_OK)
elog(INFO, "Backup %s is valid.", base36enc(dest_backup->start_time));
{
if (rt->restore_no_validate)
elog(INFO, "Backup %s is used without validation.", base36enc(dest_backup->start_time));
else
elog(INFO, "Backup %s is valid.", base36enc(dest_backup->start_time));
}
else if (dest_backup->status == BACKUP_STATUS_CORRUPT)
elog(ERROR, "Backup %s is corrupt.", base36enc(dest_backup->start_time));
else if (dest_backup->status == BACKUP_STATUS_ORPHAN)
@ -1003,7 +1011,8 @@ parseRecoveryTargetOptions(const char *target_time,
TimeLineID target_tli,
bool target_immediate,
const char *target_name,
const char *target_action)
const char *target_action,
bool restore_no_validate)
{
time_t dummy_time;
TransactionId dummy_xid;
@ -1028,6 +1037,7 @@ parseRecoveryTargetOptions(const char *target_time,
rt->recovery_target_immediate = false;
rt->recovery_target_name = NULL;
rt->recovery_target_action = NULL;
rt->restore_no_validate = false;
/* parse given options */
if (target_time)
@ -1074,6 +1084,11 @@ parseRecoveryTargetOptions(const char *target_time,
rt->recovery_target_immediate = target_immediate;
}
if (restore_no_validate)
{
rt->restore_no_validate = restore_no_validate;
}
if (target_name)
{
recovery_target_specified++;

View File

@ -748,6 +748,93 @@ class ValidateTest(ProbackupTest, unittest.TestCase):
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_validate_instance_with_corrupted_full_and_try_restore(self):
"""make archive node, take FULL, PAGE1, PAGE2, FULL2, PAGE3 backups,
corrupt file in FULL backup and run validate on instance,
expect FULL to gain status CORRUPT, PAGE1 and PAGE2 to gain status ORPHAN,
try to restore backup with --no-validation option"""
fname = self.id().split('.')[3]
node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname),
initdb_params=['--data-checksums'],
pg_options={'wal_level': 'replica'}
)
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
node.start()
node.safe_psql(
"postgres",
"create table t_heap as select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i")
file_path_t_heap = node.safe_psql(
"postgres",
"select pg_relation_filepath('t_heap')").rstrip()
# FULL1
backup_id_1 = self.backup_node(backup_dir, 'node', node)
node.safe_psql(
"postgres",
"insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(0,10000) i")
# PAGE1
backup_id_2 = self.backup_node(backup_dir, 'node', node, backup_type='page')
# PAGE2
node.safe_psql(
"postgres",
"insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(20000,30000) i")
backup_id_3 = self.backup_node(backup_dir, 'node', node, backup_type='page')
# FULL1
backup_id_4 = self.backup_node(backup_dir, 'node', node)
# PAGE3
node.safe_psql(
"postgres",
"insert into t_heap select i as id, md5(i::text) as text, md5(repeat(i::text,10))::tsvector as tsvector from generate_series(30000,40000) i")
backup_id_5 = self.backup_node(backup_dir, 'node', node, backup_type='page')
# Corrupt some file in FULL backup
file_full = os.path.join(backup_dir, 'backups/node', backup_id_1, 'database', file_path_t_heap)
with open(file_full, "rb+", 0) as f:
f.seek(84)
f.write(b"blah")
f.flush()
f.close
# Validate Instance
try:
self.validate_pb(backup_dir, 'node', options=['--log-level-file=verbose'])
self.assertEqual(1, 0, "Expecting Error because of data files corruption.\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertTrue(
'INFO: Validating backup {0}'.format(backup_id_1) in e.message
and "INFO: Validate backups of the instance 'node'" in e.message
and 'WARNING: Invalid CRC of backup file "{0}"'.format(file_full) in e.message
and 'WARNING: Backup {0} data files are corrupted'.format(backup_id_1) in e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(e.message), self.cmd))
self.assertEqual('CORRUPT', self.show_pb(backup_dir, 'node', backup_id_1)['status'], 'Backup STATUS should be "CORRUPT"')
self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_2)['status'], 'Backup STATUS should be "ORPHAN"')
self.assertEqual('ORPHAN', self.show_pb(backup_dir, 'node', backup_id_3)['status'], 'Backup STATUS should be "ORPHAN"')
self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_4)['status'], 'Backup STATUS should be "OK"')
self.assertEqual('OK', self.show_pb(backup_dir, 'node', backup_id_5)['status'], 'Backup STATUS should be "OK"')
node.cleanup()
restore_out = self.restore_node(
backup_dir, 'node', node,
options=["--no-validate"])
self.assertIn(
"INFO: Restore of backup {0} completed.".format(backup_id_5),
restore_out,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(self.output), self.cmd))
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_validate_instance_with_corrupted_full(self):
"""make archive node, take FULL, PAGE1, PAGE2, FULL2, PAGE3 backups,
@ -1582,3 +1669,62 @@ class ValidateTest(ProbackupTest, unittest.TestCase):
# Clean after yourself
self.del_test_dir(module_name, fname)
def test_file_size_corruption_no_validate(self):
fname = self.id().split('.')[3]
node = self.make_simple_node(
base_dir="{0}/{1}/node".format(module_name, fname),
# initdb_params=['--data-checksums'],
pg_options={'wal_level': 'replica'}
)
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
node.start()
node.safe_psql(
"postgres",
"create table t_heap as select 1 as id, md5(i::text) as text, "
"md5(repeat(i::text,10))::tsvector as tsvector "
"from generate_series(0,1000) i")
node.safe_psql(
"postgres",
"CHECKPOINT;")
heap_path = node.safe_psql(
"postgres",
"select pg_relation_filepath('t_heap')").rstrip()
heap_size = node.safe_psql(
"postgres",
"select pg_relation_size('t_heap')")
backup_id = self.backup_node(
backup_dir, 'node', node, backup_type="full",
options=["-j", "4"], async=False, gdb=False)
node.stop()
node.cleanup()
# Let`s do file corruption
with open(os.path.join(backup_dir, "backups", 'node', backup_id, "database", heap_path), "rb+", 0) as f:
f.truncate(int(heap_size) - 4096)
f.flush()
f.close
node.cleanup()
try:
self.restore_node(
backup_dir, 'node', node,
options=["--no-validate"])
except ProbackupException as e:
self.assertTrue("ERROR: Data files restoring failed" in e.message, repr(e.message))
print "\nExpected error: \n" + e.message
# Clean after yourself
self.del_test_dir(module_name, fname)