1
0
mirror of https://github.com/postgrespro/pg_probackup.git synced 2025-02-03 14:01:57 +02:00

PGPRO-2425: previously successfull restore was relying on that there are no failed backups between base_full_backup and dest_backup, it could lead to false-positive validation or restore errors despite the fact that parent chain is valid. There even was a small chance of data corruption: if between base_full_backup and dest_backup were located backups from parallel chain. TLDR: restore was rolling backups blindly. See test test_restore_chain_with_corrupted_backup() for an example.

This commit is contained in:
Grigory Smolkin 2019-02-09 02:15:06 +03:00
parent 2028275c10
commit 628cdce778
3 changed files with 47 additions and 24 deletions

View File

@ -981,6 +981,9 @@ is_parent(time_t parent_backup_time, pgBackup *child_backup, bool inclusive)
if (!child_backup)
elog(ERROR, "Target backup cannot be NULL");
if (inclusive && child_backup->start_time == parent_backup_time)
return true;
while (child_backup->parent_backup_link &&
child_backup->parent_backup != parent_backup_time)
{
@ -990,8 +993,8 @@ is_parent(time_t parent_backup_time, pgBackup *child_backup, bool inclusive)
if (child_backup->parent_backup == parent_backup_time)
return true;
if (inclusive && child_backup->start_time == parent_backup_time)
return true;
//if (inclusive && child_backup->start_time == parent_backup_time)
// return true;
return false;
}

View File

@ -1047,11 +1047,11 @@ create_data_directories(const char *data_dir, const char *backup_dir,
}
if (link_sep)
elog(LOG, "create directory \"%s\" and symbolic link \"%.*s\"",
elog(VERBOSE, "create directory \"%s\" and symbolic link \"%.*s\"",
linked_path,
(int) (link_sep - relative_ptr), relative_ptr);
else
elog(LOG, "create directory \"%s\" and symbolic link \"%s\"",
elog(VERBOSE, "create directory \"%s\" and symbolic link \"%s\"",
linked_path, relative_ptr);
/* Firstly, create linked directory */
@ -1082,7 +1082,7 @@ create_data_directories(const char *data_dir, const char *backup_dir,
}
create_directory:
elog(LOG, "create directory \"%s\"", relative_ptr);
elog(VERBOSE, "create directory \"%s\"", relative_ptr);
/* This is not symlink, create directory */
join_path_components(to_path, data_dir, relative_ptr);

View File

@ -57,6 +57,7 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
int base_full_backup_index = 0;
int corrupted_backup_index = 0;
char *action = is_restore ? "Restore":"Validate";
parray *parent_chain = NULL;
if (is_restore)
{
@ -285,6 +286,27 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
if (is_restore)
check_tablespace_mapping(dest_backup);
/* At this point we are sure that parent chain is whole
* so we can build separate array, containing all needed backups,
* to simplify validation and restore
*/
parent_chain = parray_new();
/* Take every backup that is a child of base_backup AND parent of dest_backup
* including base_backup and dest_backup
*/
for (i = base_full_backup_index; i >= dest_backup_index; i--)
{
tmp_backup = (pgBackup *) parray_get(backups, i);
if (is_parent(base_full_backup->start_time, tmp_backup, true) &&
is_parent(tmp_backup->start_time, dest_backup, true))
{
parray_append(parent_chain, tmp_backup);
}
}
/* for validation or restore with enabled validation */
if (!is_restore || !rt->restore_no_validate)
{
if (dest_backup->backup_mode != BACKUP_MODE_FULL)
@ -292,27 +314,25 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
/*
* Validate backups from base_full_backup to dest_backup.
* At this point we are sure that parent chain is intact.
*/
for (i = base_full_backup_index; i >= dest_backup_index; i--)
for (i = 0; i < parray_num(parent_chain); i++)
{
tmp_backup = (pgBackup *) parray_get(backups, i);
tmp_backup = (pgBackup *) parray_get(parent_chain, i);
if (is_parent(base_full_backup->start_time, tmp_backup, true))
pgBackupValidate(tmp_backup);
/* Maybe we should be more paranoid and check for !BACKUP_STATUS_OK? */
if (tmp_backup->status == BACKUP_STATUS_CORRUPT)
{
pgBackupValidate(tmp_backup);
/* Maybe we should be more paranoid and check for !BACKUP_STATUS_OK? */
if (tmp_backup->status == BACKUP_STATUS_CORRUPT)
{
corrupted_backup = tmp_backup;
corrupted_backup_index = i;
break;
}
/* We do not validate WAL files of intermediate backups
* It`s done to speed up restore
corrupted_backup = tmp_backup;
/* we need corrupted backup index from 'backups' not parent_chain
* so we can properly orphanize all its descendants
*/
corrupted_backup_index = get_backup_index_number(backups, corrupted_backup);
break;
}
/* We do not validate WAL files of intermediate backups
* It`s done to speed up restore
*/
}
/* There is no point in wal validation of corrupted backups */
@ -355,7 +375,6 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
}
}
// TODO: rewrite restore to use parent_chain
/*
* If dest backup is corrupted or was orphaned in previous check
* produce corresponding error message
@ -376,13 +395,12 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
base36enc(dest_backup->start_time), status2str(dest_backup->status));
/* We ensured that all backups are valid, now restore if required
* TODO: use parent_link
*/
if (is_restore)
{
for (i = base_full_backup_index; i >= dest_backup_index; i--)
for (i = 0; i < parray_num(parent_chain); i++)
{
pgBackup *backup = (pgBackup *) parray_get(backups, i);
pgBackup *backup = (pgBackup *) parray_get(parent_chain, i);
if (rt->lsn_specified && parse_server_version(backup->server_version) < 100000)
elog(ERROR, "Backup %s was created for version %s which doesn't support recovery_target_lsn",
@ -405,6 +423,7 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
/* cleanup */
parray_walk(backups, pgBackupFree);
parray_free(backups);
parray_free(parent_chain);
elog(INFO, "%s of backup %s completed.",
action, base36enc(dest_backup->start_time));
@ -480,6 +499,7 @@ restore_backup(pgBackup *backup)
/* By default there are some error */
threads_args[i].ret = 1;
/* Useless message TODO: rewrite */
elog(LOG, "Start thread for num:%zu", parray_num(files));
pthread_create(&threads[i], NULL, restore_files, arg);