mirror of
https://github.com/postgrespro/pg_probackup.git
synced 2025-02-03 14:01:57 +02:00
PGPRO-2425: previously successfull restore was relying on that there are no failed backups between base_full_backup and dest_backup, it could lead to false-positive validation or restore errors despite the fact that parent chain is valid. There even was a small chance of data corruption: if between base_full_backup and dest_backup were located backups from parallel chain. TLDR: restore was rolling backups blindly. See test test_restore_chain_with_corrupted_backup() for an example.
This commit is contained in:
parent
2028275c10
commit
628cdce778
@ -981,6 +981,9 @@ is_parent(time_t parent_backup_time, pgBackup *child_backup, bool inclusive)
|
||||
if (!child_backup)
|
||||
elog(ERROR, "Target backup cannot be NULL");
|
||||
|
||||
if (inclusive && child_backup->start_time == parent_backup_time)
|
||||
return true;
|
||||
|
||||
while (child_backup->parent_backup_link &&
|
||||
child_backup->parent_backup != parent_backup_time)
|
||||
{
|
||||
@ -990,8 +993,8 @@ is_parent(time_t parent_backup_time, pgBackup *child_backup, bool inclusive)
|
||||
if (child_backup->parent_backup == parent_backup_time)
|
||||
return true;
|
||||
|
||||
if (inclusive && child_backup->start_time == parent_backup_time)
|
||||
return true;
|
||||
//if (inclusive && child_backup->start_time == parent_backup_time)
|
||||
// return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
@ -1047,11 +1047,11 @@ create_data_directories(const char *data_dir, const char *backup_dir,
|
||||
}
|
||||
|
||||
if (link_sep)
|
||||
elog(LOG, "create directory \"%s\" and symbolic link \"%.*s\"",
|
||||
elog(VERBOSE, "create directory \"%s\" and symbolic link \"%.*s\"",
|
||||
linked_path,
|
||||
(int) (link_sep - relative_ptr), relative_ptr);
|
||||
else
|
||||
elog(LOG, "create directory \"%s\" and symbolic link \"%s\"",
|
||||
elog(VERBOSE, "create directory \"%s\" and symbolic link \"%s\"",
|
||||
linked_path, relative_ptr);
|
||||
|
||||
/* Firstly, create linked directory */
|
||||
@ -1082,7 +1082,7 @@ create_data_directories(const char *data_dir, const char *backup_dir,
|
||||
}
|
||||
|
||||
create_directory:
|
||||
elog(LOG, "create directory \"%s\"", relative_ptr);
|
||||
elog(VERBOSE, "create directory \"%s\"", relative_ptr);
|
||||
|
||||
/* This is not symlink, create directory */
|
||||
join_path_components(to_path, data_dir, relative_ptr);
|
||||
|
@ -57,6 +57,7 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
|
||||
int base_full_backup_index = 0;
|
||||
int corrupted_backup_index = 0;
|
||||
char *action = is_restore ? "Restore":"Validate";
|
||||
parray *parent_chain = NULL;
|
||||
|
||||
if (is_restore)
|
||||
{
|
||||
@ -285,6 +286,27 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
|
||||
if (is_restore)
|
||||
check_tablespace_mapping(dest_backup);
|
||||
|
||||
/* At this point we are sure that parent chain is whole
|
||||
* so we can build separate array, containing all needed backups,
|
||||
* to simplify validation and restore
|
||||
*/
|
||||
parent_chain = parray_new();
|
||||
|
||||
/* Take every backup that is a child of base_backup AND parent of dest_backup
|
||||
* including base_backup and dest_backup
|
||||
*/
|
||||
for (i = base_full_backup_index; i >= dest_backup_index; i--)
|
||||
{
|
||||
tmp_backup = (pgBackup *) parray_get(backups, i);
|
||||
|
||||
if (is_parent(base_full_backup->start_time, tmp_backup, true) &&
|
||||
is_parent(tmp_backup->start_time, dest_backup, true))
|
||||
{
|
||||
parray_append(parent_chain, tmp_backup);
|
||||
}
|
||||
}
|
||||
|
||||
/* for validation or restore with enabled validation */
|
||||
if (!is_restore || !rt->restore_no_validate)
|
||||
{
|
||||
if (dest_backup->backup_mode != BACKUP_MODE_FULL)
|
||||
@ -292,27 +314,25 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
|
||||
|
||||
/*
|
||||
* Validate backups from base_full_backup to dest_backup.
|
||||
* At this point we are sure that parent chain is intact.
|
||||
*/
|
||||
for (i = base_full_backup_index; i >= dest_backup_index; i--)
|
||||
for (i = 0; i < parray_num(parent_chain); i++)
|
||||
{
|
||||
tmp_backup = (pgBackup *) parray_get(backups, i);
|
||||
tmp_backup = (pgBackup *) parray_get(parent_chain, i);
|
||||
|
||||
if (is_parent(base_full_backup->start_time, tmp_backup, true))
|
||||
pgBackupValidate(tmp_backup);
|
||||
/* Maybe we should be more paranoid and check for !BACKUP_STATUS_OK? */
|
||||
if (tmp_backup->status == BACKUP_STATUS_CORRUPT)
|
||||
{
|
||||
|
||||
pgBackupValidate(tmp_backup);
|
||||
/* Maybe we should be more paranoid and check for !BACKUP_STATUS_OK? */
|
||||
if (tmp_backup->status == BACKUP_STATUS_CORRUPT)
|
||||
{
|
||||
corrupted_backup = tmp_backup;
|
||||
corrupted_backup_index = i;
|
||||
break;
|
||||
}
|
||||
/* We do not validate WAL files of intermediate backups
|
||||
* It`s done to speed up restore
|
||||
corrupted_backup = tmp_backup;
|
||||
/* we need corrupted backup index from 'backups' not parent_chain
|
||||
* so we can properly orphanize all its descendants
|
||||
*/
|
||||
corrupted_backup_index = get_backup_index_number(backups, corrupted_backup);
|
||||
break;
|
||||
}
|
||||
/* We do not validate WAL files of intermediate backups
|
||||
* It`s done to speed up restore
|
||||
*/
|
||||
}
|
||||
|
||||
/* There is no point in wal validation of corrupted backups */
|
||||
@ -355,7 +375,6 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: rewrite restore to use parent_chain
|
||||
/*
|
||||
* If dest backup is corrupted or was orphaned in previous check
|
||||
* produce corresponding error message
|
||||
@ -376,13 +395,12 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
|
||||
base36enc(dest_backup->start_time), status2str(dest_backup->status));
|
||||
|
||||
/* We ensured that all backups are valid, now restore if required
|
||||
* TODO: use parent_link
|
||||
*/
|
||||
if (is_restore)
|
||||
{
|
||||
for (i = base_full_backup_index; i >= dest_backup_index; i--)
|
||||
for (i = 0; i < parray_num(parent_chain); i++)
|
||||
{
|
||||
pgBackup *backup = (pgBackup *) parray_get(backups, i);
|
||||
pgBackup *backup = (pgBackup *) parray_get(parent_chain, i);
|
||||
|
||||
if (rt->lsn_specified && parse_server_version(backup->server_version) < 100000)
|
||||
elog(ERROR, "Backup %s was created for version %s which doesn't support recovery_target_lsn",
|
||||
@ -405,6 +423,7 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt,
|
||||
/* cleanup */
|
||||
parray_walk(backups, pgBackupFree);
|
||||
parray_free(backups);
|
||||
parray_free(parent_chain);
|
||||
|
||||
elog(INFO, "%s of backup %s completed.",
|
||||
action, base36enc(dest_backup->start_time));
|
||||
@ -480,6 +499,7 @@ restore_backup(pgBackup *backup)
|
||||
/* By default there are some error */
|
||||
threads_args[i].ret = 1;
|
||||
|
||||
/* Useless message TODO: rewrite */
|
||||
elog(LOG, "Start thread for num:%zu", parray_num(files));
|
||||
|
||||
pthread_create(&threads[i], NULL, restore_files, arg);
|
||||
|
Loading…
x
Reference in New Issue
Block a user