1
0
mirror of https://github.com/postgrespro/pg_probackup.git synced 2025-09-16 09:26:30 +02:00

[PBCKP-218] Incremental restore and missing pg_control (issue #304)

- pg_control file backup after all other files in backup
- pg_control file restore last in full restore
- rename pg_control to pg_control.pbk.bak at start of non-full restore
- remove pg_control.pbk.bak at the end of a successful non-full restore
- use pg_control.pbk.bak after failed non-full restore
- added tests for full and incremental restore

Tags: backup, catchup, restore
This commit is contained in:
Oleg Gurev
2023-12-03 23:59:35 +03:00
parent d26df12019
commit 52e47fe196
9 changed files with 354 additions and 83 deletions

View File

@@ -122,6 +122,8 @@ do_backup_pg(InstanceState *instanceState, PGconn *backup_conn,
char pretty_time[20]; char pretty_time[20];
char pretty_bytes[20]; char pretty_bytes[20];
pgFile *src_pg_control_file = NULL;
elog(INFO, "Database backup start"); elog(INFO, "Database backup start");
if(current.external_dir_str) if(current.external_dir_str)
{ {
@@ -424,6 +426,24 @@ do_backup_pg(InstanceState *instanceState, PGconn *backup_conn,
} }
/*
* find pg_control file
* We'll copy it last
*/
{
int control_file_elem_index;
pgFile search_key;
MemSet(&search_key, 0, sizeof(pgFile));
/* pgFileCompareRelPathWithExternal uses only .rel_path and .external_dir_num for comparison */
search_key.rel_path = XLOG_CONTROL_FILE;
search_key.external_dir_num = 0;
control_file_elem_index = parray_bsearch_index(backup_files_list, &search_key, pgFileCompareRelPathWithExternal);
if (control_file_elem_index < 0)
elog(ERROR, "File \"%s\" not found in PGDATA %s", XLOG_CONTROL_FILE, current.database_dir);
src_pg_control_file = (pgFile *)parray_get(backup_files_list, control_file_elem_index);
}
/* setup thread locks */ /* setup thread locks */
pfilearray_clear_locks(backup_files_list); pfilearray_clear_locks(backup_files_list);
@@ -483,6 +503,26 @@ do_backup_pg(InstanceState *instanceState, PGconn *backup_conn,
backup_isok = false; backup_isok = false;
} }
/* copy pg_control at very end */
if (backup_isok)
{
elog(progress ? INFO : LOG, "Progress: Backup file \"%s\"",
src_pg_control_file->rel_path);
char from_fullpath[MAXPGPATH];
char to_fullpath[MAXPGPATH];
join_path_components(from_fullpath, instance_config.pgdata, src_pg_control_file->rel_path);
join_path_components(to_fullpath, current.database_dir, src_pg_control_file->rel_path);
backup_non_data_file(src_pg_control_file, NULL,
from_fullpath, to_fullpath,
current.backup_mode, current.parent_backup,
true);
}
time(&end_time); time(&end_time);
pretty_time_interval(difftime(end_time, start_time), pretty_time_interval(difftime(end_time, start_time),
pretty_time, lengthof(pretty_time)); pretty_time, lengthof(pretty_time));
@@ -510,17 +550,8 @@ do_backup_pg(InstanceState *instanceState, PGconn *backup_conn,
{ {
pgFile *pg_control = NULL; pgFile *pg_control = NULL;
for (i = 0; i < parray_num(backup_files_list); i++) pg_control = src_pg_control_file;
{
pgFile *tmp_file = (pgFile *) parray_get(backup_files_list, i);
if (tmp_file->external_dir_num == 0 &&
(strcmp(tmp_file->rel_path, XLOG_CONTROL_FILE) == 0))
{
pg_control = tmp_file;
break;
}
}
if (!pg_control) if (!pg_control)
elog(ERROR, "Failed to find file \"%s\" in backup filelist.", elog(ERROR, "Failed to find file \"%s\" in backup filelist.",
@@ -2076,6 +2107,13 @@ backup_files(void *arg)
/* We have already copied all directories */ /* We have already copied all directories */
if (S_ISDIR(file->mode)) if (S_ISDIR(file->mode))
continue; continue;
/*
* Don't copy the pg_control file now, we'll copy it last
*/
if(file->external_dir_num == 0 && pg_strcasecmp(file->rel_path, XLOG_CONTROL_FILE) == 0)
{
continue;
}
if (arguments->thread_num == 1) if (arguments->thread_num == 1)
{ {

View File

@@ -171,10 +171,13 @@ catchup_preflight_checks(PGNodeInfo *source_node_info, PGconn *source_conn,
if (current.backup_mode != BACKUP_MODE_FULL) if (current.backup_mode != BACKUP_MODE_FULL)
{ {
dest_id = get_system_identifier(dest_pgdata, FIO_LOCAL_HOST, false); ControlFileData dst_control;
get_control_file_or_back_file(dest_pgdata, FIO_LOCAL_HOST, &dst_control);
dest_id = dst_control.system_identifier;
if (source_conn_id != dest_id) if (source_conn_id != dest_id)
elog(ERROR, "Database identifiers mismatch: we connected to DB id %lu, but in \"%s\" we found id %lu", elog(ERROR, "Database identifiers mismatch: we connected to DB id %llu, but in \"%s\" we found id %llu",
source_conn_id, dest_pgdata, dest_id); (long long)source_conn_id, dest_pgdata, (long long)dest_id);
} }
} }
@@ -640,6 +643,9 @@ do_catchup(const char *source_pgdata, const char *dest_pgdata, int num_threads,
ssize_t transfered_walfiles_bytes = 0; ssize_t transfered_walfiles_bytes = 0;
char pretty_source_bytes[20]; char pretty_source_bytes[20];
char dest_pg_control_fullpath[MAXPGPATH];
char dest_pg_control_bak_fullpath[MAXPGPATH];
source_conn = catchup_init_state(&source_node_info, source_pgdata, dest_pgdata); source_conn = catchup_init_state(&source_node_info, source_pgdata, dest_pgdata);
catchup_preflight_checks(&source_node_info, source_conn, source_pgdata, dest_pgdata); catchup_preflight_checks(&source_node_info, source_conn, source_pgdata, dest_pgdata);
@@ -935,6 +941,9 @@ do_catchup(const char *source_pgdata, const char *dest_pgdata, int num_threads,
Assert(file->external_dir_num == 0); Assert(file->external_dir_num == 0);
if (pg_strcasecmp(file->name, RELMAPPER_FILENAME) == 0) if (pg_strcasecmp(file->name, RELMAPPER_FILENAME) == 0)
redundant = true; redundant = true;
/* global/pg_control.pbk.bak is always kept, because it's needed to restart a failed incremental restore */
if (pg_strcasecmp(file->rel_path, XLOG_CONTROL_BAK_FILE) == 0)
redundant = false;
/* if file does not exists in destination list, then we can safely unlink it */ /* if file does not exists in destination list, then we can safely unlink it */
if (redundant) if (redundant)
@@ -966,6 +975,28 @@ do_catchup(const char *source_pgdata, const char *dest_pgdata, int num_threads,
if (dest_filelist) if (dest_filelist)
parray_qsort(dest_filelist, pgFileCompareRelPathWithExternal); parray_qsort(dest_filelist, pgFileCompareRelPathWithExternal);
join_path_components(dest_pg_control_fullpath, dest_pgdata, XLOG_CONTROL_FILE);
join_path_components(dest_pg_control_bak_fullpath, dest_pgdata, XLOG_CONTROL_BAK_FILE);
/*
 * rename the dest control file (if it exists) before restoring;
 * if it doesn't exist, that means we are already restoring into a previously
 * failed pgdata, where XLOG_CONTROL_BAK_FILE exists
 */
if (current.backup_mode != BACKUP_MODE_FULL && !dry_run)
{
if (!fio_access(dest_pg_control_fullpath, F_OK, FIO_LOCAL_HOST))
{
pgFile *dst_control;
dst_control = pgFileNew(dest_pg_control_bak_fullpath, XLOG_CONTROL_BAK_FILE,
true,0, FIO_BACKUP_HOST);
if(!fio_access(dest_pg_control_bak_fullpath, F_OK, FIO_LOCAL_HOST))
fio_delete(dst_control->mode, dest_pg_control_bak_fullpath, FIO_LOCAL_HOST);
fio_rename(dest_pg_control_fullpath, dest_pg_control_bak_fullpath, FIO_LOCAL_HOST);
pgFileFree(dst_control);
}
}
/* run copy threads */ /* run copy threads */
elog(INFO, "Start transferring data files"); elog(INFO, "Start transferring data files");
time(&start_time); time(&start_time);
@@ -985,6 +1016,15 @@ do_catchup(const char *source_pgdata, const char *dest_pgdata, int num_threads,
copy_pgcontrol_file(from_fullpath, FIO_DB_HOST, copy_pgcontrol_file(from_fullpath, FIO_DB_HOST,
to_fullpath, FIO_LOCAL_HOST, source_pg_control_file); to_fullpath, FIO_LOCAL_HOST, source_pg_control_file);
transfered_datafiles_bytes += source_pg_control_file->size; transfered_datafiles_bytes += source_pg_control_file->size;
/* Now the backup control file can be deleted */
if (current.backup_mode != BACKUP_MODE_FULL && !fio_access(dest_pg_control_bak_fullpath, F_OK, FIO_LOCAL_HOST)){
pgFile *dst_control;
dst_control = pgFileNew(dest_pg_control_bak_fullpath, XLOG_CONTROL_BAK_FILE,
true,0, FIO_BACKUP_HOST);
fio_delete(dst_control->mode, dest_pg_control_bak_fullpath, FIO_LOCAL_HOST);
pgFileFree(dst_control);
}
} }
if (!catchup_isok && !dry_run) if (!catchup_isok && !dry_run)

View File

@@ -1867,4 +1867,4 @@ set_forkname(pgFile *file)
file->segno = segno; file->segno = segno;
file->is_datafile = file->forkName == none; file->is_datafile = file->forkName == none;
return true; return true;
} }

View File

@@ -91,6 +91,7 @@ extern const char *PROGRAM_EMAIL;
#define DATABASE_MAP "database_map" #define DATABASE_MAP "database_map"
#define HEADER_MAP "page_header_map" #define HEADER_MAP "page_header_map"
#define HEADER_MAP_TMP "page_header_map_tmp" #define HEADER_MAP_TMP "page_header_map_tmp"
#define XLOG_CONTROL_BAK_FILE XLOG_CONTROL_FILE".pbk.bak"
/* default replication slot names */ /* default replication slot names */
#define DEFAULT_TEMP_SLOT_NAME "pg_probackup_slot"; #define DEFAULT_TEMP_SLOT_NAME "pg_probackup_slot";
@@ -1209,6 +1210,8 @@ extern uint32 get_xlog_seg_size(const char *pgdata_path);
extern void get_redo(const char *pgdata_path, fio_location pgdata_location, RedoParams *redo); extern void get_redo(const char *pgdata_path, fio_location pgdata_location, RedoParams *redo);
extern void set_min_recovery_point(pgFile *file, const char *backup_path, extern void set_min_recovery_point(pgFile *file, const char *backup_path,
XLogRecPtr stop_backup_lsn); XLogRecPtr stop_backup_lsn);
extern void get_control_file_or_back_file(const char *pgdata_path, fio_location location,
ControlFileData *control);
extern void copy_pgcontrol_file(const char *from_fullpath, fio_location from_location, extern void copy_pgcontrol_file(const char *from_fullpath, fio_location from_location,
const char *to_fullpath, fio_location to_location, pgFile *file); const char *to_fullpath, fio_location to_location, pgFile *file);

View File

@@ -39,6 +39,8 @@ typedef struct
int ret; int ret;
} restore_files_arg; } restore_files_arg;
static bool control_downloaded = false;
static ControlFileData instance_control;
static void static void
print_recovery_settings(InstanceState *instanceState, FILE *fp, pgBackup *backup, print_recovery_settings(InstanceState *instanceState, FILE *fp, pgBackup *backup,
@@ -501,6 +503,9 @@ do_restore_or_validate(InstanceState *instanceState, time_t target_backup_id, pg
if (redo.checksum_version == 0) if (redo.checksum_version == 0)
elog(ERROR, "Incremental restore in 'lsn' mode require " elog(ERROR, "Incremental restore in 'lsn' mode require "
"data_checksums to be enabled in destination data directory"); "data_checksums to be enabled in destination data directory");
if (!control_downloaded)
get_control_file_or_back_file(instance_config.pgdata, FIO_DB_HOST,
&instance_control);
timelines = read_timeline_history(instanceState->instance_wal_subdir_path, timelines = read_timeline_history(instanceState->instance_wal_subdir_path,
redo.tli, false); redo.tli, false);
@@ -719,6 +724,10 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain,
parray *pgdata_files = NULL; parray *pgdata_files = NULL;
parray *dest_files = NULL; parray *dest_files = NULL;
parray *external_dirs = NULL; parray *external_dirs = NULL;
pgFile *dest_pg_control_file = NULL;
char dest_pg_control_fullpath[MAXPGPATH];
char dest_pg_control_bak_fullpath[MAXPGPATH];
/* arrays with meta info for multi threaded backup */ /* arrays with meta info for multi threaded backup */
pthread_t *threads; pthread_t *threads;
restore_files_arg *threads_args; restore_files_arg *threads_args;
@@ -922,6 +931,11 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain,
pg_strcasecmp(file->name, RELMAPPER_FILENAME) == 0) pg_strcasecmp(file->name, RELMAPPER_FILENAME) == 0)
redundant = true; redundant = true;
/* global/pg_control.pbk.bak is always kept, because it's needed to restart a failed incremental restore */
if (file->external_dir_num == 0 &&
pg_strcasecmp(file->rel_path, XLOG_CONTROL_BAK_FILE) == 0)
redundant = false;
/* do not delete the useful internal directories */ /* do not delete the useful internal directories */
if (S_ISDIR(file->mode) && !redundant) if (S_ISDIR(file->mode) && !redundant)
continue; continue;
@@ -974,6 +988,42 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain,
dest_bytes = dest_backup->pgdata_bytes; dest_bytes = dest_backup->pgdata_bytes;
pretty_size(dest_bytes, pretty_dest_bytes, lengthof(pretty_dest_bytes)); pretty_size(dest_bytes, pretty_dest_bytes, lengthof(pretty_dest_bytes));
/*
* [Issue #313]
* find pg_control file (in already sorted earlier dest_files, see parray_qsort(backup->files...))
* and exclude it from list for future special processing
*/
{
int control_file_elem_index;
pgFile search_key;
MemSet(&search_key, 0, sizeof(pgFile));
/* pgFileCompareRelPathWithExternal uses only .rel_path and .external_dir_num for comparison */
search_key.rel_path = XLOG_CONTROL_FILE;
search_key.external_dir_num = 0;
control_file_elem_index = parray_bsearch_index(dest_files, &search_key, pgFileCompareRelPathWithExternal);
if (control_file_elem_index < 0)
elog(ERROR, "File \"%s\" not found in backup %s", XLOG_CONTROL_FILE, base36enc(dest_backup->start_time));
dest_pg_control_file = (pgFile *) parray_get(dest_files, control_file_elem_index);
parray_remove(dest_files, control_file_elem_index);
join_path_components(dest_pg_control_fullpath, pgdata_path, XLOG_CONTROL_FILE);
join_path_components(dest_pg_control_bak_fullpath, pgdata_path, XLOG_CONTROL_BAK_FILE);
/*
 * rename the dest control file (if it exists) before restoring;
 * if it doesn't exist, that means we are already restoring into a previously
 * failed pgdata, where XLOG_CONTROL_BAK_FILE exists
 */
if (params->incremental_mode != INCR_NONE)
{
if (fio_access(dest_pg_control_fullpath,F_OK,FIO_DB_HOST) == 0){
if (fio_rename(dest_pg_control_fullpath, dest_pg_control_bak_fullpath, FIO_DB_HOST) < 0)
elog(WARNING, "Cannot rename file \"%s\" to \"%s\": %s",
dest_pg_control_fullpath, dest_pg_control_bak_fullpath, strerror(errno));
}
}
}
elog(INFO, "Start restoring backup files. PGDATA size: %s", pretty_dest_bytes); elog(INFO, "Start restoring backup files. PGDATA size: %s", pretty_dest_bytes);
time(&start_time); time(&start_time);
thread_interrupted = false; thread_interrupted = false;
@@ -1014,6 +1064,32 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain,
total_bytes += threads_args[i].restored_bytes; total_bytes += threads_args[i].restored_bytes;
} }
/* [Issue #313] copy pg_control at very end */
if (restore_isok)
{
FILE *out = NULL;
elog(progress ? INFO : LOG, "Progress: Restore file \"%s\"",
dest_pg_control_file->rel_path);
out = fio_fopen(dest_pg_control_fullpath, PG_BINARY_R "+", FIO_DB_HOST);
total_bytes += restore_non_data_file(parent_chain,
dest_backup,
dest_pg_control_file,
out,
dest_pg_control_fullpath, false);
fio_fclose(out);
/* Now backup control file can be deleted */
if (params->incremental_mode != INCR_NONE)
{
pgFile *dst_control;
dst_control = pgFileNew(dest_pg_control_bak_fullpath, XLOG_CONTROL_BAK_FILE,
true,0, FIO_BACKUP_HOST);
fio_delete(dst_control->mode, dest_pg_control_bak_fullpath, FIO_LOCAL_HOST);
pgFileFree(dst_control);
}
}
time(&end_time); time(&end_time);
pretty_time_interval(difftime(end_time, start_time), pretty_time_interval(difftime(end_time, start_time),
pretty_time, lengthof(pretty_time)); pretty_time, lengthof(pretty_time));
@@ -1098,6 +1174,8 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain,
parray_free(pgdata_files); parray_free(pgdata_files);
} }
if(dest_pg_control_file) pgFileFree(dest_pg_control_file);
for (i = parray_num(parent_chain) - 1; i >= 0; i--) for (i = parray_num(parent_chain) - 1; i >= 0; i--)
{ {
pgBackup *backup = (pgBackup *) parray_get(parent_chain, i); pgBackup *backup = (pgBackup *) parray_get(parent_chain, i);
@@ -2230,7 +2308,10 @@ check_incremental_compatibility(const char *pgdata, uint64 system_identifier,
*/ */
elog(LOG, "Trying to read pg_control file in destination directory"); elog(LOG, "Trying to read pg_control file in destination directory");
system_id_pgdata = get_system_identifier(pgdata, FIO_DB_HOST, false); get_control_file_or_back_file(pgdata, FIO_DB_HOST, &instance_control);
control_downloaded = true;
system_id_pgdata = instance_control.system_identifier;
if (system_id_pgdata == instance_config.system_identifier) if (system_id_pgdata == instance_config.system_identifier)
system_id_match = true; system_id_match = true;

View File

@@ -190,6 +190,26 @@ get_current_timeline_from_control(const char *pgdata_path, fio_location location
return ControlFile.checkPointCopy.ThisTimeLineID; return ControlFile.checkPointCopy.ThisTimeLineID;
} }
void
get_control_file_or_back_file(const char *pgdata_path, fio_location location, ControlFileData *control)
{
char *buffer;
size_t size;
/* First fetch file... */
buffer = slurpFile(pgdata_path, XLOG_CONTROL_FILE, &size, true, location);
if (!buffer || size == 0){
/* Failed to read XLOG_CONTROL_FILE, or the file is truncated; try reading the backup copy */
buffer = slurpFile(pgdata_path, XLOG_CONTROL_BAK_FILE, &size, true, location);
if (!buffer)
elog(ERROR, "Could not read %s and %s files\n", XLOG_CONTROL_FILE, XLOG_CONTROL_BAK_FILE); /* Maybe it should be PANIC? */
}
digestControlFile(control, buffer, size);
pg_free(buffer);
}
/* /*
* Get last checkpoint record ptr from pg_control. * Get last checkpoint record ptr from pg_control.
*/ */

View File

@@ -1783,7 +1783,7 @@ class ProbackupTest(object):
'ptrack_control', 'ptrack_init', 'pg_control', 'ptrack_control', 'ptrack_init', 'pg_control',
'probackup_recovery.conf', 'recovery.signal', 'probackup_recovery.conf', 'recovery.signal',
'standby.signal', 'ptrack.map', 'ptrack.map.mmap', 'standby.signal', 'ptrack.map', 'ptrack.map.mmap',
'ptrack.map.tmp' 'ptrack.map.tmp', 'recovery.done','backup_label.old'
] ]
if exclude_dirs: if exclude_dirs:

View File

@@ -9,8 +9,9 @@ from datetime import datetime, timedelta
import hashlib import hashlib
import shutil import shutil
import json import json
from testgres import QueryException from testgres import QueryException, StartNodeException
import stat
from stat import S_ISDIR
class IncrRestoreTest(ProbackupTest, unittest.TestCase): class IncrRestoreTest(ProbackupTest, unittest.TestCase):
@@ -2426,3 +2427,86 @@ class IncrRestoreTest(ProbackupTest, unittest.TestCase):
'select 1') 'select 1')
# check that MinRecPoint and BackupStartLsn are correctly used in case of --incremental-lsn
# @unittest.skip("skip")
def test_incr_restore_issue_313(self):
"""
Check that failed incremental restore can be restarted
"""
self._check_gdb_flag_or_skip_test
node = self.make_simple_node('node',
set_replication=True,
initdb_params=['--data-checksums'])
backup_dir = os.path.join(self.tmp_path, self.module_name, self.fname, 'backup')
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
node.slow_start()
node.pgbench_init(scale = 50)
full_backup_id = self.backup_node(backup_dir, 'node', node, backup_type='full')
pgbench = node.pgbench(
stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
options=['-T', '10', '-c', '1', '--no-vacuum'])
pgbench.wait()
pgbench.stdout.close()
last_backup_id = self.backup_node(backup_dir, 'node', node, backup_type='delta')
pgdata = self.pgdata_content(node.data_dir)
node.cleanup()
self.restore_node(backup_dir, 'node', node, backup_id=full_backup_id)
count = 0
filelist = self.get_backup_filelist(backup_dir, 'node', last_backup_id)
for file in filelist:
# count only nondata files
if int(filelist[file]['is_datafile']) == 0 and \
not stat.S_ISDIR(int(filelist[file]['mode'])) and \
not filelist[file]['size'] == '0' and \
file != 'database_map':
count += 1
gdb = self.restore_node(backup_dir, 'node', node, gdb=True,
backup_id=last_backup_id, options=['--progress', '--incremental-mode=checksum'])
gdb.verbose = False
gdb.set_breakpoint('restore_non_data_file')
gdb.run_until_break()
gdb.continue_execution_until_break(count - 1)
gdb.quit()
bak_file = os.path.join(node.data_dir, 'global', 'pg_control.pbk.bak')
self.assertTrue(
os.path.exists(bak_file),
"pg_control bak file should exist after interrupted incremental restore: {0}".format(bak_file))
try:
node.slow_start()
# we should die here because exception is what we expect to happen
self.assertEqual(
1, 0,
"Expecting Error because backup is not fully restored")
except StartNodeException as e:
self.assertIn(
'Cannot start node',
e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(e.message), self.cmd))
with open(os.path.join(node.logs_dir, 'postgresql.log'), 'r') as f:
if self.pg_config_version >= 120000:
self.assertIn(
"PANIC: could not read file \"global/pg_control\"",
f.read())
else:
self.assertIn(
"PANIC: could not read from control file",
f.read())
self.restore_node(backup_dir, 'node', node,
backup_id=last_backup_id, options=['--progress', '--incremental-mode=checksum'])
node.slow_start()
self.compare_pgdata(pgdata, self.pgdata_content(node.data_dir))

View File

@@ -3,11 +3,11 @@ import unittest
from .helpers.ptrack_helpers import ProbackupTest, ProbackupException from .helpers.ptrack_helpers import ProbackupTest, ProbackupException
import subprocess import subprocess
import sys import sys
from time import sleep
from datetime import datetime, timedelta, timezone from datetime import datetime, timedelta, timezone
import hashlib import hashlib
import shutil import shutil
import json import json
import stat
from shutil import copyfile from shutil import copyfile
from testgres import QueryException, StartNodeException from testgres import QueryException, StartNodeException
from stat import S_ISDIR from stat import S_ISDIR
@@ -3709,66 +3709,6 @@ class RestoreTest(ProbackupTest, unittest.TestCase):
self.compare_pgdata(pgdata1, pgdata2) self.compare_pgdata(pgdata1, pgdata2)
self.compare_pgdata(pgdata2, pgdata3) self.compare_pgdata(pgdata2, pgdata3)
# skip this test until https://github.com/postgrespro/pg_probackup/pull/399
@unittest.skip("skip")
def test_restore_issue_313(self):
"""
Check that partially restored PostgreSQL instance cannot be started
"""
backup_dir = os.path.join(self.tmp_path, self.module_name, self.fname, 'backup')
node = self.make_simple_node(
base_dir=os.path.join(self.module_name, self.fname, 'node'),
set_replication=True,
initdb_params=['--data-checksums'])
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
node.slow_start()
# FULL backup
backup_id = self.backup_node(backup_dir, 'node', node)
node.cleanup()
count = 0
filelist = self.get_backup_filelist(backup_dir, 'node', backup_id)
for file in filelist:
# count only nondata files
if int(filelist[file]['is_datafile']) == 0 and int(filelist[file]['size']) > 0:
count += 1
node_restored = self.make_simple_node(
base_dir=os.path.join(self.module_name, self.fname, 'node_restored'))
node_restored.cleanup()
self.restore_node(backup_dir, 'node', node_restored)
gdb = self.restore_node(backup_dir, 'node', node, gdb=True, options=['--progress'])
gdb.verbose = False
gdb.set_breakpoint('restore_non_data_file')
gdb.run_until_break()
gdb.continue_execution_until_break(count - 2)
gdb.quit()
# emulate the user or HA taking care of PG configuration
for fname in os.listdir(node_restored.data_dir):
if fname.endswith('.conf'):
os.rename(
os.path.join(node_restored.data_dir, fname),
os.path.join(node.data_dir, fname))
try:
node.slow_start()
# we should die here because exception is what we expect to happen
self.assertEqual(
1, 0,
"Expecting Error because backup is not fully restored")
except StartNodeException as e:
self.assertIn(
'Cannot start node',
e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(e.message), self.cmd))
# @unittest.skip("skip") # @unittest.skip("skip")
def test_restore_with_waldir(self): def test_restore_with_waldir(self):
"""recovery using tablespace-mapping option and page backup""" """recovery using tablespace-mapping option and page backup"""
@@ -3833,8 +3773,6 @@ class RestoreTest(ProbackupTest, unittest.TestCase):
self.add_instance(backup_dir, 'node', node) self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node) self.set_archiving(backup_dir, 'node', node)
node.slow_start() node.slow_start()
node.pgbench_init(scale=2) node.pgbench_init(scale=2)
before1 = node.table_checksum("pgbench_branches") before1 = node.table_checksum("pgbench_branches")
@@ -3850,8 +3788,6 @@ class RestoreTest(ProbackupTest, unittest.TestCase):
'\n Unexpected Error Message: {0}\n CMD: {1}'.format( '\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(self.output), self.cmd)) repr(self.output), self.cmd))
node.slow_start() node.slow_start()
pgbench = node.pgbench( pgbench = node.pgbench(
stdout=subprocess.PIPE, stderr=subprocess.STDOUT, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
@@ -3925,3 +3861,72 @@ class RestoreTest(ProbackupTest, unittest.TestCase):
# check for the current updates # check for the current updates
after = node.table_checksum("pgbench_branches") after = node.table_checksum("pgbench_branches")
self.assertEqual(before1, after) self.assertEqual(before1, after)
def test_restore_issue_313(self):
"""
Check that partially restored PostgreSQL instance cannot be started
"""
self._check_gdb_flag_or_skip_test
node = self.make_simple_node('node',
set_replication=True,
initdb_params=['--data-checksums'])
backup_dir = os.path.join(self.tmp_path, self.module_name, self.fname, 'backup')
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
node.slow_start()
# FULL backup
backup_id = self.backup_node(backup_dir, 'node', node)
node.cleanup()
count = 0
filelist = self.get_backup_filelist(backup_dir, 'node', backup_id)
for file in filelist:
# count only nondata files
if int(filelist[file]['is_datafile']) == 0 and \
not stat.S_ISDIR(int(filelist[file]['mode'])) and \
not filelist[file]['size'] == '0' and \
file != 'database_map':
count += 1
node_restored = self.make_simple_node('node_restored')
node_restored.cleanup()
self.restore_node(backup_dir, 'node', node_restored)
gdb = self.restore_node(backup_dir, 'node', node, gdb=True, options=['--progress'])
gdb.verbose = False
gdb.set_breakpoint('restore_non_data_file')
gdb.run_until_break()
gdb.continue_execution_until_break(count - 1)
gdb.quit()
# emulate the user or HA taking care of PG configuration
for fname in os.listdir(node_restored.data_dir):
if fname.endswith('.conf'):
os.rename(
os.path.join(node_restored.data_dir, fname),
os.path.join(node.data_dir, fname))
try:
node.slow_start()
# we should die here because exception is what we expect to happen
self.assertEqual(
1, 0,
"Expecting Error because backup is not fully restored")
except StartNodeException as e:
self.assertIn(
'Cannot start node',
e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(e.message), self.cmd))
with open(os.path.join(node.logs_dir, 'postgresql.log'), 'r') as f:
if self.pg_config_version >= 120000:
self.assertIn(
"PANIC: could not read file \"global/pg_control\"",
f.read())
else:
self.assertIn(
"PANIC: could not read from control file",
f.read())