You've already forked pg_probackup
mirror of
https://github.com/postgrespro/pg_probackup.git
synced 2025-09-16 09:26:30 +02:00
[PBCKP-218] Incremental restore and missing pg_control (issue #304)
- pg_control file backup after all other files in backup - pg_control file restore last in full restore - rename pg_control to pg_control.pbk.bak at start of non-full restore - remove pg_control.pbk.bak at the end of successful non-full restore - use pg_control.pbk.bak after failed non-full restore - added tests for full and incremental restore Tags: backup, catchup, restore
This commit is contained in:
58
src/backup.c
58
src/backup.c
@@ -122,6 +122,8 @@ do_backup_pg(InstanceState *instanceState, PGconn *backup_conn,
|
||||
char pretty_time[20];
|
||||
char pretty_bytes[20];
|
||||
|
||||
pgFile *src_pg_control_file = NULL;
|
||||
|
||||
elog(INFO, "Database backup start");
|
||||
if(current.external_dir_str)
|
||||
{
|
||||
@@ -424,6 +426,24 @@ do_backup_pg(InstanceState *instanceState, PGconn *backup_conn,
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* find pg_control file
|
||||
* We'll copy it last
|
||||
*/
|
||||
{
|
||||
int control_file_elem_index;
|
||||
pgFile search_key;
|
||||
MemSet(&search_key, 0, sizeof(pgFile));
|
||||
/* pgFileCompareRelPathWithExternal uses only .rel_path and .external_dir_num for comparison */
|
||||
search_key.rel_path = XLOG_CONTROL_FILE;
|
||||
search_key.external_dir_num = 0;
|
||||
control_file_elem_index = parray_bsearch_index(backup_files_list, &search_key, pgFileCompareRelPathWithExternal);
|
||||
|
||||
if (control_file_elem_index < 0)
|
||||
elog(ERROR, "File \"%s\" not found in PGDATA %s", XLOG_CONTROL_FILE, current.database_dir);
|
||||
src_pg_control_file = (pgFile *)parray_get(backup_files_list, control_file_elem_index);
|
||||
}
|
||||
|
||||
/* setup thread locks */
|
||||
pfilearray_clear_locks(backup_files_list);
|
||||
|
||||
@@ -483,6 +503,26 @@ do_backup_pg(InstanceState *instanceState, PGconn *backup_conn,
|
||||
backup_isok = false;
|
||||
}
|
||||
|
||||
/* copy pg_control at very end */
|
||||
if (backup_isok)
|
||||
{
|
||||
|
||||
elog(progress ? INFO : LOG, "Progress: Backup file \"%s\"",
|
||||
src_pg_control_file->rel_path);
|
||||
|
||||
char from_fullpath[MAXPGPATH];
|
||||
char to_fullpath[MAXPGPATH];
|
||||
join_path_components(from_fullpath, instance_config.pgdata, src_pg_control_file->rel_path);
|
||||
join_path_components(to_fullpath, current.database_dir, src_pg_control_file->rel_path);
|
||||
|
||||
backup_non_data_file(src_pg_control_file, NULL,
|
||||
from_fullpath, to_fullpath,
|
||||
current.backup_mode, current.parent_backup,
|
||||
true);
|
||||
}
|
||||
|
||||
|
||||
|
||||
time(&end_time);
|
||||
pretty_time_interval(difftime(end_time, start_time),
|
||||
pretty_time, lengthof(pretty_time));
|
||||
@@ -510,17 +550,8 @@ do_backup_pg(InstanceState *instanceState, PGconn *backup_conn,
|
||||
{
|
||||
pgFile *pg_control = NULL;
|
||||
|
||||
for (i = 0; i < parray_num(backup_files_list); i++)
|
||||
{
|
||||
pgFile *tmp_file = (pgFile *) parray_get(backup_files_list, i);
|
||||
pg_control = src_pg_control_file;
|
||||
|
||||
if (tmp_file->external_dir_num == 0 &&
|
||||
(strcmp(tmp_file->rel_path, XLOG_CONTROL_FILE) == 0))
|
||||
{
|
||||
pg_control = tmp_file;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!pg_control)
|
||||
elog(ERROR, "Failed to find file \"%s\" in backup filelist.",
|
||||
@@ -2076,6 +2107,13 @@ backup_files(void *arg)
|
||||
/* We have already copied all directories */
|
||||
if (S_ISDIR(file->mode))
|
||||
continue;
|
||||
/*
|
||||
* Don't copy the pg_control file now, we'll copy it last
|
||||
*/
|
||||
if(file->external_dir_num == 0 && pg_strcasecmp(file->rel_path, XLOG_CONTROL_FILE) == 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (arguments->thread_num == 1)
|
||||
{
|
||||
|
@@ -171,10 +171,13 @@ catchup_preflight_checks(PGNodeInfo *source_node_info, PGconn *source_conn,
|
||||
|
||||
if (current.backup_mode != BACKUP_MODE_FULL)
|
||||
{
|
||||
dest_id = get_system_identifier(dest_pgdata, FIO_LOCAL_HOST, false);
|
||||
ControlFileData dst_control;
|
||||
get_control_file_or_back_file(dest_pgdata, FIO_LOCAL_HOST, &dst_control);
|
||||
dest_id = dst_control.system_identifier;
|
||||
|
||||
if (source_conn_id != dest_id)
|
||||
elog(ERROR, "Database identifiers mismatch: we connected to DB id %lu, but in \"%s\" we found id %lu",
|
||||
source_conn_id, dest_pgdata, dest_id);
|
||||
elog(ERROR, "Database identifiers mismatch: we connected to DB id %llu, but in \"%s\" we found id %llu",
|
||||
(long long)source_conn_id, dest_pgdata, (long long)dest_id);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -640,6 +643,9 @@ do_catchup(const char *source_pgdata, const char *dest_pgdata, int num_threads,
|
||||
ssize_t transfered_walfiles_bytes = 0;
|
||||
char pretty_source_bytes[20];
|
||||
|
||||
char dest_pg_control_fullpath[MAXPGPATH];
|
||||
char dest_pg_control_bak_fullpath[MAXPGPATH];
|
||||
|
||||
source_conn = catchup_init_state(&source_node_info, source_pgdata, dest_pgdata);
|
||||
catchup_preflight_checks(&source_node_info, source_conn, source_pgdata, dest_pgdata);
|
||||
|
||||
@@ -935,6 +941,9 @@ do_catchup(const char *source_pgdata, const char *dest_pgdata, int num_threads,
|
||||
Assert(file->external_dir_num == 0);
|
||||
if (pg_strcasecmp(file->name, RELMAPPER_FILENAME) == 0)
|
||||
redundant = true;
|
||||
/* global/pg_control.pbk.bak is always kept, because it is needed to restart a failed incremental restore */
|
||||
if (pg_strcasecmp(file->rel_path, XLOG_CONTROL_BAK_FILE) == 0)
|
||||
redundant = false;
|
||||
|
||||
/* if file does not exists in destination list, then we can safely unlink it */
|
||||
if (redundant)
|
||||
@@ -966,6 +975,28 @@ do_catchup(const char *source_pgdata, const char *dest_pgdata, int num_threads,
|
||||
if (dest_filelist)
|
||||
parray_qsort(dest_filelist, pgFileCompareRelPathWithExternal);
|
||||
|
||||
join_path_components(dest_pg_control_fullpath, dest_pgdata, XLOG_CONTROL_FILE);
|
||||
join_path_components(dest_pg_control_bak_fullpath, dest_pgdata, XLOG_CONTROL_BAK_FILE);
|
||||
/*
|
||||
* rename (if it exists) the dest control file before restoring;
|
||||
* if it doesn't exist, that means we are already restoring into a previously failed
|
||||
* pgdata, where XLOG_CONTROL_BAK_FILE exists
|
||||
*/
|
||||
if (current.backup_mode != BACKUP_MODE_FULL && !dry_run)
|
||||
{
|
||||
if (!fio_access(dest_pg_control_fullpath, F_OK, FIO_LOCAL_HOST))
|
||||
{
|
||||
pgFile *dst_control;
|
||||
dst_control = pgFileNew(dest_pg_control_bak_fullpath, XLOG_CONTROL_BAK_FILE,
|
||||
true,0, FIO_BACKUP_HOST);
|
||||
|
||||
if(!fio_access(dest_pg_control_bak_fullpath, F_OK, FIO_LOCAL_HOST))
|
||||
fio_delete(dst_control->mode, dest_pg_control_bak_fullpath, FIO_LOCAL_HOST);
|
||||
fio_rename(dest_pg_control_fullpath, dest_pg_control_bak_fullpath, FIO_LOCAL_HOST);
|
||||
pgFileFree(dst_control);
|
||||
}
|
||||
}
|
||||
|
||||
/* run copy threads */
|
||||
elog(INFO, "Start transferring data files");
|
||||
time(&start_time);
|
||||
@@ -985,6 +1016,15 @@ do_catchup(const char *source_pgdata, const char *dest_pgdata, int num_threads,
|
||||
copy_pgcontrol_file(from_fullpath, FIO_DB_HOST,
|
||||
to_fullpath, FIO_LOCAL_HOST, source_pg_control_file);
|
||||
transfered_datafiles_bytes += source_pg_control_file->size;
|
||||
|
||||
/* Now backup control file can be deleted */
|
||||
if (current.backup_mode != BACKUP_MODE_FULL && !fio_access(dest_pg_control_bak_fullpath, F_OK, FIO_LOCAL_HOST)){
|
||||
pgFile *dst_control;
|
||||
dst_control = pgFileNew(dest_pg_control_bak_fullpath, XLOG_CONTROL_BAK_FILE,
|
||||
true,0, FIO_BACKUP_HOST);
|
||||
fio_delete(dst_control->mode, dest_pg_control_bak_fullpath, FIO_LOCAL_HOST);
|
||||
pgFileFree(dst_control);
|
||||
}
|
||||
}
|
||||
|
||||
if (!catchup_isok && !dry_run)
|
||||
|
@@ -1867,4 +1867,4 @@ set_forkname(pgFile *file)
|
||||
file->segno = segno;
|
||||
file->is_datafile = file->forkName == none;
|
||||
return true;
|
||||
}
|
||||
}
|
@@ -91,6 +91,7 @@ extern const char *PROGRAM_EMAIL;
|
||||
#define DATABASE_MAP "database_map"
|
||||
#define HEADER_MAP "page_header_map"
|
||||
#define HEADER_MAP_TMP "page_header_map_tmp"
|
||||
#define XLOG_CONTROL_BAK_FILE XLOG_CONTROL_FILE".pbk.bak"
|
||||
|
||||
/* default replication slot names */
|
||||
/*
 * Default name of the temporary replication slot.
 * No trailing semicolon: the macro must expand to a plain string literal so it
 * can be used inside expressions (function arguments, initializers, sizeof).
 */
#define DEFAULT_TEMP_SLOT_NAME "pg_probackup_slot"
|
||||
@@ -1209,6 +1210,8 @@ extern uint32 get_xlog_seg_size(const char *pgdata_path);
|
||||
extern void get_redo(const char *pgdata_path, fio_location pgdata_location, RedoParams *redo);
|
||||
extern void set_min_recovery_point(pgFile *file, const char *backup_path,
|
||||
XLogRecPtr stop_backup_lsn);
|
||||
extern void get_control_file_or_back_file(const char *pgdata_path, fio_location location,
|
||||
ControlFileData *control);
|
||||
extern void copy_pgcontrol_file(const char *from_fullpath, fio_location from_location,
|
||||
const char *to_fullpath, fio_location to_location, pgFile *file);
|
||||
|
||||
|
@@ -39,6 +39,8 @@ typedef struct
|
||||
int ret;
|
||||
} restore_files_arg;
|
||||
|
||||
static bool control_downloaded = false;
|
||||
static ControlFileData instance_control;
|
||||
|
||||
static void
|
||||
print_recovery_settings(InstanceState *instanceState, FILE *fp, pgBackup *backup,
|
||||
@@ -501,6 +503,9 @@ do_restore_or_validate(InstanceState *instanceState, time_t target_backup_id, pg
|
||||
if (redo.checksum_version == 0)
|
||||
elog(ERROR, "Incremental restore in 'lsn' mode require "
|
||||
"data_checksums to be enabled in destination data directory");
|
||||
if (!control_downloaded)
|
||||
get_control_file_or_back_file(instance_config.pgdata, FIO_DB_HOST,
|
||||
&instance_control);
|
||||
|
||||
timelines = read_timeline_history(instanceState->instance_wal_subdir_path,
|
||||
redo.tli, false);
|
||||
@@ -719,6 +724,10 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain,
|
||||
parray *pgdata_files = NULL;
|
||||
parray *dest_files = NULL;
|
||||
parray *external_dirs = NULL;
|
||||
pgFile *dest_pg_control_file = NULL;
|
||||
char dest_pg_control_fullpath[MAXPGPATH];
|
||||
char dest_pg_control_bak_fullpath[MAXPGPATH];
|
||||
|
||||
/* arrays with meta info for multi threaded backup */
|
||||
pthread_t *threads;
|
||||
restore_files_arg *threads_args;
|
||||
@@ -922,6 +931,11 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain,
|
||||
pg_strcasecmp(file->name, RELMAPPER_FILENAME) == 0)
|
||||
redundant = true;
|
||||
|
||||
/* global/pg_control.pbk.bak is always kept, because it is needed to restart a failed incremental restore */
|
||||
if (file->external_dir_num == 0 &&
|
||||
pg_strcasecmp(file->rel_path, XLOG_CONTROL_BAK_FILE) == 0)
|
||||
redundant = false;
|
||||
|
||||
/* do not delete the useful internal directories */
|
||||
if (S_ISDIR(file->mode) && !redundant)
|
||||
continue;
|
||||
@@ -974,6 +988,42 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain,
|
||||
dest_bytes = dest_backup->pgdata_bytes;
|
||||
|
||||
pretty_size(dest_bytes, pretty_dest_bytes, lengthof(pretty_dest_bytes));
|
||||
/*
|
||||
* [Issue #313]
|
||||
* find pg_control file (in already sorted earlier dest_files, see parray_qsort(backup->files...))
|
||||
* and exclude it from list for future special processing
|
||||
*/
|
||||
{
|
||||
int control_file_elem_index;
|
||||
pgFile search_key;
|
||||
MemSet(&search_key, 0, sizeof(pgFile));
|
||||
/* pgFileCompareRelPathWithExternal uses only .rel_path and .external_dir_num for comparison */
|
||||
search_key.rel_path = XLOG_CONTROL_FILE;
|
||||
search_key.external_dir_num = 0;
|
||||
control_file_elem_index = parray_bsearch_index(dest_files, &search_key, pgFileCompareRelPathWithExternal);
|
||||
|
||||
if (control_file_elem_index < 0)
|
||||
elog(ERROR, "File \"%s\" not found in backup %s", XLOG_CONTROL_FILE, base36enc(dest_backup->start_time));
|
||||
dest_pg_control_file = (pgFile *) parray_get(dest_files, control_file_elem_index);
|
||||
parray_remove(dest_files, control_file_elem_index);
|
||||
|
||||
join_path_components(dest_pg_control_fullpath, pgdata_path, XLOG_CONTROL_FILE);
|
||||
join_path_components(dest_pg_control_bak_fullpath, pgdata_path, XLOG_CONTROL_BAK_FILE);
|
||||
/*
|
||||
* rename (if it exists) the dest control file before restoring;
|
||||
* if it doesn't exist, that means we are already restoring into a previously failed
|
||||
* pgdata, where XLOG_CONTROL_BAK_FILE exists
|
||||
*/
|
||||
if (params->incremental_mode != INCR_NONE)
|
||||
{
|
||||
if (fio_access(dest_pg_control_fullpath,F_OK,FIO_DB_HOST) == 0){
|
||||
if (fio_rename(dest_pg_control_fullpath, dest_pg_control_bak_fullpath, FIO_DB_HOST) < 0)
|
||||
elog(WARNING, "Cannot rename file \"%s\" to \"%s\": %s",
|
||||
dest_pg_control_fullpath, dest_pg_control_bak_fullpath, strerror(errno));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
elog(INFO, "Start restoring backup files. PGDATA size: %s", pretty_dest_bytes);
|
||||
time(&start_time);
|
||||
thread_interrupted = false;
|
||||
@@ -1014,6 +1064,32 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain,
|
||||
total_bytes += threads_args[i].restored_bytes;
|
||||
}
|
||||
|
||||
/* [Issue #313] copy pg_control at very end */
|
||||
if (restore_isok)
|
||||
{
|
||||
FILE *out = NULL;
|
||||
elog(progress ? INFO : LOG, "Progress: Restore file \"%s\"",
|
||||
dest_pg_control_file->rel_path);
|
||||
|
||||
out = fio_fopen(dest_pg_control_fullpath, PG_BINARY_R "+", FIO_DB_HOST);
|
||||
|
||||
total_bytes += restore_non_data_file(parent_chain,
|
||||
dest_backup,
|
||||
dest_pg_control_file,
|
||||
out,
|
||||
dest_pg_control_fullpath, false);
|
||||
fio_fclose(out);
|
||||
/* Now backup control file can be deleted */
|
||||
if (params->incremental_mode != INCR_NONE)
|
||||
{
|
||||
pgFile *dst_control;
|
||||
dst_control = pgFileNew(dest_pg_control_bak_fullpath, XLOG_CONTROL_BAK_FILE,
|
||||
true,0, FIO_BACKUP_HOST);
|
||||
fio_delete(dst_control->mode, dest_pg_control_bak_fullpath, FIO_LOCAL_HOST);
|
||||
pgFileFree(dst_control);
|
||||
}
|
||||
}
|
||||
|
||||
time(&end_time);
|
||||
pretty_time_interval(difftime(end_time, start_time),
|
||||
pretty_time, lengthof(pretty_time));
|
||||
@@ -1098,6 +1174,8 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain,
|
||||
parray_free(pgdata_files);
|
||||
}
|
||||
|
||||
if(dest_pg_control_file) pgFileFree(dest_pg_control_file);
|
||||
|
||||
for (i = parray_num(parent_chain) - 1; i >= 0; i--)
|
||||
{
|
||||
pgBackup *backup = (pgBackup *) parray_get(parent_chain, i);
|
||||
@@ -2230,7 +2308,10 @@ check_incremental_compatibility(const char *pgdata, uint64 system_identifier,
|
||||
*/
|
||||
elog(LOG, "Trying to read pg_control file in destination directory");
|
||||
|
||||
system_id_pgdata = get_system_identifier(pgdata, FIO_DB_HOST, false);
|
||||
get_control_file_or_back_file(pgdata, FIO_DB_HOST, &instance_control);
|
||||
control_downloaded = true;
|
||||
|
||||
system_id_pgdata = instance_control.system_identifier;
|
||||
|
||||
if (system_id_pgdata == instance_config.system_identifier)
|
||||
system_id_match = true;
|
||||
|
20
src/util.c
20
src/util.c
@@ -190,6 +190,26 @@ get_current_timeline_from_control(const char *pgdata_path, fio_location location
|
||||
return ControlFile.checkPointCopy.ThisTimeLineID;
|
||||
}
|
||||
|
||||
void
|
||||
get_control_file_or_back_file(const char *pgdata_path, fio_location location, ControlFileData *control)
|
||||
{
|
||||
char *buffer;
|
||||
size_t size;
|
||||
|
||||
/* First fetch file... */
|
||||
buffer = slurpFile(pgdata_path, XLOG_CONTROL_FILE, &size, true, location);
|
||||
|
||||
if (!buffer || size == 0){
|
||||
/* Error read XLOG_CONTROL_FILE or file is truncated, trying read backup */
|
||||
buffer = slurpFile(pgdata_path, XLOG_CONTROL_BAK_FILE, &size, true, location);
|
||||
if (!buffer)
|
||||
elog(ERROR, "Could not read %s and %s files\n", XLOG_CONTROL_FILE, XLOG_CONTROL_BAK_FILE); /* Maybe it should be PANIC? */
|
||||
}
|
||||
digestControlFile(control, buffer, size);
|
||||
pg_free(buffer);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Get last check point record ptr from pg_control.
|
||||
*/
|
||||
|
@@ -1783,7 +1783,7 @@ class ProbackupTest(object):
|
||||
'ptrack_control', 'ptrack_init', 'pg_control',
|
||||
'probackup_recovery.conf', 'recovery.signal',
|
||||
'standby.signal', 'ptrack.map', 'ptrack.map.mmap',
|
||||
'ptrack.map.tmp'
|
||||
'ptrack.map.tmp', 'recovery.done','backup_label.old'
|
||||
]
|
||||
|
||||
if exclude_dirs:
|
||||
|
@@ -9,8 +9,9 @@ from datetime import datetime, timedelta
|
||||
import hashlib
|
||||
import shutil
|
||||
import json
|
||||
from testgres import QueryException
|
||||
|
||||
from testgres import QueryException, StartNodeException
|
||||
import stat
|
||||
from stat import S_ISDIR
|
||||
|
||||
class IncrRestoreTest(ProbackupTest, unittest.TestCase):
|
||||
|
||||
@@ -2426,3 +2427,86 @@ class IncrRestoreTest(ProbackupTest, unittest.TestCase):
|
||||
'select 1')
|
||||
|
||||
# check that MinRecPoint and BackupStartLsn are correctly used in case of --incremental-mode=lsn
|
||||
|
||||
# @unittest.skip("skip")
|
||||
def test_incr_restore_issue_313(self):
    """
    Check that failed incremental restore can be restarted

    Scenario: take a FULL and a DELTA backup, restore the FULL one, then
    start an incremental (checksum mode) restore of the DELTA backup under
    gdb and kill it after `count - 1` hits of the restore_non_data_file
    breakpoint. The interrupted restore must leave pg_control.pbk.bak
    behind, the node must refuse to start, and a second incremental
    restore must succeed and reproduce the original data directory.
    """
    # NOTE(review): assumes _check_gdb_flag_or_skip_test is a regular
    # method (not a property); it has to be called to have any effect.
    self._check_gdb_flag_or_skip_test()
    node = self.make_simple_node('node',
        set_replication=True,
        initdb_params=['--data-checksums'])

    backup_dir = os.path.join(self.tmp_path, self.module_name, self.fname, 'backup')
    self.init_pb(backup_dir)
    self.add_instance(backup_dir, 'node', node)
    self.set_archiving(backup_dir, 'node', node)
    node.slow_start()

    node.pgbench_init(scale = 50)

    full_backup_id = self.backup_node(backup_dir, 'node', node, backup_type='full')

    pgbench = node.pgbench(
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
        options=['-T', '10', '-c', '1', '--no-vacuum'])
    pgbench.wait()
    pgbench.stdout.close()

    last_backup_id = self.backup_node(backup_dir, 'node', node, backup_type='delta')

    # remember the expected content before wiping the data directory
    pgdata = self.pgdata_content(node.data_dir)
    node.cleanup()

    self.restore_node(backup_dir, 'node', node, backup_id=full_backup_id)

    count = 0
    filelist = self.get_backup_filelist(backup_dir, 'node', last_backup_id)
    for file in filelist:
        # count only nondata files
        if int(filelist[file]['is_datafile']) == 0 and \
                not stat.S_ISDIR(int(filelist[file]['mode'])) and \
                not filelist[file]['size'] == '0' and \
                file != 'database_map':
            count += 1

    gdb = self.restore_node(backup_dir, 'node', node, gdb=True,
        backup_id=last_backup_id, options=['--progress', '--incremental-mode=checksum'])
    gdb.verbose = False
    gdb.set_breakpoint('restore_non_data_file')
    gdb.run_until_break()
    # stop before the last counted non-data file is restored, then kill
    # the restore process
    gdb.continue_execution_until_break(count - 1)
    gdb.quit()

    # the interrupted incremental restore must leave the saved control file
    bak_file = os.path.join(node.data_dir, 'global', 'pg_control.pbk.bak')
    self.assertTrue(
        os.path.exists(bak_file),
        "pg_control bak File should exist: {0}".format(bak_file))

    try:
        node.slow_start()
        # we should die here because exception is what we expect to happen
        self.assertEqual(
            1, 0,
            "Expecting Error because backup is not fully restored")
    except StartNodeException as e:
        self.assertIn(
            'Cannot start node',
            e.message,
            '\n Unexpected Error Message: {0}\n CMD: {1}'.format(
                repr(e.message), self.cmd))

    # NOTE(review): the expected log strings are reproduced exactly as
    # visible in the source; confirm the spacing after "PANIC:" against a
    # real postgresql.log.
    with open(os.path.join(node.logs_dir, 'postgresql.log'), 'r') as f:
        if self.pg_config_version >= 120000:
            self.assertIn(
                "PANIC: could not read file \"global/pg_control\"",
                f.read())
        else:
            self.assertIn(
                "PANIC: could not read from control file",
                f.read())

    # restart the failed incremental restore; this time it must complete
    self.restore_node(backup_dir, 'node', node,
        backup_id=last_backup_id, options=['--progress', '--incremental-mode=checksum'])
    node.slow_start()
    self.compare_pgdata(pgdata, self.pgdata_content(node.data_dir))
|
||||
|
@@ -3,11 +3,11 @@ import unittest
|
||||
from .helpers.ptrack_helpers import ProbackupTest, ProbackupException
|
||||
import subprocess
|
||||
import sys
|
||||
from time import sleep
|
||||
from datetime import datetime, timedelta, timezone
|
||||
import hashlib
|
||||
import shutil
|
||||
import json
|
||||
import stat
|
||||
from shutil import copyfile
|
||||
from testgres import QueryException, StartNodeException
|
||||
from stat import S_ISDIR
|
||||
@@ -3709,66 +3709,6 @@ class RestoreTest(ProbackupTest, unittest.TestCase):
|
||||
self.compare_pgdata(pgdata1, pgdata2)
|
||||
self.compare_pgdata(pgdata2, pgdata3)
|
||||
|
||||
# skip this test until https://github.com/postgrespro/pg_probackup/pull/399
|
||||
@unittest.skip("skip")
|
||||
def test_restore_issue_313(self):
|
||||
"""
|
||||
Check that partially restored PostgreSQL instance cannot be started
|
||||
"""
|
||||
backup_dir = os.path.join(self.tmp_path, self.module_name, self.fname, 'backup')
|
||||
node = self.make_simple_node(
|
||||
base_dir=os.path.join(self.module_name, self.fname, 'node'),
|
||||
set_replication=True,
|
||||
initdb_params=['--data-checksums'])
|
||||
|
||||
self.init_pb(backup_dir)
|
||||
self.add_instance(backup_dir, 'node', node)
|
||||
self.set_archiving(backup_dir, 'node', node)
|
||||
node.slow_start()
|
||||
|
||||
# FULL backup
|
||||
backup_id = self.backup_node(backup_dir, 'node', node)
|
||||
node.cleanup()
|
||||
|
||||
count = 0
|
||||
filelist = self.get_backup_filelist(backup_dir, 'node', backup_id)
|
||||
for file in filelist:
|
||||
# count only nondata files
|
||||
if int(filelist[file]['is_datafile']) == 0 and int(filelist[file]['size']) > 0:
|
||||
count += 1
|
||||
|
||||
node_restored = self.make_simple_node(
|
||||
base_dir=os.path.join(self.module_name, self.fname, 'node_restored'))
|
||||
node_restored.cleanup()
|
||||
self.restore_node(backup_dir, 'node', node_restored)
|
||||
|
||||
gdb = self.restore_node(backup_dir, 'node', node, gdb=True, options=['--progress'])
|
||||
gdb.verbose = False
|
||||
gdb.set_breakpoint('restore_non_data_file')
|
||||
gdb.run_until_break()
|
||||
gdb.continue_execution_until_break(count - 2)
|
||||
gdb.quit()
|
||||
|
||||
# emulate the user or HA taking care of PG configuration
|
||||
for fname in os.listdir(node_restored.data_dir):
|
||||
if fname.endswith('.conf'):
|
||||
os.rename(
|
||||
os.path.join(node_restored.data_dir, fname),
|
||||
os.path.join(node.data_dir, fname))
|
||||
|
||||
try:
|
||||
node.slow_start()
|
||||
# we should die here because exception is what we expect to happen
|
||||
self.assertEqual(
|
||||
1, 0,
|
||||
"Expecting Error because backup is not fully restored")
|
||||
except StartNodeException as e:
|
||||
self.assertIn(
|
||||
'Cannot start node',
|
||||
e.message,
|
||||
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
|
||||
repr(e.message), self.cmd))
|
||||
|
||||
# @unittest.skip("skip")
|
||||
def test_restore_with_waldir(self):
|
||||
"""recovery using tablespace-mapping option and page backup"""
|
||||
@@ -3833,8 +3773,6 @@ class RestoreTest(ProbackupTest, unittest.TestCase):
|
||||
self.add_instance(backup_dir, 'node', node)
|
||||
self.set_archiving(backup_dir, 'node', node)
|
||||
node.slow_start()
|
||||
|
||||
|
||||
node.pgbench_init(scale=2)
|
||||
|
||||
before1 = node.table_checksum("pgbench_branches")
|
||||
@@ -3850,8 +3788,6 @@ class RestoreTest(ProbackupTest, unittest.TestCase):
|
||||
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
|
||||
repr(self.output), self.cmd))
|
||||
|
||||
|
||||
|
||||
node.slow_start()
|
||||
pgbench = node.pgbench(
|
||||
stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
|
||||
@@ -3925,3 +3861,72 @@ class RestoreTest(ProbackupTest, unittest.TestCase):
|
||||
# check for the current updates
|
||||
after = node.table_checksum("pgbench_branches")
|
||||
self.assertEqual(before1, after)
|
||||
|
||||
def test_restore_issue_313(self):
    """
    Check that partially restored PostgreSQL instance cannot be started

    Scenario: take a FULL backup, start restoring it under gdb and kill
    the restore after `count - 1` hits of the restore_non_data_file
    breakpoint. The node must then fail to start, and postgresql.log must
    report that the control file could not be read.
    """
    # NOTE(review): assumes _check_gdb_flag_or_skip_test is a regular
    # method (not a property); it has to be called to have any effect.
    self._check_gdb_flag_or_skip_test()
    node = self.make_simple_node('node',
        set_replication=True,
        initdb_params=['--data-checksums'])

    backup_dir = os.path.join(self.tmp_path, self.module_name, self.fname, 'backup')
    self.init_pb(backup_dir)
    self.add_instance(backup_dir, 'node', node)
    self.set_archiving(backup_dir, 'node', node)
    node.slow_start()

    # FULL backup
    backup_id = self.backup_node(backup_dir, 'node', node)
    node.cleanup()

    count = 0
    filelist = self.get_backup_filelist(backup_dir, 'node', backup_id)
    for file in filelist:
        # count only nondata files
        if int(filelist[file]['is_datafile']) == 0 and \
                not stat.S_ISDIR(int(filelist[file]['mode'])) and \
                not filelist[file]['size'] == '0' and \
                file != 'database_map':
            count += 1

    # a complete restore into a second node; used below only as a source
    # of configuration files
    node_restored = self.make_simple_node('node_restored')
    node_restored.cleanup()
    self.restore_node(backup_dir, 'node', node_restored)

    gdb = self.restore_node(backup_dir, 'node', node, gdb=True, options=['--progress'])
    gdb.verbose = False
    gdb.set_breakpoint('restore_non_data_file')
    gdb.run_until_break()
    # stop before the last counted non-data file is restored, then kill
    # the restore process
    gdb.continue_execution_until_break(count - 1)
    gdb.quit()

    # emulate the user or HA taking care of PG configuration
    for fname in os.listdir(node_restored.data_dir):
        if fname.endswith('.conf'):
            os.rename(
                os.path.join(node_restored.data_dir, fname),
                os.path.join(node.data_dir, fname))

    try:
        node.slow_start()
        # we should die here because exception is what we expect to happen
        self.assertEqual(
            1, 0,
            "Expecting Error because backup is not fully restored")
    except StartNodeException as e:
        self.assertIn(
            'Cannot start node',
            e.message,
            '\n Unexpected Error Message: {0}\n CMD: {1}'.format(
                repr(e.message), self.cmd))

    # NOTE(review): the expected log strings are reproduced exactly as
    # visible in the source; confirm the spacing after "PANIC:" against a
    # real postgresql.log.
    with open(os.path.join(node.logs_dir, 'postgresql.log'), 'r') as f:
        if self.pg_config_version >= 120000:
            self.assertIn(
                "PANIC: could not read file \"global/pg_control\"",
                f.read())
        else:
            self.assertIn(
                "PANIC: could not read from control file",
                f.read())
|
||||
|
Reference in New Issue
Block a user