mirror of https://github.com/postgrespro/pg_probackup.git
synced 2025-02-13 14:58:35 +02:00

Merge branch 'master' into issue_92

commit eac3d8a1d0
@@ -107,7 +107,7 @@ Current version - 2.1.3

 As compared to other backup solutions, pg_probackup offers the following benefits that can help you implement different backup strategies and deal with large amounts of data:

-- Incremental backup: page-level incremental backup allows you save to disk space, speed up backup and restore. With three different incremental modes you can plan the backup strategy in accordance with your data flow
+- Incremental backup: page-level incremental backup allows you to save disk space, speed up backup and restore. With three different incremental modes you can plan the backup strategy in accordance with your data flow
 - Validation: Automatic data consistency checks and on-demand backup validation without actual data recovery
 - Verification: On-demand verification of PostgreSQL instance via dedicated command `checkdb`
 - Retention: Managing backups in accordance with retention policies - Time and/or Redundancy based, with two retention methods: `delete expired` and `merge expired`

@@ -778,7 +778,7 @@ Specifies remote host user for SSH connection. If you omit this option, the curr

 Specifies pg_probackup installation directory on the remote system.

 --ssh-options

-Specifies a string of SSH command-line options.
+Specifies a string of SSH command-line options. For example, the following options can be used to set keep-alive for SSH connections opened by pg_probackup: `--ssh-options='-o ServerAliveCountMax=5 -o ServerAliveInterval=60'`. The full list of possible options can be found here: [https://linux.die.net/man/5/ssh_config](https://linux.die.net/man/5/ssh_config)

 #### Replica Options
src/backup.c
@@ -344,6 +344,9 @@ do_backup_instance(PGconn *backup_conn)
 		dir_list_file(backup_files_list, parray_get(external_dirs, i),
 					  false, true, false, i+1, FIO_DB_HOST);

+	/* close ssh session in main thread */
+	fio_disconnect();
+
 	/* Sanity check for backup_files_list, thank you, Windows:
 	 * https://github.com/postgrespro/pg_probackup/issues/48
 	 */

@@ -512,6 +515,9 @@ do_backup_instance(PGconn *backup_conn)
 		parray_free(prev_backup_filelist);
 	}

+	/* Notify end of backup */
+	pg_stop_backup(&current, pg_startbackup_conn);
+
 	/* In case of backup from replica >= 9.6 we must fix minRecPoint,
 	 * First we must find pg_control in backup_files_list.
 	 */
@@ -532,13 +538,16 @@ do_backup_instance(PGconn *backup_conn)
 				break;
 			}
 		}

+		if (!pg_control)
+			elog(ERROR, "Failed to find file \"%s\" in backup filelist.",
+				 pg_control_path);
+
+		set_min_recovery_point(pg_control, database_path, current.stop_lsn);
+	}

-	/* Notify end of backup */
-	pg_stop_backup(&current, pg_startbackup_conn);
-
-	if (current.from_replica && !exclusive_backup)
-		set_min_recovery_point(pg_control, database_path, current.stop_lsn);
+	/* close ssh session in main thread */
+	fio_disconnect();

 	/* Add archived xlog files into the list of files of this backup */
 	if (stream_wal)
@@ -2143,6 +2152,9 @@ backup_files(void *arg)
 			elog(WARNING, "unexpected file type %d", buf.st_mode);
 	}

+	/* ssh connection no longer needed */
+	fio_disconnect();
+
 	/* Close connection */
 	if (arguments->conn_arg.conn)
 		pgut_disconnect(arguments->conn_arg.conn);
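The backup.c hunks above call fio_disconnect() both from backup worker threads and from the main thread. Repeating the call is safe because the new fio_disconnect() (shown in the src/utils/file.c hunk below) guards on fio_stdin and zeroes the descriptors after closing. A minimal sketch of that guard idiom, using hypothetical names rather than the real pg_probackup globals:

```c
#include <stdio.h>

/* Hypothetical stand-ins for the session descriptors (cf. fio_stdin/fio_stdout). */
static int conn_in  = 3;
static int conn_out = 4;

/* Idempotent disconnect: zeroed descriptors turn repeated calls into no-ops. */
static void disconnect_once(void)
{
	if (conn_in)
	{
		printf("closing descriptors %d and %d\n", conn_in, conn_out);
		conn_in = 0;
		conn_out = 0;
	}
}

int main(void)
{
	disconnect_once();	/* first call closes the session */
	disconnect_once();	/* second call sees conn_in == 0 and does nothing */
	return 0;
}
```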
src/pg_probackup.h

@@ -578,6 +578,7 @@ extern bool in_backup_list(parray *backup_list, pgBackup *target_backup);
 extern int get_backup_index_number(parray *backup_list, pgBackup *backup);
 extern bool launch_agent(void);
 extern void launch_ssh(char* argv[]);
+extern void wait_ssh(void);

 #define COMPRESS_ALG_DEFAULT		NOT_DEFINED_COMPRESS
 #define COMPRESS_LEVEL_DEFAULT		1
src/utils/file.c

@@ -333,6 +333,21 @@ int fio_open(char const* path, int mode, fio_location location)
 	return fd;
 }

+/* Close ssh session */
+void
+fio_disconnect(void)
+{
+	if (fio_stdin)
+	{
+		SYS_CHECK(close(fio_stdin));
+		SYS_CHECK(close(fio_stdout));
+		fio_stdin = 0;
+		fio_stdout = 0;
+		wait_ssh();
+	}
+}
+
 /* Open stdio file */
 FILE* fio_fopen(char const* path, char const* mode, fio_location location)
 {
@@ -340,14 +355,30 @@ FILE* fio_fopen(char const* path, char const* mode, fio_location location)

 	if (fio_is_remote(location))
 	{
-		int flags = O_RDWR|O_CREAT;
+		int flags = 0;
 		int fd;
 		if (strcmp(mode, PG_BINARY_W) == 0) {
-			flags |= O_TRUNC|PG_BINARY;
-		} else if (strncmp(mode, PG_BINARY_R, strlen(PG_BINARY_R)) == 0) {
-			flags |= PG_BINARY;
-		} else if (strcmp(mode, "r+") == 0) {
-			flags |= O_RDWR|O_CREAT;
+			flags = O_TRUNC|PG_BINARY|O_RDWR|O_CREAT;
+		} else if (strcmp(mode, "w") == 0) {
+			flags = O_TRUNC|O_RDWR|O_CREAT;
+		} else if (strcmp(mode, PG_BINARY_R) == 0) {
+			flags = O_RDONLY|PG_BINARY;
+		} else if (strcmp(mode, "r") == 0) {
+			flags = O_RDONLY;
+		} else if (strcmp(mode, PG_BINARY_R "+") == 0) {
+			/* stdio fopen("rb+") doesn't actually create a nonexistent file, but probackup
+			 * frequently needs to open an existing file or create a new one if it doesn't exist.
+			 * In stdio this takes two fopen calls: fopen("r+"), and if that fails, fopen("w").
+			 * To eliminate the extra call, which is especially costly over a remote connection,
+			 * we change the r+ semantics to create the file if it does not exist.
+			 */
+			flags = O_RDWR|O_CREAT|PG_BINARY;
+		} else if (strcmp(mode, "r+") == 0) { /* see comment above */
+			flags = O_RDWR|O_CREAT;
 		} else if (strcmp(mode, "a") == 0) {
-			flags |= O_APPEND;
+			flags = O_CREAT|O_RDWR|O_APPEND;
 		} else {
 			Assert(false);
 		}
 		fd = fio_open(path, flags, location);
 		if (fd >= 0)
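The comment in the new `r+` branch packs in the key trade-off: plain stdio needs two fopen() calls to "open or create", because fopen("r+") fails on a missing file, and over an ssh channel every extra call is a round trip. A sketch of the two-call stdio idiom that the single open(path, O_RDWR|O_CREAT) replaces (the helper and file name are illustrative, not pg_probackup code; "w+" is used here so the fallback handle stays readable):

```c
#include <stdio.h>

/* Open for read/write, creating the file if it does not exist.
 * fopen("r+") fails when the file is missing; fopen("w+") then creates it.
 * Each call costs a round trip on a remote channel, which is why fio_fopen
 * maps "r+" directly to open(path, O_RDWR|O_CREAT) in a single request. */
static FILE *open_rw_create(const char *path)
{
	FILE *f = fopen(path, "r+");	/* existing file: keep its contents */
	if (f == NULL)
		f = fopen(path, "w+");		/* missing file: create it empty */
	return f;
}

int main(void)
{
	FILE *f = open_rw_create("example.tmp");	/* illustrative path */
	if (f != NULL)
		fclose(f);
	return 0;
}
```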
src/utils/file.h

@@ -90,6 +90,7 @@ extern int fio_seek(int fd, off_t offs);
 extern int fio_fstat(int fd, struct stat* st);
 extern int fio_truncate(int fd, off_t size);
 extern int fio_close(int fd);
+extern void fio_disconnect(void);

 extern int fio_rename(char const* old_path, char const* new_path, fio_location location);
 extern int fio_symlink(char const* target, char const* link_path, fio_location location);
src/utils/remote.c

@@ -5,6 +5,12 @@
 #include <sys/wait.h>
 #include <signal.h>

+#ifdef WIN32
+#define __thread __declspec(thread)
+#else
+#include <pthread.h>
+#endif
+
 #include "pg_probackup.h"
 #include "file.h"

@@ -52,7 +58,8 @@ static int split_options(int argc, char* argv[], int max_options, char* options)
 	return argc;
 }

-static int child_pid;
+static __thread int child_pid;
+
 #if 0
 static void kill_child(void)
 {

@@ -60,6 +67,14 @@ static void kill_child(void)
 }
 #endif

+
+void wait_ssh(void)
+{
+	int status;
+	waitpid(child_pid, &status, 0);
+	elog(LOG, "SSH process %d is terminated with status %d", child_pid, status);
+}
+
 #ifdef WIN32
 void launch_ssh(char* argv[])
 {
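The remote.c changes pair every launched ssh process with a reaper: launch_ssh() forks and remembers the child pid, and wait_ssh(), called from fio_disconnect(), collects the exit status. Making child_pid thread-local (__thread, or __declspec(thread) on Windows) gives each thread its own pid to wait on. A self-contained sketch of that fork/exec/waitpid lifecycle under the same __thread convention; the command and helper names are illustrative, not the pg_probackup implementation:

```c
#include <stdio.h>
#include <unistd.h>
#include <sys/wait.h>

/* One child pid per thread, as in the patched remote.c. */
static __thread pid_t child_pid;

/* Fork and exec a child process, remembering its pid (cf. launch_ssh). */
static void launch_child(char *argv[])
{
	child_pid = fork();
	if (child_pid < 0)
		perror("fork");
	else if (child_pid == 0)
	{
		execvp(argv[0], argv);
		_exit(127);				/* only reached if exec fails */
	}
}

/* Reap the child and report its exit status (cf. wait_ssh). */
static void wait_child(void)
{
	int status;
	waitpid(child_pid, &status, 0);
	printf("child %d terminated with status %d\n", (int) child_pid, status);
}

int main(void)
{
	char *argv[] = {"echo", "hello", NULL};	/* illustrative command */
	launch_child(argv);
	wait_child();
	return 0;
}
```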
tests/__init__.py

@@ -12,8 +12,9 @@ from . import init, merge, option, show, compatibility, \
 def load_tests(loader, tests, pattern):
     suite = unittest.TestSuite()

-    if os.environ['PG_PROBACKUP_TEST_BASIC'] == 'ON':
-        loader.testMethodPrefix = 'test_basic'
+    if 'PG_PROBACKUP_TEST_BASIC' in os.environ:
+        if os.environ['PG_PROBACKUP_TEST_BASIC'] == 'ON':
+            loader.testMethodPrefix = 'test_basic'

 #    suite.addTests(loader.loadTestsFromModule(auth_test))
     suite.addTests(loader.loadTestsFromModule(archive))
tests/archive.py

@@ -452,22 +452,23 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
         filename = filename_orig + '.partial'
         file = os.path.join(wals_dir, filename)

         # emulate stale .partial file
         with open(file, 'a') as f:
             f.write(b"blahblah")
             f.flush()
             f.close()

         self.switch_wal_segment(node)
-        sleep(15)
+        sleep(20)

         # check that segment is archived
         if self.archive_compress:
             filename_orig = filename_orig + '.gz'

         file = os.path.join(wals_dir, filename_orig)

         self.assertTrue(os.path.isfile(file))

         # successful validate means that archive-push reused stale wal segment
         self.validate_pb(
             backup_dir, 'node',
             options=['--recovery-target-xid={0}'.format(xid)])
tests/backup.py

@@ -469,16 +469,74 @@ class BackupTest(ProbackupTest, unittest.TestCase):
                 "\n Output: {0} \n CMD: {1}".format(
                     repr(self.output), self.cmd))
         except ProbackupException as e:
-            self.assertIn(
-                'WARNING: Corruption detected in file',
-                e.message,
-                '\n Unexpected Error Message: {0}\n CMD: {1}'.format(
-                    repr(e.message), self.cmd))
-            self.assertIn(
-                'ERROR: Data file corruption',
-                e.message,
-                '\n Unexpected Error Message: {0}\n CMD: {1}'.format(
-                    repr(e.message), self.cmd))
+            if self.remote:
+                self.assertTrue(
+                    "ERROR: Failed to read file" in e.message and
+                    "data file checksum mismatch" in e.message,
+                    '\n Unexpected Error Message: {0}\n CMD: {1}'.format(
+                        repr(e.message), self.cmd))
+            else:
+                self.assertIn(
+                    'WARNING: Corruption detected in file',
+                    e.message,
+                    '\n Unexpected Error Message: {0}\n CMD: {1}'.format(
+                        repr(e.message), self.cmd))
+                self.assertIn(
+                    'ERROR: Data file corruption',
+                    e.message,
+                    '\n Unexpected Error Message: {0}\n CMD: {1}'.format(
+                        repr(e.message), self.cmd))

         # Clean after yourself
         self.del_test_dir(module_name, fname)

+    # @unittest.skip("skip")
+    def test_backup_truncate_misaligned(self):
+        """
+        make node, truncate file to a size that is not a multiple of BLCKSZ,
+        take backup
+        """
+        fname = self.id().split('.')[3]
+        node = self.make_simple_node(
+            base_dir=os.path.join(module_name, fname, 'node'),
+            set_replication=True,
+            initdb_params=['--data-checksums'])
+
+        backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
+
+        self.init_pb(backup_dir)
+        self.add_instance(backup_dir, 'node', node)
+        node.slow_start()
+
+        node.safe_psql(
+            "postgres",
+            "create table t_heap as select 1 as id, md5(i::text) as text, "
+            "md5(repeat(i::text,10))::tsvector as tsvector "
+            "from generate_series(0,100000) i")
+
+        node.safe_psql(
+            "postgres",
+            "CHECKPOINT;")
+
+        heap_path = node.safe_psql(
+            "postgres",
+            "select pg_relation_filepath('t_heap')").rstrip()
+
+        heap_size = node.safe_psql(
+            "postgres",
+            "select pg_relation_size('t_heap')")
+
+        with open(os.path.join(node.data_dir, heap_path), "rb+", 0) as f:
+            f.truncate(int(heap_size) - 4096)
+            f.flush()
+            f.close()
+
+        output = self.backup_node(
+            backup_dir, 'node', node, backup_type="full",
+            options=["-j", "4", "--stream"], return_id=False)
+
+        self.assertIn("WARNING: File", output)
+        self.assertIn("invalid file size", output)
+
+        # Clean after yourself
+        self.del_test_dir(module_name, fname)
@@ -1450,16 +1508,16 @@ class BackupTest(ProbackupTest, unittest.TestCase):

         node.safe_psql(
             'backupdb',
             "REVOKE TEMPORARY ON DATABASE backupdb FROM PUBLIC;"
-            "REVOKE ALL on SCHEMA public from PUBLIC; "
             "REVOKE ALL ON DATABASE backupdb from PUBLIC; "
+            "REVOKE ALL ON SCHEMA public from PUBLIC; "
             "REVOKE ALL ON ALL TABLES IN SCHEMA public FROM PUBLIC; "
             "REVOKE ALL ON ALL FUNCTIONS IN SCHEMA public FROM PUBLIC; "
             "REVOKE ALL ON ALL SEQUENCES IN SCHEMA public FROM PUBLIC; "
-            "REVOKE ALL on SCHEMA pg_catalog from PUBLIC; "
+            "REVOKE ALL ON SCHEMA pg_catalog from PUBLIC; "
             "REVOKE ALL ON ALL TABLES IN SCHEMA pg_catalog FROM PUBLIC; "
             "REVOKE ALL ON ALL FUNCTIONS IN SCHEMA pg_catalog FROM PUBLIC; "
             "REVOKE ALL ON ALL SEQUENCES IN SCHEMA pg_catalog FROM PUBLIC; "
-            "REVOKE ALL on SCHEMA information_schema from PUBLIC; "
+            "REVOKE ALL ON SCHEMA information_schema from PUBLIC; "
             "REVOKE ALL ON ALL TABLES IN SCHEMA information_schema FROM PUBLIC; "
             "REVOKE ALL ON ALL FUNCTIONS IN SCHEMA information_schema FROM PUBLIC; "
             "REVOKE ALL ON ALL SEQUENCES IN SCHEMA information_schema FROM PUBLIC; "
tests/compatibility.py

@@ -532,49 +532,3 @@ class CompatibilityTest(ProbackupTest, unittest.TestCase):

         # Clean after yourself
         self.del_test_dir(module_name, fname)
-
-    # @unittest.skip("skip")
-    def test_backup_concurrent_drop_table(self):
-        """"""
-        fname = self.id().split('.')[3]
-        backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
-        node = self.make_simple_node(
-            base_dir=os.path.join(module_name, fname, 'node'),
-            set_replication=True,
-            initdb_params=['--data-checksums'])
-
-        self.init_pb(backup_dir)
-        self.add_instance(backup_dir, 'node', node, old_binary=True)
-        node.slow_start()
-
-        node.pgbench_init(scale=1)
-
-        # FULL backup
-        gdb = self.backup_node(
-            backup_dir, 'node', node,
-            options=['--stream', '--compress', '--log-level-file=VERBOSE'],
-            gdb=True, old_binary=True)
-
-        gdb.set_breakpoint('backup_data_file')
-        gdb.run_until_break()
-
-        node.safe_psql(
-            'postgres',
-            'DROP TABLE pgbench_accounts')
-
-        # do checkpoint to guarantee filenode removal
-        node.safe_psql(
-            'postgres',
-            'CHECKPOINT')
-
-        gdb.remove_all_breakpoints()
-        gdb.continue_execution_until_exit()
-
-        # show_backup = self.show_pb(backup_dir, 'node')[0]
-        # self.assertEqual(show_backup['status'], "OK")
-
-        # validate with fresh binary, it MUST be successful
-        self.validate_pb(backup_dir)
-
-        # Clean after yourself
-        self.del_test_dir(module_name, fname)
tests/replica.py

@@ -437,3 +437,91 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):

         # Clean after yourself
         self.del_test_dir(module_name, fname)
+
+    # @unittest.skip("skip")
+    def test_replica_promote(self):
+        """
+        start backup from replica, promote the replica during the backup,
+        check that the backup fails
+        """
+        fname = self.id().split('.')[3]
+        backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
+        master = self.make_simple_node(
+            base_dir=os.path.join(module_name, fname, 'master'),
+            set_replication=True,
+            initdb_params=['--data-checksums'],
+            pg_options={
+                'archive_timeout': '10s',
+                'checkpoint_timeout': '30s',
+                'max_wal_size': '32MB'})
+
+        self.init_pb(backup_dir)
+        self.add_instance(backup_dir, 'master', master)
+        self.set_archiving(backup_dir, 'master', master)
+        master.slow_start()
+
+        replica = self.make_simple_node(
+            base_dir=os.path.join(module_name, fname, 'replica'))
+        replica.cleanup()
+
+        self.backup_node(backup_dir, 'master', master)
+
+        master.psql(
+            "postgres",
+            "create table t_heap as select i as id, md5(i::text) as text, "
+            "md5(repeat(i::text,10))::tsvector as tsvector "
+            "from generate_series(0,165000) i")
+
+        self.restore_node(
+            backup_dir, 'master', replica, options=['-R'])
+
+        # Settings for Replica
+        self.add_instance(backup_dir, 'replica', replica)
+        self.set_archiving(backup_dir, 'replica', replica, replica=True)
+        self.set_replica(
+            master, replica,
+            replica_name='replica', synchronous=True)
+
+        replica.slow_start(replica=True)
+
+        master.psql(
+            "postgres",
+            "create table t_heap_1 as select i as id, md5(i::text) as text, "
+            "md5(repeat(i::text,10))::tsvector as tsvector "
+            "from generate_series(0,165000) i")
+
+        self.wait_until_replica_catch_with_master(master, replica)
+
+        # start backup from replica
+        gdb = self.backup_node(
+            backup_dir, 'replica', replica, gdb=True,
+            options=['--log-level-file=verbose'])
+
+        gdb.set_breakpoint('backup_data_file')
+        gdb.run_until_break()
+        gdb.continue_execution_until_break(20)
+
+        replica.promote()
+
+        gdb.remove_all_breakpoints()
+        gdb.continue_execution_until_exit()
+
+        backup_id = self.show_pb(
+            backup_dir, 'replica')[0]["id"]
+
+        # read log file content
+        with open(os.path.join(backup_dir, 'log', 'pg_probackup.log')) as f:
+            log_content = f.read()
+            f.close()
+
+        self.assertIn(
+            'ERROR: the standby was promoted during online backup',
+            log_content)
+
+        self.assertIn(
+            'WARNING: Backup {0} is running, '
+            'setting its status to ERROR'.format(backup_id),
+            log_content)
+
+        # Clean after yourself
+        self.del_test_dir(module_name, fname)