
Merge branch 'master' into issue_92

Grigory Smolkin 2019-07-03 21:11:39 +03:00
commit eac3d8a1d0
11 changed files with 239 additions and 77 deletions


@@ -107,7 +107,7 @@ Current version - 2.1.3
As compared to other backup solutions, pg_probackup offers the following benefits that can help you implement different backup strategies and deal with large amounts of data:
- Incremental backup: page-level incremental backup allows you save to disk space, speed up backup and restore. With three different incremental modes you can plan the backup strategy in accordance with your data flow
- Incremental backup: page-level incremental backup allows you to save disk space, speed up backup and restore. With three different incremental modes you can plan the backup strategy in accordance with your data flow
- Validation: Automatic data consistency checks and on-demand backup validation without actual data recovery
- Verification: On-demand verification of PostgreSQL instance via dedicated command `checkdb`
- Retention: Managing backups in accordance with retention policies - Time and/or Redundancy based, with two retention methods: `delete expired` and `merge expired`
@@ -778,7 +778,7 @@ Specifies remote host user for SSH connection. If you omit this option, the curr
Specifies pg_probackup installation directory on the remote system.
--ssh-options
Specifies a string of SSH command-line options.
Specifies a string of SSH command-line options. For example, the following options can be used to set keep-alive for ssh connections opened by pg_probackup: `--ssh-options='-o ServerAliveCountMax=5 -o ServerAliveInterval=60'`. The full list of possible options can be found here: [https://linux.die.net/man/5/ssh_config](https://linux.die.net/man/5/ssh_config)
#### Replica Options


@@ -344,6 +344,9 @@ do_backup_instance(PGconn *backup_conn)
dir_list_file(backup_files_list, parray_get(external_dirs, i),
false, true, false, i+1, FIO_DB_HOST);
/* close ssh session in main thread */
fio_disconnect();
/* Sanity check for backup_files_list, thank you, Windows:
* https://github.com/postgrespro/pg_probackup/issues/48
*/
@@ -512,6 +515,9 @@ do_backup_instance(PGconn *backup_conn)
parray_free(prev_backup_filelist);
}
/* Notify end of backup */
pg_stop_backup(&current, pg_startbackup_conn);
/* In case of backup from replica >= 9.6 we must fix minRecPoint.
* First we must find pg_control in backup_files_list.
*/
@@ -532,13 +538,16 @@ do_backup_instance(PGconn *backup_conn)
break;
}
}
if (!pg_control)
elog(ERROR, "Failed to find file \"%s\" in backup filelist.",
pg_control_path);
set_min_recovery_point(pg_control, database_path, current.stop_lsn);
}
/* Notify end of backup */
pg_stop_backup(&current, pg_startbackup_conn);
if (current.from_replica && !exclusive_backup)
set_min_recovery_point(pg_control, database_path, current.stop_lsn);
/* close ssh session in main thread */
fio_disconnect();
/* Add archived xlog files into the list of files of this backup */
if (stream_wal)
@@ -2143,6 +2152,9 @@ backup_files(void *arg)
elog(WARNING, "unexpected file type %d", buf.st_mode);
}
/* ssh connection no longer needed */
fio_disconnect();
/* Close connection */
if (arguments->conn_arg.conn)
pgut_disconnect(arguments->conn_arg.conn);


@@ -578,6 +578,7 @@ extern bool in_backup_list(parray *backup_list, pgBackup *target_backup);
extern int get_backup_index_number(parray *backup_list, pgBackup *backup);
extern bool launch_agent(void);
extern void launch_ssh(char* argv[]);
extern void wait_ssh(void);
#define COMPRESS_ALG_DEFAULT NOT_DEFINED_COMPRESS
#define COMPRESS_LEVEL_DEFAULT 1


@@ -333,6 +333,21 @@ int fio_open(char const* path, int mode, fio_location location)
return fd;
}
/* Close ssh session */
void
fio_disconnect(void)
{
if (fio_stdin)
{
SYS_CHECK(close(fio_stdin));
SYS_CHECK(close(fio_stdout));
fio_stdin = 0;
fio_stdout = 0;
wait_ssh();
}
}
/* Open stdio file */
FILE* fio_fopen(char const* path, char const* mode, fio_location location)
{
@@ -340,14 +355,30 @@ FILE* fio_fopen(char const* path, char const* mode, fio_location location)
if (fio_is_remote(location))
{
int flags = O_RDWR|O_CREAT;
int flags = 0;
int fd;
if (strcmp(mode, PG_BINARY_W) == 0) {
flags |= O_TRUNC|PG_BINARY;
} else if (strncmp(mode, PG_BINARY_R, strlen(PG_BINARY_R)) == 0) {
flags |= PG_BINARY;
flags = O_TRUNC|PG_BINARY|O_RDWR|O_CREAT;
} else if (strcmp(mode, "w") == 0) {
flags = O_TRUNC|O_RDWR|O_CREAT;
} else if (strcmp(mode, PG_BINARY_R) == 0) {
flags = O_RDONLY|PG_BINARY;
} else if (strcmp(mode, "r") == 0) {
flags = O_RDONLY;
} else if (strcmp(mode, PG_BINARY_R "+") == 0) {
/* stdio fopen("rb+") does not create a file that does not exist, but probackup
* frequently needs to open an existing file or create a new one if it is missing.
* With stdio this takes two fopen calls: fopen("r+") and, if that fails, fopen("w").
* To eliminate the extra call, which is especially costly over a remote connection,
* we change the "r+" semantics to create the file if it does not exist.
*/
flags = O_RDWR|O_CREAT|PG_BINARY;
} else if (strcmp(mode, "r+") == 0) { /* see comment above */
flags |= O_RDWR|O_CREAT;
} else if (strcmp(mode, "a") == 0) {
flags |= O_APPEND;
flags |= O_CREAT|O_RDWR|O_APPEND;
} else {
Assert(false);
}
fd = fio_open(path, flags, location);
if (fd >= 0)
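For contrast, here is a minimal stdio-only sketch (illustrative only, not taken from the pg_probackup sources; the helper name is hypothetical) of the two-call pattern the comment above describes: try fopen("r+") against an existing file and fall back to a creating mode when it is absent. Folding O_CREAT into the flags passed to a single fio_open() call avoids the second attempt, which matters when every call is a round trip to the remote agent.

```c
#include <stdio.h>

/* open_rw_or_create: hypothetical helper showing the plain-stdio way to
 * "open the file if it exists, otherwise create it" -- up to two fopen calls. */
static FILE *
open_rw_or_create(const char *path)
{
	FILE	   *f = fopen(path, "r+b");	/* succeeds only for an existing file */

	if (f == NULL)
		f = fopen(path, "w+b");		/* second call: create the file */

	return f;
}

int
main(void)
{
	FILE	   *f = open_rw_or_create("probe.tmp");

	if (f == NULL)
		return 1;
	fputs("ok\n", f);
	fclose(f);
	return 0;
}
```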


@@ -90,6 +90,7 @@ extern int fio_seek(int fd, off_t offs);
extern int fio_fstat(int fd, struct stat* st);
extern int fio_truncate(int fd, off_t size);
extern int fio_close(int fd);
extern void fio_disconnect(void);
extern int fio_rename(char const* old_path, char const* new_path, fio_location location);
extern int fio_symlink(char const* target, char const* link_path, fio_location location);


@@ -5,6 +5,12 @@
#include <sys/wait.h>
#include <signal.h>
#ifdef WIN32
#define __thread __declspec(thread)
#else
#include <pthread.h>
#endif
#include "pg_probackup.h"
#include "file.h"
@@ -52,7 +58,8 @@ static int split_options(int argc, char* argv[], int max_options, char* options)
return argc;
}
static int child_pid;
static __thread int child_pid;
#if 0
static void kill_child(void)
{
@@ -60,6 +67,14 @@ static void kill_child(void)
}
#endif
void wait_ssh(void)
{
int status;
waitpid(child_pid, &status, 0);
elog(LOG, "SSH process %d terminated with status %d", child_pid, status);
}
#ifdef WIN32
void launch_ssh(char* argv[])
{
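To illustrate why child_pid becomes thread-local, here is a small self-contained sketch (assumptions: none of this is pg_probackup code, and `sleep` merely stands in for the remote ssh agent; compile with -pthread). Each worker thread forks its own child, records the pid in a `__thread` variable, and later reaps exactly that child, mirroring what wait_ssh() does above. With a shared `static int child_pid`, concurrent threads would overwrite each other's pids and could end up waiting on the wrong process.

```c
#include <stdio.h>
#include <unistd.h>
#include <sys/wait.h>
#include <pthread.h>

static __thread pid_t child_pid;	/* one slot per thread, as in the patch */

static void *
worker(void *arg)
{
	int			status;

	(void) arg;

	child_pid = fork();
	if (child_pid == 0)
	{
		/* child: stand-in for the ssh agent launched by launch_ssh() */
		execlp("sleep", "sleep", "1", (char *) NULL);
		_exit(127);
	}

	/* ... the thread would exchange data with its agent here ... */

	/* analogous to wait_ssh(): reap only this thread's child */
	waitpid(child_pid, &status, 0);
	printf("child %d terminated with status %d\n", (int) child_pid, status);
	return NULL;
}

int
main(void)
{
	pthread_t	t1, t2;

	pthread_create(&t1, NULL, worker, NULL);
	pthread_create(&t2, NULL, worker, NULL);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	return 0;
}
```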


@@ -12,8 +12,9 @@ from . import init, merge, option, show, compatibility, \
def load_tests(loader, tests, pattern):
suite = unittest.TestSuite()
if os.environ['PG_PROBACKUP_TEST_BASIC'] == 'ON':
loader.testMethodPrefix = 'test_basic'
if 'PG_PROBACKUP_TEST_BASIC' in os.environ:
if os.environ['PG_PROBACKUP_TEST_BASIC'] == 'ON':
loader.testMethodPrefix = 'test_basic'
# suite.addTests(loader.loadTestsFromModule(auth_test))
suite.addTests(loader.loadTestsFromModule(archive))


@@ -452,22 +452,23 @@ class ArchiveTest(ProbackupTest, unittest.TestCase):
filename = filename_orig + '.partial'
file = os.path.join(wals_dir, filename)
# emulate stale .partial file
with open(file, 'ab') as f:
f.write(b"blahblah")
f.flush()
f.close()
self.switch_wal_segment(node)
sleep(15)
sleep(20)
# check that segment is archived
if self.archive_compress:
filename_orig = filename_orig + '.gz'
file = os.path.join(wals_dir, filename_orig)
self.assertTrue(os.path.isfile(file))
# successful validate means that archive-push reused stale wal segment
self.validate_pb(
backup_dir, 'node',
options=['--recovery-target-xid={0}'.format(xid)])


@@ -469,16 +469,74 @@ class BackupTest(ProbackupTest, unittest.TestCase):
"\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertIn(
'WARNING: Corruption detected in file',
e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(e.message), self.cmd))
self.assertIn(
'ERROR: Data file corruption',
e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(e.message), self.cmd))
if self.remote:
self.assertTrue(
"ERROR: Failed to read file" in e.message and
"data file checksum mismatch" in e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(e.message), self.cmd))
else:
self.assertIn(
'WARNING: Corruption detected in file',
e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(e.message), self.cmd))
self.assertIn(
'ERROR: Data file corruption',
e.message,
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(e.message), self.cmd))
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_backup_truncate_misaligned(self):
"""
make node, truncate a data file to a size that is not a multiple of BLCKSIZE,
take backup
"""
fname = self.id().split('.')[3]
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
set_replication=True,
initdb_params=['--data-checksums'])
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
node.slow_start()
node.safe_psql(
"postgres",
"create table t_heap as select 1 as id, md5(i::text) as text, "
"md5(repeat(i::text,10))::tsvector as tsvector "
"from generate_series(0,100000) i")
node.safe_psql(
"postgres",
"CHECKPOINT;")
heap_path = node.safe_psql(
"postgres",
"select pg_relation_filepath('t_heap')").rstrip()
heap_size = node.safe_psql(
"postgres",
"select pg_relation_size('t_heap')")
with open(os.path.join(node.data_dir, heap_path), "rb+", 0) as f:
f.truncate(int(heap_size) - 4096)
f.flush()
f.close()
output = self.backup_node(
backup_dir, 'node', node, backup_type="full",
options=["-j", "4", "--stream"], return_id=False)
self.assertIn("WARNING: File", output)
self.assertIn("invalid file size", output)
# Clean after yourself
self.del_test_dir(module_name, fname)
@@ -1450,16 +1508,16 @@ class BackupTest(ProbackupTest, unittest.TestCase):
node.safe_psql(
'backupdb',
"REVOKE TEMPORARY ON DATABASE backupdb FROM PUBLIC;"
"REVOKE ALL on SCHEMA public from PUBLIC; "
"REVOKE ALL ON DATABASE backupdb from PUBLIC; "
"REVOKE ALL ON SCHEMA public from PUBLIC; "
"REVOKE ALL ON ALL TABLES IN SCHEMA public FROM PUBLIC; "
"REVOKE ALL ON ALL FUNCTIONS IN SCHEMA public FROM PUBLIC; "
"REVOKE ALL ON ALL SEQUENCES IN SCHEMA public FROM PUBLIC; "
"REVOKE ALL on SCHEMA pg_catalog from PUBLIC; "
"REVOKE ALL ON SCHEMA pg_catalog from PUBLIC; "
"REVOKE ALL ON ALL TABLES IN SCHEMA pg_catalog FROM PUBLIC; "
"REVOKE ALL ON ALL FUNCTIONS IN SCHEMA pg_catalog FROM PUBLIC; "
"REVOKE ALL ON ALL SEQUENCES IN SCHEMA pg_catalog FROM PUBLIC; "
"REVOKE ALL on SCHEMA information_schema from PUBLIC; "
"REVOKE ALL ON SCHEMA information_schema from PUBLIC; "
"REVOKE ALL ON ALL TABLES IN SCHEMA information_schema FROM PUBLIC; "
"REVOKE ALL ON ALL FUNCTIONS IN SCHEMA information_schema FROM PUBLIC; "
"REVOKE ALL ON ALL SEQUENCES IN SCHEMA information_schema FROM PUBLIC; "


@@ -532,49 +532,3 @@ class CompatibilityTest(ProbackupTest, unittest.TestCase):
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_backup_concurrent_drop_table(self):
""""""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'),
set_replication=True,
initdb_params=['--data-checksums'])
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node, old_binary=True)
node.slow_start()
node.pgbench_init(scale=1)
# FULL backup
gdb = self.backup_node(
backup_dir, 'node', node,
options=['--stream', '--compress', '--log-level-file=VERBOSE'],
gdb=True, old_binary=True)
gdb.set_breakpoint('backup_data_file')
gdb.run_until_break()
node.safe_psql(
'postgres',
'DROP TABLE pgbench_accounts')
# do checkpoint to guarantee filenode removal
node.safe_psql(
'postgres',
'CHECKPOINT')
gdb.remove_all_breakpoints()
gdb.continue_execution_until_exit()
# show_backup = self.show_pb(backup_dir, 'node')[0]
# self.assertEqual(show_backup['status'], "OK")
# validate with fresh binary, it MUST be successful
self.validate_pb(backup_dir)
# Clean after yourself
self.del_test_dir(module_name, fname)


@@ -437,3 +437,91 @@ class ReplicaTest(ProbackupTest, unittest.TestCase):
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_replica_promote(self):
"""
start backup from replica, during backup promote replica
check that the backup fails
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
master = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'master'),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={
'archive_timeout': '10s',
'checkpoint_timeout': '30s',
'max_wal_size': '32MB'})
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'master', master)
self.set_archiving(backup_dir, 'master', master)
master.slow_start()
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
replica.cleanup()
self.backup_node(backup_dir, 'master', master)
master.psql(
"postgres",
"create table t_heap as select i as id, md5(i::text) as text, "
"md5(repeat(i::text,10))::tsvector as tsvector "
"from generate_series(0,165000) i")
self.restore_node(
backup_dir, 'master', replica, options=['-R'])
# Settings for Replica
self.add_instance(backup_dir, 'replica', replica)
self.set_archiving(backup_dir, 'replica', replica, replica=True)
self.set_replica(
master, replica,
replica_name='replica', synchronous=True)
replica.slow_start(replica=True)
master.psql(
"postgres",
"create table t_heap_1 as select i as id, md5(i::text) as text, "
"md5(repeat(i::text,10))::tsvector as tsvector "
"from generate_series(0,165000) i")
self.wait_until_replica_catch_with_master(master, replica)
# start backup from replica
gdb = self.backup_node(
backup_dir, 'replica', replica, gdb=True,
options=['--log-level-file=verbose'])
gdb.set_breakpoint('backup_data_file')
gdb.run_until_break()
gdb.continue_execution_until_break(20)
replica.promote()
gdb.remove_all_breakpoints()
gdb.continue_execution_until_exit()
backup_id = self.show_pb(
backup_dir, 'replica')[0]["id"]
# read log file content
with open(os.path.join(backup_dir, 'log', 'pg_probackup.log')) as f:
log_content = f.read()
f.close()
self.assertIn(
'ERROR: the standby was promoted during online backup',
log_content)
self.assertIn(
'WARNING: Backup {0} is running, '
'setting its status to ERROR'.format(backup_id),
log_content)
# Clean after yourself
self.del_test_dir(module_name, fname)