# mirror of https://github.com/postgrespro/pg_probackup.git
# pg_probackup/tests/replica.py
import os
2017-06-27 11:43:45 +02:00
import unittest
2017-06-27 07:42:52 +02:00
from .helpers.ptrack_helpers import ProbackupTest, ProbackupException, idx_ptrack
2017-05-17 11:46:38 +02:00
from datetime import datetime, timedelta
import subprocess
import time
2019-09-05 08:30:45 +02:00
from distutils.dir_util import copy_tree
from testgres import ProcessType
from time import sleep
2017-05-17 11:46:38 +02:00
2017-07-12 16:28:28 +02:00
module_name = 'replica'
2017-05-17 11:46:38 +02:00
2017-07-12 16:28:28 +02:00
class ReplicaTest(ProbackupTest, unittest.TestCase):
2017-05-17 11:46:38 +02:00
# @unittest.skip("skip")
# @unittest.expectedFailure
2017-07-12 16:28:28 +02:00
def test_replica_stream_ptrack_backup(self):
"""
make node, take full backup, restore it and make replica from it,
take full stream backup from replica
"""
if not self.ptrack:
return unittest.skip('Skipped because ptrack support is disabled')
2017-05-17 11:46:38 +02:00
fname = self.id().split('.')[3]
2017-07-12 16:28:28 +02:00
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
master = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'master'),
2017-05-17 11:46:38 +02:00
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={
2019-04-22 19:52:00 +02:00
'ptrack_enable': 'on'})
2018-12-25 16:48:49 +02:00
master.slow_start()
2017-06-07 16:52:07 +02:00
self.init_pb(backup_dir)
2017-06-20 12:57:23 +02:00
self.add_instance(backup_dir, 'master', master)
2017-06-07 16:52:07 +02:00
2017-07-12 16:28:28 +02:00
# CREATE TABLE
2017-05-17 11:46:38 +02:00
master.psql(
"postgres",
"create table t_heap as select i as id, md5(i::text) as text, "
"md5(repeat(i::text,10))::tsvector as tsvector "
"from generate_series(0,256) i")
2017-07-12 16:28:28 +02:00
before = master.safe_psql("postgres", "SELECT * FROM t_heap")
2017-05-17 11:46:38 +02:00
2017-07-12 16:28:28 +02:00
# take full backup and restore it
2017-06-20 12:57:23 +02:00
self.backup_node(backup_dir, 'master', master, options=['--stream'])
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
2017-09-28 09:32:06 +02:00
replica.cleanup()
2017-07-12 16:28:28 +02:00
self.restore_node(backup_dir, 'master', replica)
self.set_replica(master, replica)
2017-05-17 11:46:38 +02:00
2017-07-12 16:28:28 +02:00
# Check data correctness on replica
replica.slow_start(replica=True)
2017-07-12 16:28:28 +02:00
after = replica.safe_psql("postgres", "SELECT * FROM t_heap")
self.assertEqual(before, after)
2017-05-17 11:46:38 +02:00
# Change data on master, take FULL backup from replica,
# restore taken backup and check that restored data equal
# to original data
2017-07-12 16:28:28 +02:00
master.psql(
"postgres",
"insert into t_heap as select i as id, md5(i::text) as text, "
"md5(repeat(i::text,10))::tsvector as tsvector "
"from generate_series(256,512) i")
2017-07-12 16:28:28 +02:00
before = master.safe_psql("postgres", "SELECT * FROM t_heap")
self.add_instance(backup_dir, 'replica', replica)
2018-11-11 20:53:00 +02:00
backup_id = self.backup_node(
backup_dir, 'replica', replica,
options=[
'--stream',
'--master-host=localhost',
'--master-db=postgres',
'--master-port={0}'.format(master.port)])
2017-07-12 16:28:28 +02:00
self.validate_pb(backup_dir, 'replica')
self.assertEqual(
'OK', self.show_pb(backup_dir, 'replica', backup_id)['status'])
2017-07-12 16:28:28 +02:00
# RESTORE FULL BACKUP TAKEN FROM PREVIOUS STEP
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'))
2017-07-12 16:28:28 +02:00
node.cleanup()
self.restore_node(backup_dir, 'replica', data_dir=node.data_dir)
2018-11-11 20:53:00 +02:00
node.append_conf(
'postgresql.auto.conf', 'port = {0}'.format(node.port))
node.slow_start()
2018-11-11 20:53:00 +02:00
2017-07-12 16:28:28 +02:00
# CHECK DATA CORRECTNESS
after = node.safe_psql("postgres", "SELECT * FROM t_heap")
2017-05-17 11:46:38 +02:00
self.assertEqual(before, after)
# Change data on master, take PTRACK backup from replica,
# restore taken backup and check that restored data equal
# to original data
2017-07-12 16:28:28 +02:00
master.psql(
"postgres",
"insert into t_heap as select i as id, md5(i::text) as text, "
"md5(repeat(i::text,10))::tsvector as tsvector "
"from generate_series(512,768) i")
2018-11-11 20:53:00 +02:00
2017-07-12 16:28:28 +02:00
before = master.safe_psql("postgres", "SELECT * FROM t_heap")
2018-11-11 20:53:00 +02:00
backup_id = self.backup_node(
backup_dir, 'replica', replica, backup_type='ptrack',
options=[
'--stream',
'--master-host=localhost',
'--master-db=postgres',
'--master-port={0}'.format(master.port)])
2017-07-12 16:28:28 +02:00
self.validate_pb(backup_dir, 'replica')
self.assertEqual(
'OK', self.show_pb(backup_dir, 'replica', backup_id)['status'])
2017-07-12 16:28:28 +02:00
# RESTORE PTRACK BACKUP TAKEN FROM replica
node.cleanup()
self.restore_node(
backup_dir, 'replica', data_dir=node.data_dir, backup_id=backup_id)
2018-11-11 20:53:00 +02:00
node.append_conf(
'postgresql.auto.conf', 'port = {0}'.format(node.port))
node.slow_start()
2018-11-11 20:53:00 +02:00
2017-07-12 16:28:28 +02:00
# CHECK DATA CORRECTNESS
after = node.safe_psql("postgres", "SELECT * FROM t_heap")
self.assertEqual(before, after)
2017-06-27 07:42:52 +02:00
# Clean after yourself
2017-07-12 16:28:28 +02:00
self.del_test_dir(module_name, fname)
2017-06-27 07:42:52 +02:00
2017-06-20 12:57:23 +02:00
# @unittest.skip("skip")
2017-07-12 16:28:28 +02:00
def test_replica_archive_page_backup(self):
"""
make archive master, take full and page archive backups from master,
set replica, make archive backup from replica
"""
fname = self.id().split('.')[3]
2017-07-12 16:28:28 +02:00
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
master = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'master'),
2017-09-28 09:32:06 +02:00
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={
'archive_timeout': '10s',
'checkpoint_timeout': '30s',
2019-04-20 11:42:17 +02:00
'max_wal_size': '32MB'})
2017-06-20 12:57:23 +02:00
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'master', master)
self.set_archiving(backup_dir, 'master', master)
master.slow_start()
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
2017-07-12 16:28:28 +02:00
replica.cleanup()
2017-06-20 12:57:23 +02:00
self.backup_node(backup_dir, 'master', master)
master.psql(
"postgres",
"create table t_heap as select i as id, md5(i::text) as text, "
"md5(repeat(i::text,10))::tsvector as tsvector "
2018-11-12 10:51:58 +02:00
"from generate_series(0,2560) i")
2017-07-12 16:28:28 +02:00
before = master.safe_psql("postgres", "SELECT * FROM t_heap")
backup_id = self.backup_node(
backup_dir, 'master', master, backup_type='page')
2017-07-12 16:28:28 +02:00
self.restore_node(backup_dir, 'master', replica)
# Settings for Replica
2018-11-16 08:35:41 +02:00
self.add_instance(backup_dir, 'replica', replica)
self.set_replica(master, replica, synchronous=True)
2017-07-12 16:28:28 +02:00
self.set_archiving(backup_dir, 'replica', replica, replica=True)
2018-11-12 10:51:58 +02:00
replica.slow_start(replica=True)
2017-07-12 16:28:28 +02:00
# Check data correctness on replica
after = replica.safe_psql("postgres", "SELECT * FROM t_heap")
self.assertEqual(before, after)
# Change data on master, take FULL backup from replica,
# restore taken backup and check that restored data
# equal to original data
2017-07-12 16:28:28 +02:00
master.psql(
"postgres",
"insert into t_heap as select i as id, md5(i::text) as text, "
"md5(repeat(i::text,10))::tsvector as tsvector "
2018-11-16 08:35:41 +02:00
"from generate_series(256,25120) i")
2018-11-11 20:53:00 +02:00
2017-07-12 16:28:28 +02:00
before = master.safe_psql("postgres", "SELECT * FROM t_heap")
2018-11-11 20:53:00 +02:00
2018-11-16 08:35:41 +02:00
master.psql(
"postgres",
"CHECKPOINT")
2018-11-12 10:51:58 +02:00
2018-11-16 08:35:41 +02:00
self.wait_until_replica_catch_with_master(master, replica)
2018-11-12 10:51:58 +02:00
backup_id = self.backup_node(
backup_dir, 'replica', replica,
options=[
2018-11-16 08:35:41 +02:00
'--archive-timeout=60',
'--master-host=localhost',
'--master-db=postgres',
2018-11-16 08:35:41 +02:00
'--master-port={0}'.format(master.port)])
2018-11-12 10:51:58 +02:00
2017-07-12 16:28:28 +02:00
self.validate_pb(backup_dir, 'replica')
self.assertEqual(
'OK', self.show_pb(backup_dir, 'replica', backup_id)['status'])
2017-07-12 16:28:28 +02:00
# RESTORE FULL BACKUP TAKEN FROM replica
node = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'node'))
2017-07-12 16:28:28 +02:00
node.cleanup()
self.restore_node(backup_dir, 'replica', data_dir=node.data_dir)
2018-11-16 08:35:41 +02:00
node.append_conf(
'postgresql.auto.conf', 'port = {0}'.format(node.port))
2018-11-16 08:35:41 +02:00
node.append_conf(
'postgresql.auto.conf', 'archive_mode = off'.format(node.port))
node.slow_start()
2018-11-11 20:53:00 +02:00
2017-07-12 16:28:28 +02:00
# CHECK DATA CORRECTNESS
after = node.safe_psql("postgres", "SELECT * FROM t_heap")
self.assertEqual(before, after)
2018-11-11 20:53:00 +02:00
node.cleanup()
2017-07-12 16:28:28 +02:00
# Change data on master, make PAGE backup from replica,
# restore taken backup and check that restored data equal
# to original data
2018-11-16 08:35:41 +02:00
master.pgbench_init(scale=5)
2018-11-11 20:53:00 +02:00
2018-11-16 08:35:41 +02:00
pgbench = master.pgbench(
options=['-T', '30', '-c', '2', '--no-vacuum'])
backup_id = self.backup_node(
2018-11-11 20:53:00 +02:00
backup_dir, 'replica',
replica, backup_type='page',
options=[
2018-11-16 08:35:41 +02:00
'--archive-timeout=60',
'--master-host=localhost',
'--master-db=postgres',
2018-11-16 08:35:41 +02:00
'--master-port={0}'.format(master.port)])
pgbench.wait()
self.switch_wal_segment(master)
before = master.safe_psql("postgres", "SELECT * FROM pgbench_accounts")
2018-11-11 20:53:00 +02:00
2017-07-12 16:28:28 +02:00
self.validate_pb(backup_dir, 'replica')
self.assertEqual(
'OK', self.show_pb(backup_dir, 'replica', backup_id)['status'])
2017-07-12 16:28:28 +02:00
# RESTORE PAGE BACKUP TAKEN FROM replica
self.restore_node(
2018-11-16 08:35:41 +02:00
backup_dir, 'replica', data_dir=node.data_dir,
backup_id=backup_id)
2018-11-11 20:53:00 +02:00
node.append_conf(
'postgresql.auto.conf', 'port = {0}'.format(node.port))
2018-11-16 08:35:41 +02:00
2018-11-11 20:53:00 +02:00
node.append_conf(
'postgresql.auto.conf', 'archive_mode = off')
2018-11-16 08:35:41 +02:00
node.slow_start()
2018-11-11 20:53:00 +02:00
2017-07-12 16:28:28 +02:00
# CHECK DATA CORRECTNESS
2018-11-16 08:35:41 +02:00
after = node.safe_psql("postgres", "SELECT * FROM pgbench_accounts")
self.assertEqual(
before, after, 'Restored data is not equal to original')
2017-06-27 07:42:52 +02:00
2018-11-11 20:53:00 +02:00
self.add_instance(backup_dir, 'node', node)
self.backup_node(
backup_dir, 'node', node, options=['--stream'])
2017-06-27 07:42:52 +02:00
# Clean after yourself
2017-07-12 16:28:28 +02:00
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
2019-05-28 11:41:03 +02:00
def test_basic_make_replica_via_restore(self):
"""
make archive master, take full and page archive backups from master,
set replica, make archive backup from replica
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
master = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'master'),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={
2019-04-22 19:52:00 +02:00
'archive_timeout': '10s'})
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'master', master)
self.set_archiving(backup_dir, 'master', master)
# force more frequent wal switch
master.append_conf('postgresql.auto.conf', 'archive_timeout = 10')
master.slow_start()
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
replica.cleanup()
self.backup_node(backup_dir, 'master', master)
master.psql(
"postgres",
"create table t_heap as select i as id, md5(i::text) as text, "
"md5(repeat(i::text,10))::tsvector as tsvector "
2018-11-16 08:35:41 +02:00
"from generate_series(0,8192) i")
before = master.safe_psql("postgres", "SELECT * FROM t_heap")
backup_id = self.backup_node(
backup_dir, 'master', master, backup_type='page')
self.restore_node(
2018-11-11 20:53:00 +02:00
backup_dir, 'master', replica, options=['-R'])
# Settings for Replica
2018-11-16 08:35:41 +02:00
self.add_instance(backup_dir, 'replica', replica)
self.set_archiving(backup_dir, 'replica', replica, replica=True)
replica.append_conf(
'postgresql.auto.conf', 'port = {0}'.format(replica.port))
2018-11-11 20:53:00 +02:00
replica.append_conf(
'postgresql.auto.conf', 'hot_standby = on')
replica.slow_start(replica=True)
2018-11-16 08:35:41 +02:00
self.backup_node(
backup_dir, 'replica', replica,
options=['--archive-timeout=30s', '--stream'])
# Clean after yourself
self.del_test_dir(module_name, fname)
2018-11-11 20:53:00 +02:00
# @unittest.skip("skip")
def test_take_backup_from_delayed_replica(self):
"""
make archive master, take full backups from master,
restore full backup as delayed replica, launch pgbench,
take FULL, PAGE and DELTA backups from replica
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
master = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'master'),
2018-11-11 20:53:00 +02:00
set_replication=True,
initdb_params=['--data-checksums'],
2019-09-05 08:30:45 +02:00
pg_options={'archive_timeout': '10s'})
2019-04-22 19:52:00 +02:00
2018-11-11 20:53:00 +02:00
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'master', master)
self.set_archiving(backup_dir, 'master', master)
master.slow_start()
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
2018-11-11 20:53:00 +02:00
replica.cleanup()
self.backup_node(backup_dir, 'master', master)
2018-11-16 08:35:41 +02:00
master.psql(
"postgres",
"create table t_heap as select i as id, md5(i::text) as text, "
"md5(repeat(i::text,10))::tsvector as tsvector "
"from generate_series(0,165000) i")
master.psql(
"postgres",
"CHECKPOINT")
master.psql(
"postgres",
"create table t_heap_1 as select i as id, md5(i::text) as text, "
"md5(repeat(i::text,10))::tsvector as tsvector "
"from generate_series(0,165000) i")
2018-11-11 20:53:00 +02:00
self.restore_node(
backup_dir, 'master', replica, options=['-R'])
# Settings for Replica
self.add_instance(backup_dir, 'replica', replica)
self.set_archiving(backup_dir, 'replica', replica, replica=True)
replica.append_conf(
'postgresql.auto.conf', 'port = {0}'.format(replica.port))
2018-11-16 08:35:41 +02:00
replica.slow_start(replica=True)
self.wait_until_replica_catch_with_master(master, replica)
2018-11-11 20:53:00 +02:00
replica.append_conf(
'recovery.conf', "recovery_min_apply_delay = '300s'")
replica.stop()
replica.slow_start(replica=True)
2018-11-11 20:53:00 +02:00
master.pgbench_init(scale=10)
pgbench = master.pgbench(
2018-11-16 08:35:41 +02:00
options=['-T', '60', '-c', '2', '--no-vacuum'])
2018-11-11 20:53:00 +02:00
self.backup_node(
2018-11-16 08:35:41 +02:00
backup_dir, 'replica',
replica, options=['--archive-timeout=60s'])
2018-11-11 20:53:00 +02:00
self.backup_node(
backup_dir, 'replica', replica,
2018-11-16 08:35:41 +02:00
data_dir=replica.data_dir,
backup_type='page', options=['--archive-timeout=60s'])
2018-11-11 20:53:00 +02:00
self.backup_node(
2018-11-16 08:35:41 +02:00
backup_dir, 'replica', replica,
backup_type='delta', options=['--archive-timeout=60s'])
2018-11-11 20:53:00 +02:00
pgbench.wait()
pgbench = master.pgbench(
options=['-T', '30', '-c', '2', '--no-vacuum'])
self.backup_node(
backup_dir, 'replica', replica,
options=['--stream'])
self.backup_node(
backup_dir, 'replica', replica,
backup_type='page', options=['--stream'])
self.backup_node(
backup_dir, 'replica', replica,
backup_type='delta', options=['--stream'])
pgbench.wait()
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_replica_promote(self):
"""
start backup from replica, during backup promote replica
check that backup is failed
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
master = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'master'),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={
'archive_timeout': '10s',
'checkpoint_timeout': '30s',
'max_wal_size': '32MB'})
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'master', master)
self.set_archiving(backup_dir, 'master', master)
master.slow_start()
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
replica.cleanup()
self.backup_node(backup_dir, 'master', master)
master.psql(
"postgres",
"create table t_heap as select i as id, md5(i::text) as text, "
"md5(repeat(i::text,10))::tsvector as tsvector "
"from generate_series(0,165000) i")
self.restore_node(
backup_dir, 'master', replica, options=['-R'])
# Settings for Replica
self.add_instance(backup_dir, 'replica', replica)
self.set_archiving(backup_dir, 'replica', replica, replica=True)
self.set_replica(
master, replica,
replica_name='replica', synchronous=True)
replica.slow_start(replica=True)
master.psql(
"postgres",
"create table t_heap_1 as select i as id, md5(i::text) as text, "
"md5(repeat(i::text,10))::tsvector as tsvector "
"from generate_series(0,165000) i")
self.wait_until_replica_catch_with_master(master, replica)
# start backup from replica
gdb = self.backup_node(
backup_dir, 'replica', replica, gdb=True,
options=['--log-level-file=verbose'])
gdb.set_breakpoint('backup_data_file')
gdb.run_until_break()
gdb.continue_execution_until_break(20)
replica.promote()
gdb.remove_all_breakpoints()
gdb.continue_execution_until_exit()
backup_id = self.show_pb(
backup_dir, 'replica')[0]["id"]
# read log file content
with open(os.path.join(backup_dir, 'log', 'pg_probackup.log')) as f:
log_content = f.read()
f.close
self.assertIn(
'ERROR: the standby was promoted during online backup',
log_content)
self.assertIn(
'WARNING: Backup {0} is running, '
'setting its status to ERROR'.format(backup_id),
log_content)
# Clean after yourself
self.del_test_dir(module_name, fname)
2019-09-05 08:30:45 +02:00
# @unittest.skip("skip")
def test_replica_stop_lsn_null_offset(self):
"""
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
master = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'master'),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={
'checkpoint_timeout': '1h',
'wal_level': 'replica'})
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'master', master)
self.set_archiving(backup_dir, 'master', master)
master.slow_start()
# freeze bgwriter to get rid of RUNNING XACTS records
bgwriter_pid = master.auxiliary_pids[ProcessType.BackgroundWriter][0]
gdb_checkpointer = self.gdb_attach(bgwriter_pid)
self.backup_node(backup_dir, 'master', master)
# Create replica
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
replica.cleanup()
self.restore_node(backup_dir, 'master', replica)
# Settings for Replica
self.add_instance(backup_dir, 'replica', replica)
self.set_replica(master, replica, synchronous=True)
self.set_archiving(backup_dir, 'replica', replica, replica=True)
copy_tree(
os.path.join(backup_dir, 'wal', 'master'),
os.path.join(backup_dir, 'wal', 'replica'))
replica.slow_start(replica=True)
self.switch_wal_segment(master)
self.switch_wal_segment(master)
output = self.backup_node(
backup_dir, 'replica', replica,
options=[
'--archive-timeout=30',
'--log-level-console=verbose',
'--no-validate',
'--stream'],
return_id=False)
self.assertIn(
'LOG: Null offset in stop_backup_lsn value 0/3000000',
2019-09-05 08:30:45 +02:00
output)
self.assertIn(
'WARNING: WAL segment 000000010000000000000003 could not be streamed in 30 seconds',
output)
self.assertIn(
'WARNING: Failed to get next WAL record after 0/3000000, looking for previous WAL record',
output)
self.assertIn(
'LOG: Looking for LSN 0/3000000 in segment: 000000010000000000000002',
output)
self.assertIn(
'LOG: Record 0/2000160 has endpoint 0/3000000 which is '
'equal or greater than requested LSN 0/3000000',
output)
self.assertIn(
'LOG: Found prior LSN: 0/2000160',
output)
self.assertIn(
'LOG: current.stop_lsn: 0/2000160',
output)
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_replica_stop_lsn_null_offset_next_record(self):
"""
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
master = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'master'),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={
'checkpoint_timeout': '1h',
'wal_level': 'replica'})
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'master', master)
self.set_archiving(backup_dir, 'master', master)
master.slow_start()
# freeze bgwriter to get rid of RUNNING XACTS records
bgwriter_pid = master.auxiliary_pids[ProcessType.BackgroundWriter][0]
gdb_checkpointer = self.gdb_attach(bgwriter_pid)
self.backup_node(backup_dir, 'master', master)
# Create replica
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
replica.cleanup()
self.restore_node(backup_dir, 'master', replica)
# Settings for Replica
self.add_instance(backup_dir, 'replica', replica)
self.set_replica(master, replica, synchronous=True)
self.set_archiving(backup_dir, 'replica', replica, replica=True)
copy_tree(
os.path.join(backup_dir, 'wal', 'master'),
os.path.join(backup_dir, 'wal', 'replica'))
replica.slow_start(replica=True)
self.switch_wal_segment(master)
self.switch_wal_segment(master)
# open connection to master
conn = master.connect()
gdb = self.backup_node(
backup_dir, 'replica', replica,
options=[
'--archive-timeout=40',
'--log-level-file=verbose',
'--no-validate',
'--stream'],
gdb=True)
gdb.set_breakpoint('pg_stop_backup')
gdb.run_until_break()
gdb.remove_all_breakpoints()
gdb.continue_execution_until_running()
sleep(5)
conn.execute("create table t1()")
conn.commit()
while 'RUNNING' in self.show_pb(backup_dir, 'replica')[0]['status']:
sleep(5)
file = os.path.join(backup_dir, 'log', 'pg_probackup.log')
with open(file) as f:
log_content = f.read()
self.assertIn(
'LOG: Null offset in stop_backup_lsn value 0/3000000',
2019-09-05 08:30:45 +02:00
log_content)
self.assertIn(
'LOG: Looking for segment: 000000010000000000000003',
log_content)
self.assertIn(
'LOG: First record in WAL segment "000000010000000000000003": 0/3000028',
log_content)
self.assertIn(
'LOG: current.stop_lsn: 0/3000028',
log_content)
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_archive_replica_null_offset(self):
"""
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
master = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'master'),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={
'checkpoint_timeout': '1h',
'wal_level': 'replica'})
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'master', master)
self.set_archiving(backup_dir, 'master', master)
master.slow_start()
self.backup_node(backup_dir, 'master', master)
# Create replica
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
replica.cleanup()
self.restore_node(backup_dir, 'master', replica)
# Settings for Replica
self.add_instance(backup_dir, 'replica', replica)
self.set_replica(master, replica, synchronous=True)
self.set_archiving(backup_dir, 'replica', replica, replica=True)
2019-10-11 18:03:55 +02:00
# freeze bgwriter to get rid of RUNNING XACTS records
bgwriter_pid = master.auxiliary_pids[ProcessType.BackgroundWriter][0]
gdb_checkpointer = self.gdb_attach(bgwriter_pid)
2019-09-05 08:30:45 +02:00
copy_tree(
os.path.join(backup_dir, 'wal', 'master'),
os.path.join(backup_dir, 'wal', 'replica'))
replica.slow_start(replica=True)
self.switch_wal_segment(master)
self.switch_wal_segment(master)
# take backup from replica
output = self.backup_node(
backup_dir, 'replica', replica,
options=[
'--archive-timeout=30',
2019-10-11 18:03:55 +02:00
'--log-level-console=LOG',
2019-09-05 08:30:45 +02:00
'--no-validate'],
return_id=False)
self.assertIn(
'LOG: Null offset in stop_backup_lsn value 0/3000000',
2019-09-05 08:30:45 +02:00
output)
self.assertIn(
'WARNING: WAL segment 000000010000000000000003 could not be archived in 30 seconds',
output)
self.assertIn(
'WARNING: Failed to get next WAL record after 0/3000000, looking for previous WAL record',
output)
self.assertIn(
'LOG: Looking for LSN 0/3000000 in segment: 000000010000000000000002',
output)
self.assertIn(
'LOG: Record 0/2000160 has endpoint 0/3000000 which is '
'equal or greater than requested LSN 0/3000000',
output)
self.assertIn(
'LOG: Found prior LSN: 0/2000160',
output)
print(output)
2019-09-05 08:30:45 +02:00
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_archive_replica_not_null_offset(self):
"""
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
master = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'master'),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={
'checkpoint_timeout': '1h',
'wal_level': 'replica'})
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'master', master)
self.set_archiving(backup_dir, 'master', master)
master.slow_start()
self.backup_node(backup_dir, 'master', master)
# Create replica
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
replica.cleanup()
self.restore_node(backup_dir, 'master', replica)
# Settings for Replica
self.add_instance(backup_dir, 'replica', replica)
self.set_replica(master, replica, synchronous=True)
self.set_archiving(backup_dir, 'replica', replica, replica=True)
copy_tree(
os.path.join(backup_dir, 'wal', 'master'),
os.path.join(backup_dir, 'wal', 'replica'))
replica.slow_start(replica=True)
# take backup from replica
self.backup_node(
backup_dir, 'replica', replica,
options=[
'--archive-timeout=30',
'--log-level-console=verbose',
'--no-validate'],
return_id=False)
try:
self.backup_node(
backup_dir, 'replica', replica,
options=[
'--archive-timeout=30',
'--log-level-console=verbose',
'--no-validate'])
# we should die here because exception is what we expect to happen
self.assertEqual(
1, 0,
"Expecting Error because of archive timeout. "
"\n Output: {0} \n CMD: {1}".format(
repr(self.output), self.cmd))
except ProbackupException as e:
self.assertIn(
'LOG: Looking for LSN 0/3000060 in segment: 000000010000000000000003',
e.message,
"\n Unexpected Error Message: {0}\n CMD: {1}".format(
repr(e.message), self.cmd))
self.assertIn(
'INFO: Wait for LSN 0/3000060 in archived WAL segment',
e.message,
"\n Unexpected Error Message: {0}\n CMD: {1}".format(
repr(e.message), self.cmd))
self.assertIn(
'ERROR: WAL segment 000000010000000000000003 could not be archived in 30 seconds',
e.message,
"\n Unexpected Error Message: {0}\n CMD: {1}".format(
repr(e.message), self.cmd))
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_replica_toast(self):
"""
make archive master, take full and page archive backups from master,
set replica, make archive backup from replica
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
master = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'master'),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={
'checkpoint_timeout': '1h',
'wal_level': 'replica'})
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'master', master)
self.set_archiving(backup_dir, 'master', master)
master.slow_start()
# freeze bgwriter to get rid of RUNNING XACTS records
bgwriter_pid = master.auxiliary_pids[ProcessType.BackgroundWriter][0]
gdb_checkpointer = self.gdb_attach(bgwriter_pid)
self.backup_node(backup_dir, 'master', master)
# Create replica
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
replica.cleanup()
self.restore_node(backup_dir, 'master', replica)
# Settings for Replica
self.add_instance(backup_dir, 'replica', replica)
self.set_replica(master, replica, synchronous=True)
self.set_archiving(backup_dir, 'replica', replica, replica=True)
copy_tree(
os.path.join(backup_dir, 'wal', 'master'),
os.path.join(backup_dir, 'wal', 'replica'))
replica.slow_start(replica=True)
self.switch_wal_segment(master)
self.switch_wal_segment(master)
master.safe_psql(
'postgres',
'CREATE TABLE t1 AS '
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
'FROM generate_series(0,10) i')
# open connection to master
output = self.backup_node(
backup_dir, 'replica', replica,
options=[
'--archive-timeout=30',
'--log-level-console=verbose',
'--log-level-file=verbose',
'--no-validate',
'--stream'],
return_id=False)
pgdata = self.pgdata_content(replica.data_dir)
self.assertIn(
'WARNING: Could not read WAL record at',
output)
self.assertIn(
'LOG: Found prior LSN:',
output)
print(output)
2019-09-05 08:30:45 +02:00
replica.cleanup()
self.restore_node(backup_dir, 'replica', replica)
pgdata_restored = self.pgdata_content(replica.data_dir)
self.compare_pgdata(pgdata, pgdata_restored)
# Clean after yourself
self.del_test_dir(module_name, fname)
2019-09-05 12:54:28 +02:00
# @unittest.skip("skip")
def test_replica_promote_1(self):
"""
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
master = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'master'),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={
'checkpoint_timeout': '1h',
'wal_level': 'replica'})
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'master', master)
# set replica True, so archive_mode 'always' is used.
self.set_archiving(backup_dir, 'master', master, replica=True)
master.slow_start()
self.backup_node(backup_dir, 'master', master)
# Create replica
replica = self.make_simple_node(
base_dir=os.path.join(module_name, fname, 'replica'))
replica.cleanup()
self.restore_node(backup_dir, 'master', replica)
# Settings for Replica
self.set_replica(master, replica)
replica.slow_start(replica=True)
master.safe_psql(
'postgres',
'CREATE TABLE t1 AS '
'SELECT i, repeat(md5(i::text),5006056) AS fat_attr '
'FROM generate_series(0,10) i')
self.wait_until_replica_catch_with_master(master, replica)
wal_file = os.path.join(
backup_dir, 'wal', 'master', '000000010000000000000004')
wal_file_partial = os.path.join(
2019-09-05 14:57:21 +02:00
backup_dir, 'wal', 'master', '000000010000000000000004.partial')
2019-09-05 12:54:28 +02:00
self.assertFalse(os.path.exists(wal_file))
replica.promote()
while not os.path.exists(wal_file_partial):
sleep(1)
self.switch_wal_segment(master)
# sleep to be sure, that any partial timeout is expired
sleep(70)
2019-09-05 14:57:21 +02:00
self.assertTrue(
os.path.exists(wal_file_partial),
"File {0} disappeared".format(wal_file))
2019-09-05 12:54:28 +02:00
self.assertTrue(
2019-09-05 14:57:21 +02:00
os.path.exists(wal_file_partial),
"File {0} disappeared".format(wal_file_partial))
2019-09-05 12:54:28 +02:00
# Clean after yourself
self.del_test_dir(module_name, fname)
# TODO:
# null offset STOP LSN and latest record in previous segment is contrecord (manual only)
# archiving from promoted delayed replica