mirror of https://github.com/postgrespro/pg_probackup.git synced 2025-02-08 14:28:36 +02:00

tests: PGPRO-1230 ptrack race condition tests

This commit is contained in:
Grigory Smolkin 2018-01-10 16:03:33 +03:00
parent f97fab6c89
commit 0de4ec09ab
3 changed files with 378 additions and 41 deletions

View File

@ -14,7 +14,7 @@ Check archive compression:
export ARCHIVE_COMPRESSION=ON
Usage:
-pip install testgres=0.4.0
+pip install testgres==0.4.0
export PG_CONFIG=/path/to/pg_config
python -m unittest [-v] tests[.specific_module][.class.test]
```
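For example, to run just one of the ptrack tests touched by this commit (assuming the tests live in `tests/ptrack.py`): `python -m unittest -v tests.ptrack.PtrackBackupTest.test_ptrack_get_block`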

View File

@ -8,6 +8,9 @@ import testgres
import hashlib
import re
import pwd
import select
import psycopg2
from time import sleep
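# (select and psycopg2 support the new asynchronous-connection helpers below;
# sleep paces the gdb output-polling loop in _execute.)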
idx_ptrack = {
@ -946,6 +949,34 @@ class ProbackupTest(object):
fail = True
self.assertFalse(fail, error_message)
    def get_asyc_connect(self, database=None, host=None, port=5432):
        if not database:
            database = 'postgres'
        if not host:
            host = '127.0.0.1'
        # Pass the resolved parameters through instead of hardcoding them,
        # so callers can actually override database and host. 'async_' is
        # psycopg2's alias for 'async', a reserved word in Python 3.7+.
        return psycopg2.connect(
            database=database,
            host=host,
            port=port,
            async_=True
        )
    def wait(self, connection):
        while True:
            state = connection.poll()
            if state == psycopg2.extensions.POLL_OK:
                break
            elif state == psycopg2.extensions.POLL_WRITE:
                select.select([], [connection.fileno()], [])
            elif state == psycopg2.extensions.POLL_READ:
                select.select([connection.fileno()], [], [])
            else:
                raise psycopg2.OperationalError("poll() returned %s" % state)

    def gdb_attach(self, pid):
        return GDBobj([str(pid)], self.verbose, attach=True)
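For context, an asynchronous psycopg2 connection returns control immediately from `connect()` and `execute()`; each operation must then be driven to completion with a poll loop like `wait()` above. A minimal standalone sketch of the same pattern (host, port and the `async_` keyword are illustrative; a reachable server is assumed):

```python
import select

import psycopg2
import psycopg2.extensions


def wait(connection):
    # Poll until the async connection finishes its current operation.
    while True:
        state = connection.poll()
        if state == psycopg2.extensions.POLL_OK:
            break
        elif state == psycopg2.extensions.POLL_WRITE:
            select.select([], [connection.fileno()], [])
        elif state == psycopg2.extensions.POLL_READ:
            select.select([connection.fileno()], [], [])
        else:
            raise psycopg2.OperationalError("poll() returned %s" % state)


conn = psycopg2.connect(
    database='postgres', host='127.0.0.1', port=5432, async_=True)
wait(conn)                              # wait for the connection handshake
cur = conn.cursor()
cur.execute("select pg_backend_pid()")
wait(conn)                              # wait for the query to finish
print(cur.fetchone()[0])                # pid of the backend for this session
```

That backend pid is exactly what `gdb_attach()` consumes: the race-condition tests below attach gdb to the very backend whose query they want to suspend.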
class GdbException(Exception):
def __init__(self, message=False):
@ -956,7 +987,7 @@ class GdbException(Exception):
class GDBobj(ProbackupTest):
-    def __init__(self, cmd, verbose):
+    def __init__(self, cmd, verbose, attach=False):
self.verbose = verbose
# Check gdb presence
@ -972,8 +1003,12 @@ class GDBobj(ProbackupTest):
'gdb',
'--interpreter',
'mi2',
-            '--args'
-        ] + cmd
+        ]
+        if attach:
+            self.cmd = self.base_cmd + ['--pid'] + cmd
+        else:
+            self.cmd = self.base_cmd + ['--args'] + cmd
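        # gdb '--pid' attaches to a running process, while '--args' launches
        # the target under gdb; '--interpreter mi2' selects machine-readable
        # MI2 output, which the line matching below depends on.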
# Get version
gdb_version_number = re.search(
@ -981,15 +1016,19 @@ class GDBobj(ProbackupTest):
gdb_version)
self.major_version = int(gdb_version_number.group(1))
self.minor_version = int(gdb_version_number.group(2))
if self.verbose:
-            print([' '.join(map(str, self.base_cmd))])
+            print([' '.join(map(str, self.cmd))])
+            print(self.cmd)
        self.proc = subprocess.Popen(
-            self.base_cmd,
+            self.cmd,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
-            bufsize=0, universal_newlines=True
+            bufsize=0,
+            universal_newlines=True
)
self.gdb_pid = self.proc.pid
@ -997,6 +1036,10 @@ class GDBobj(ProbackupTest):
# is there a less derpy way to do it?
while True:
line = self.proc.stdout.readline()
+            if 'No such process' in line:
+                raise GdbException(line)
if not line.startswith('(gdb)'):
pass
else:
@ -1004,52 +1047,115 @@ class GDBobj(ProbackupTest):
    def set_breakpoint(self, location):
        result = self._execute('break ' + location)
-        success = False
        for line in result:
            print(line)
            if line.startswith('~"Breakpoint'):
-                success = True
-                break
-            if line.startswith('^error') or line.startswith('(gdb)'):
+                return
+            elif line.startswith('^error') or line.startswith('(gdb)'):
                break
-            if line.startswith('&"break'):
+            elif line.startswith('&"break'):
                pass
-            if line.startswith('&"Function'):
+            elif line.startswith('&"Function'):
                raise GdbException(line)
-            if line.startswith('&"No line'):
+            elif line.startswith('&"No line'):
                raise GdbException(line)
-        return success
-    def run(self):
-        result = self._execute('run')
+            elif line.startswith('~"Make breakpoint pending on future shared'):
+                raise GdbException(line)
+        raise GdbException(
+            'Failed to set breakpoint.\n Output:\n {0}'.format(result)
+        )
+    def run_until_break(self):
+        result = self._execute('run', False)
        for line in result:
            if line.startswith('*stopped,reason="breakpoint-hit"'):
                return 'breakpoint-hit'
-            if line.startswith('*stopped,reason="exited-normally"'):
-                return 'exit correct'
-        return
+        raise GdbException(
+            'Failed to run until breakpoint.\n'
+        )
-    def continue_execution(self, sync=True):
+    def continue_execution_until_running(self):
        result = self._execute('continue')
        running = False
        for line in result:
            if line.startswith('*running'):
                running = True
                break
            if line.startswith('*stopped,reason="breakpoint-hit"'):
                running = False
                continue
            if line.startswith('*stopped,reason="exited-normally"'):
                running = False
                continue
        return running
    def continue_execution_until_exit(self):
        result = self._execute('continue', False)
        for line in result:
            if line.startswith('*running'):
                continue
            if line.startswith('*stopped,reason="breakpoint-hit"'):
                continue
            if line.startswith('*stopped,reason="exited-normally"'):
                return
        raise GdbException(
            'Failed to continue execution until exit.\n'
        )

    def continue_execution_until_break(self, ignore_count=0):
        if ignore_count > 0:
            result = self._execute(
                'continue ' + str(ignore_count),
                False
            )
        else:
            result = self._execute('continue', False)
        running = False
        for line in result:
            if line.startswith('*running'):
                running = True
            if line.startswith('*stopped,reason="breakpoint-hit"'):
                return 'breakpoint-hit'
            if line.startswith('*stopped,reason="exited-normally"'):
-                return 'exit correct'
+                return 'exited-normally'
        if running:
            return 'running'
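    # Note: gdb's 'continue N' resumes and automatically ignores the
    # breakpoint it is stopped at N-1 more times, which is what the
    # ignore_count argument above maps onto.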
    def stopped_in_breakpoint(self):
        output = []
        while True:
            line = self.proc.stdout.readline()
            if not line:
                break    # EOF: gdb exited without hitting the breakpoint
            output += [line]
            if self.verbose:
                print(line)
            if line.startswith('*stopped,reason="breakpoint-hit"'):
                return True
        return False
# use for breakpoint, run, continue
-    def _execute(self, cmd):
+    def _execute(self, cmd, running=True):
        output = []
        self.proc.stdin.flush()
        self.proc.stdin.write(cmd + '\n')
        self.proc.stdin.flush()
        while True:
+            sleep(1)
            line = self.proc.stdout.readline()
            output += [line]
            if self.verbose:
                print(line)
            if line == '^done\n' or line.startswith('*stopped'):
                break
+            if running and line.startswith('*running'):
+                break
        return output
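For reference, gdb's MI2 interpreter tags each output record by its first character: `^` result records (`^done`, `^running`, `^error`), `*` exec-async records (`*running`, `*stopped,reason="..."`), `~` console stream and `&` log stream. The string matching in `set_breakpoint()`, `run_until_break()` and `_execute()` keys off exactly these prefixes. A small sketch with abridged sample records (illustrative, not verbatim gdb output):

```python
# Classify abridged GDB/MI2 records the way the helpers above do.
samples = [
    '&"break reform_and_rewrite_tuple\\n"',         # log stream: echoed command
    '~"Breakpoint 1 at 0x5a2f40"',                  # console stream
    '^running',                                     # result record
    '*running,thread-id="all"',                     # exec-async: target resumed
    '*stopped,reason="breakpoint-hit",bkptno="1"',  # exec-async: breakpoint hit
]
for record in samples:
    if record.startswith('*stopped,reason="breakpoint-hit"'):
        print('stopped at breakpoint:', record)
    elif record.startswith('*running'):
        print('inferior resumed')
```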

View File

@ -128,6 +128,245 @@ class PtrackBackupTest(ProbackupTest, unittest.TestCase):
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_ptrack_uncommited_xact(self):
"""make ptrack backup while there is uncommited open transaction"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
base_dir="{0}/{1}/node".format(module_name, fname),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={
'wal_level': 'replica',
'max_wal_senders': '2',
'checkpoint_timeout': '300s',
'ptrack_enable': 'on'
}
)
node_restored = self.make_simple_node(
base_dir="{0}/{1}/node_restored".format(module_name, fname),
)
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
node_restored.cleanup()
node.start()
self.backup_node(backup_dir, 'node', node)
con = node.connect("postgres")
con.execute(
"create table t_heap as select i"
" as id from generate_series(0,1) i"
)
self.backup_node(
backup_dir, 'node', node, backup_type='ptrack',
options=['--stream', '--log-level-file=verbose']
)
self.backup_node(
backup_dir, 'node', node, backup_type='ptrack',
options=['--stream', '--log-level-file=verbose']
)
pgdata = self.pgdata_content(node.data_dir)
self.restore_node(
backup_dir, 'node', node_restored, options=["-j", "4"])
pgdata_restored = self.pgdata_content(node_restored.data_dir)
node_restored.append_conf(
"postgresql.auto.conf", "port = {0}".format(node_restored.port))
node_restored.start()
# Physical comparison
self.compare_pgdata(pgdata, pgdata_restored)
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_ptrack_vacuum_full(self):
"""make node, make full and ptrack stream backups,
restore them and check data correctness"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
base_dir="{0}/{1}/node".format(module_name, fname),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={
'wal_level': 'replica',
'max_wal_senders': '2',
'checkpoint_timeout': '300s',
'ptrack_enable': 'on'
}
)
node_restored = self.make_simple_node(
base_dir="{0}/{1}/node_restored".format(module_name, fname),
)
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
node_restored.cleanup()
node.start()
self.create_tblspace_in_node(node, 'somedata')
self.backup_node(backup_dir, 'node', node)
node.safe_psql(
"postgres",
"create table t_heap tablespace somedata as select i"
" as id from generate_series(0,1000000) i"
)
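# Reproduce the PGPRO-1230 race: from a second, asynchronous connection,
# suspend VACUUM FULL mid-rewrite under gdb, then take ptrack backups
# while the rewrite is stopped.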
# create async connection
conn = self.get_asyc_connect(port=node.port)
self.wait(conn)
acurs = conn.cursor()
acurs.execute("select pg_backend_pid()")
self.wait(conn)
pid = acurs.fetchall()[0][0]
print(pid)
gdb = self.gdb_attach(pid)
gdb.set_breakpoint('reform_and_rewrite_tuple')
if not gdb.continue_execution_until_running():
print('Failed gdb continue')
exit(1)
acurs.execute("VACUUM FULL t_heap")
if gdb.stopped_in_breakpoint():
if gdb.continue_execution_until_break(20) != 'breakpoint-hit':
print('Failed to hit breakpoint')
exit(1)
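# Each breakpoint hit is one reform_and_rewrite_tuple() call; ignoring 20
# hits lets VACUUM FULL rewrite some tuples first. The backups below run
# while the vacuum backend is still suspended inside the rewrite.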
self.backup_node(
backup_dir, 'node', node, backup_type='ptrack',
options=['--stream', '--log-level-file=verbose']
)
self.backup_node(
backup_dir, 'node', node, backup_type='ptrack',
options=['--stream', '--log-level-file=verbose']
)
pgdata = self.pgdata_content(node.data_dir)
old_tablespace = self.get_tblspace_path(node, 'somedata')
new_tablespace = self.get_tblspace_path(node_restored, 'somedata_new')
self.restore_node(
backup_dir, 'node', node_restored,
options=["-j", "4", "-T", "{0}={1}".format(
old_tablespace, new_tablespace)]
)
pgdata_restored = self.pgdata_content(node_restored.data_dir)
node_restored.append_conf(
"postgresql.auto.conf", "port = {0}".format(node_restored.port))
node_restored.start()
# Physical comparison
self.compare_pgdata(pgdata, pgdata_restored)
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_ptrack_vacuum_truncate(self):
"""make node, create table, take full backup,
delete last 3 pages, vacuum relation,
take ptrack backup, take second ptrack backup,
restore last ptrack backup and check data correctness"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
base_dir="{0}/{1}/node".format(module_name, fname),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={
'wal_level': 'replica',
'max_wal_senders': '2',
'checkpoint_timeout': '300s',
'ptrack_enable': 'on'
}
)
node_restored = self.make_simple_node(
base_dir="{0}/{1}/node_restored".format(module_name, fname),
)
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
node_restored.cleanup()
node.start()
self.create_tblspace_in_node(node, 'somedata')
node.safe_psql(
"postgres",
"create sequence t_seq; "
"create table t_heap tablespace somedata as select i as id, "
"md5(i::text) as text, "
"md5(repeat(i::text,10))::tsvector as tsvector "
"from generate_series(0,1024) i;"
)
node.safe_psql(
"postgres",
"vacuum t_heap"
)
self.backup_node(backup_dir, 'node', node)
node.safe_psql(
"postgres",
"delete from t_heap where ctid >= '(11,0)'"
)
node.safe_psql(
"postgres",
"vacuum t_heap"
)
self.backup_node(
backup_dir, 'node', node, backup_type='ptrack',
options=['--stream', '--log-level-file=verbose']
)
self.backup_node(
backup_dir, 'node', node, backup_type='ptrack',
options=['--stream', '--log-level-file=verbose']
)
pgdata = self.pgdata_content(node.data_dir)
old_tablespace = self.get_tblspace_path(node, 'somedata')
new_tablespace = self.get_tblspace_path(node_restored, 'somedata_new')
self.restore_node(
backup_dir, 'node', node_restored,
options=["-j", "4", "-T", "{0}={1}".format(
old_tablespace, new_tablespace)]
)
pgdata_restored = self.pgdata_content(node_restored.data_dir)
node_restored.append_conf(
"postgresql.auto.conf", "port = {0}".format(node_restored.port))
node_restored.start()
# Physical comparison
self.compare_pgdata(pgdata, pgdata_restored)
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_ptrack_simple(self):
"""make node, make full and ptrack stream backups,"
@ -179,9 +418,11 @@ class PtrackBackupTest(ProbackupTest, unittest.TestCase):
pgdata = self.pgdata_content(node.data_dir)
result = node.safe_psql("postgres", "SELECT * FROM t_heap")
-        self.restore_node(backup_dir, 'node', node_restored, options=["-j", "4"])
+        self.restore_node(
+            backup_dir, 'node', node_restored, options=["-j", "4"])
        pgdata_restored = self.pgdata_content(node_restored.data_dir)
-        node_restored.append_conf("postgresql.auto.conf", "port = {0}".format(node_restored.port))
+        node_restored.append_conf(
+            "postgresql.auto.conf", "port = {0}".format(node_restored.port))
node_restored.start()
# Logical comparison
@ -194,7 +435,7 @@ class PtrackBackupTest(ProbackupTest, unittest.TestCase):
self.compare_pgdata(pgdata, pgdata_restored)
# Clean after yourself
-        # self.del_test_dir(module_name, fname)
+        self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_ptrack_get_block(self):
@ -231,24 +472,14 @@ class PtrackBackupTest(ProbackupTest, unittest.TestCase):
gdb=True
)
-        if gdb.set_breakpoint('make_pagemap_from_ptrack'):
-            result = gdb.run()
-        else:
-            self.assertTrue(False, 'Failed to set gdb breakpoint')
-        if result != 'breakpoint-hit':
-            print('Error in hitting breaking point')
-            sys.exit(1)
+        gdb.set_breakpoint('make_pagemap_from_ptrack')
+        gdb.run_until_break()
node.safe_psql(
"postgres",
"update t_heap set id = 100500")
-        if not gdb.continue_execution():
-            self.assertTrue(
-                False,
-                'Failed to continue execution after breakpoint'
-            )
+        gdb.continue_execution_until_exit()
self.backup_node(
backup_dir, 'node', node,
@ -271,7 +502,7 @@ class PtrackBackupTest(ProbackupTest, unittest.TestCase):
self.compare_pgdata(pgdata, pgdata_restored)
# Clean after yourself
-        # self.del_test_dir(module_name, fname)
+        self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_ptrack_stream(self):
@ -802,7 +1033,7 @@ class PtrackBackupTest(ProbackupTest, unittest.TestCase):
self.compare_pgdata(pgdata, pgdata_restored)
# Clean after yourself
-        # self.del_test_dir(module_name, fname)
+        self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_alter_database_set_tablespace_ptrack(self):