
[PBCKP-913] Fix WAL switching with huge XLogRecord

- Backport of the PBCKP-859 bugfix
- Increase the current segment number when the reader has already read it
- Avoid an error if the reader has to switch WAL again
- Add a Python test for PAGE backup with a huge XLog record
oleg gurev
2024-02-14 10:51:47 +03:00
parent 287e7fc89f
commit 17037baea2
2 changed files with 55 additions and 0 deletions


@@ -1588,9 +1588,14 @@ SwitchThreadToNextWal(XLogReaderState *xlogreader, xlog_thread_arg *arg)
 	reader_data = (XLogReaderData *) xlogreader->private_data;
 	reader_data->need_switch = false;
 
+start:
 	/* Critical section */
 	pthread_lock(&wal_segment_mutex);
 	Assert(segno_next);
+
+	if (reader_data->xlogsegno > segno_next)
+		segno_next = reader_data->xlogsegno;
+
 	reader_data->xlogsegno = segno_next;
 	segnum_read++;
 	segno_next++;
@@ -1604,6 +1609,7 @@ SwitchThreadToNextWal(XLogReaderState *xlogreader, xlog_thread_arg *arg)
 		GetXLogRecPtr(reader_data->xlogsegno, 0, wal_seg_size, arg->startpoint);
 	/* We need to close previously opened file if it wasn't closed earlier */
 	CleanupXLogPageRead(xlogreader);
+	xlogreader->currRecPtr = InvalidXLogRecPtr;
 
 	/* Skip over the page header and contrecord if any */
 	found = XLogFindNextRecord(xlogreader, arg->startpoint);
@@ -1613,6 +1619,8 @@ SwitchThreadToNextWal(XLogReaderState *xlogreader, xlog_thread_arg *arg)
 	 */
 	if (XLogRecPtrIsInvalid(found))
 	{
+		if (reader_data->need_switch)
+			goto start;
 		/*
 		 * Check if we need to stop reading. We stop if other thread found a
 		 * target segment.

@@ -1415,3 +1415,50 @@ class PageTest(ProbackupTest, unittest.TestCase):
 #
 # pgdata_restored = self.pgdata_content(node_restored.data_dir)
 # self.compare_pgdata(pgdata, pgdata_restored)
+
+    def test_page_huge_xlog_record(self):
+        backup_dir = os.path.join(self.tmp_path, self.module_name, self.fname, 'backup')
+
+        node = self.make_simple_node(
+            base_dir=os.path.join(self.module_name, self.fname, 'node'),
+            set_replication=True,
+            initdb_params=['--data-checksums'],
+            pg_options={
+                'max_locks_per_transaction': '1000',
+                'work_mem': '100MB',
+                'temp_buffers': '100MB',
+                'wal_buffers': '128MB',
+                'wal_level': 'logical',
+            })
+
+        self.init_pb(backup_dir)
+        self.add_instance(backup_dir, 'node', node)
+        self.set_archiving(backup_dir, 'node', node)
+        node.slow_start()
+
+        node.pgbench_init(scale=3)
+
+        # Do full backup
+        self.backup_node(backup_dir, 'node', node, backup_type='full')
+        show_backup = self.show_pb(backup_dir, 'node')[0]
+        self.assertEqual(show_backup['status'], "OK")
+        self.assertEqual(show_backup['backup-mode'], "FULL")
+
+        # Originally the client hit the problem in a transaction that
+        # (supposedly) dropped a lot of temporary tables (probably on client
+        # disconnect).  It generated a ~40MB COMMIT WAL record.
+        #
+        # `pg_logical_emit_message` is a much simpler and faster way to
+        # generate such a huge record.
+        node.safe_psql(
+            "postgres",
+            "select pg_logical_emit_message(False, 'z', repeat('o', 60*1000*1000))")
+
+        # Do page backup
+        self.backup_node(backup_dir, 'node', node, backup_type='page')
+        show_backup = self.show_pb(backup_dir, 'node')[1]
+        self.assertEqual(show_backup['status'], "OK")
+        self.assertEqual(show_backup['backup-mode'], "PAGE")
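
For scale: the test leaves wal_segment_size at its 16MB default (initdb_params only sets --data-checksums), so the 60*1000*1000-byte payload (~57MB before record headers) becomes a single XLogRecord spanning at least four WAL segments. The PAGE backup's reader threads therefore have to switch WAL mid-record more than once, which is exactly the path guarded by the goto start added above.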