1
0
mirror of https://github.com/postgrespro/pg_probackup.git synced 2024-11-26 09:11:19 +02:00

fix PGPRO-1507: null-sized files were not copied to backup

This commit is contained in:
Grigory Smolkin 2018-03-16 11:53:43 +03:00
parent 1b61ca8299
commit 541195b597
7 changed files with 220 additions and 65 deletions

View File

@ -586,7 +586,7 @@ do_backup_instance(void)
* For backup from master wait for previous segment.
* For backup from replica wait for current segment.
*/
!from_replica);
!from_replica, backup_files_list);
}
if (current.backup_mode == BACKUP_MODE_DIFF_PTRACK)
@ -1948,7 +1948,7 @@ backup_files(void *arg)
if (S_ISREG(buf.st_mode))
{
/* Check that file exists in previous backup */
if (current.backup_mode == BACKUP_MODE_DIFF_DELTA)
if (current.backup_mode != BACKUP_MODE_FULL)
{
int p;
char *relative;
@ -1961,7 +1961,7 @@ backup_files(void *arg)
{
/* File exists in previous backup */
file->exists_in_prev = true;
elog(INFO, "File exists at the time of previous backup %s", relative);
// elog(VERBOSE, "File exists at the time of previous backup %s", relative);
break;
}
}
@ -1981,7 +1981,17 @@ backup_files(void *arg)
continue;
}
}
else if (!copy_file(arguments->from_root,
else
/* TODO:
* Check if file exists in previous backup
* If exists:
* if mtime > start_backup_time of parent backup,
* copy file to backup
* if mtime < start_backup_time
* calculate crc, compare crc to old file
* if crc is the same -> skip file
*/
if (!copy_file(arguments->from_root,
arguments->to_root,
file))
{

View File

@ -422,15 +422,21 @@ backup_data_file(backup_files_args* arguments,
int n_blocks_skipped = 0;
int n_blocks_read = 0;
/*
* Skip unchanged file only if it exists in previous backup.
* This way we can correctly handle null-sized files which are
* not tracked by pagemap and thus always marked as unchanged.
*/
if ((backup_mode == BACKUP_MODE_DIFF_PAGE ||
backup_mode == BACKUP_MODE_DIFF_PTRACK) &&
file->pagemap.bitmapsize == PageBitmapIsEmpty)
file->pagemap.bitmapsize == PageBitmapIsEmpty &&
file->exists_in_prev)
{
/*
* There are no changed blocks since last backup. We want to make an
* incremental backup, so we should exit.
*/
elog(VERBOSE, "Skipping the file because it didn`t changed: %s", file->path);
elog(VERBOSE, "Skipping the unchanged file: %s", file->path);
return false;
}
@ -485,10 +491,12 @@ backup_data_file(backup_files_args* arguments,
/*
* Read each page, verify checksum and write it to backup.
* If page map is empty backup all pages of the relation.
* If page map is empty or file is not present in previous backup
* backup all pages of the relation.
*/
if (file->pagemap.bitmapsize == PageBitmapIsEmpty
|| file->pagemap.bitmapsize == PageBitmapIsAbsent)
|| file->pagemap.bitmapsize == PageBitmapIsAbsent
|| !file->exists_in_prev)
{
for (blknum = 0; blknum < nblocks; blknum++)
{

View File

@ -156,7 +156,7 @@ pgFileInit(const char *path)
file->is_datafile = false;
file->linked = NULL;
file->pagemap.bitmap = NULL;
file->pagemap.bitmapsize = (current.backup_mode == BACKUP_MODE_DIFF_PAGE) ? PageBitmapIsEmpty : PageBitmapIsAbsent;
file->pagemap.bitmapsize = PageBitmapIsAbsent;
file->tblspcOid = 0;
file->dbOid = 0;
file->relOid = 0;

View File

@ -115,8 +115,9 @@ static int SimpleXLogPageRead(XLogReaderState *xlogreader,
*/
void
extractPageMap(const char *archivedir, XLogRecPtr startpoint, TimeLineID tli,
XLogRecPtr endpoint, bool prev_segno)
XLogRecPtr endpoint, bool prev_segno, parray *files)
{
size_t i;
XLogRecord *record;
XLogReaderState *xlogreader;
char *errormsg;
@ -187,6 +188,15 @@ extractPageMap(const char *archivedir, XLogRecPtr startpoint, TimeLineID tli,
xlogreadfd = -1;
xlogexists = false;
}
/* Mark every datafile with empty pagemap as unchanged */
for (i = 0; i < parray_num(files); i++)
{
pgFile *file = (pgFile *) parray_get(files, i);
if (file->is_datafile && file->pagemap.bitmap == NULL)
file->pagemap.bitmapsize = PageBitmapIsEmpty;
}
elog(LOG, "Pagemap compiled");
}

View File

@ -107,8 +107,8 @@ typedef struct pgFile
} pgFile;
/* Special values of datapagemap_t bitmapsize */
#define PageBitmapIsEmpty 0
#define PageBitmapIsAbsent -1
#define PageBitmapIsEmpty 0 /* Used to mark unchanged datafiles */
#define PageBitmapIsAbsent -1 /* Used to mark files with unknown state of pagemap, i.e. datafiles without _ptrack */
/* Current state of backup */
typedef enum BackupStatus
@ -468,7 +468,8 @@ extern bool calc_file_checksum(pgFile *file);
extern void extractPageMap(const char *datadir,
XLogRecPtr startpoint,
TimeLineID tli,
XLogRecPtr endpoint, bool prev_segno);
XLogRecPtr endpoint, bool prev_segno,
parray *backup_files_list);
extern void validate_wal(pgBackup *backup,
const char *archivedir,
time_t target_time,

View File

@ -14,10 +14,12 @@ class DeltaTest(ProbackupTest, unittest.TestCase):
# @unittest.skip("skip")
def test_delta_vacuum_truncate_1(self):
"""make node, create table, take full backup,
delete last 3 pages, vacuum relation,
take delta backup, take second delta backup,
restore latest delta backup and check data correctness"""
"""
make node, create table, take full backup,
delete last 3 pages, vacuum relation,
take delta backup, take second delta backup,
restore latest delta backup and check data correctness
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
@ -100,10 +102,12 @@ class DeltaTest(ProbackupTest, unittest.TestCase):
# @unittest.skip("skip")
def test_delta_vacuum_truncate_2(self):
"""make node, create table, take full backup,
delete last 3 pages, vacuum relation,
take delta backup, take second delta backup,
restore latest delta backup and check data correctness"""
"""
make node, create table, take full backup,
delete last 3 pages, vacuum relation,
take delta backup, take second delta backup,
restore latest delta backup and check data correctness
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
@ -191,10 +195,12 @@ class DeltaTest(ProbackupTest, unittest.TestCase):
# @unittest.skip("skip")
def test_delta_vacuum_truncate_3(self):
"""make node, create table, take full backup,
delete last 3 pages, vacuum relation,
take delta backup, take second delta backup,
restore latest delta backup and check data correctness"""
"""
make node, create table, take full backup,
delete last 3 pages, vacuum relation,
take delta backup, take second delta backup,
restore latest delta backup and check data correctness
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
@ -421,7 +427,7 @@ class DeltaTest(ProbackupTest, unittest.TestCase):
# Clean after yourself
self.del_test_dir(module_name, fname)
@unittest.skip("skip")
# @unittest.skip("skip")
def test_delta_multiple_segments(self):
"""
Make node, create table with multiple segments,
@ -508,8 +514,10 @@ class DeltaTest(ProbackupTest, unittest.TestCase):
# @unittest.skip("skip")
def test_delta_vacuum_full(self):
"""make node, make full and delta stream backups,
restore them and check data correctness"""
"""
make node, make full and delta stream backups,
restore them and check data correctness
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
@ -836,8 +844,10 @@ class DeltaTest(ProbackupTest, unittest.TestCase):
# @unittest.skip("skip")
def test_alter_table_set_tablespace_delta(self):
"""Make node, create tablespace with table, take full backup,
alter tablespace location, take delta backup, restore database."""
"""
Make node, create tablespace with table, take full backup,
alter tablespace location, take delta backup, restore database.
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
@ -1023,8 +1033,10 @@ class DeltaTest(ProbackupTest, unittest.TestCase):
# @unittest.skip("skip")
def test_delta_delete(self):
"""Make node, create tablespace with table, take full backup,
alter tablespace location, take delta backup, restore database."""
"""
Make node, create tablespace with table, take full backup,
alter tablespace location, take delta backup, restore database.
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(

View File

@ -13,10 +13,12 @@ class PageBackupTest(ProbackupTest, unittest.TestCase):
# @unittest.skip("skip")
def test_page_vacuum_truncate(self):
"""make node, create table, take full backup,
delete last 3 pages, vacuum relation,
take page backup, take second page backup,
restore last page backup and check data correctness"""
"""
make node, create table, take full backup,
delete last 3 pages, vacuum relation,
take page backup, take second page backup,
restore last page backup and check data correctness
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
@ -115,7 +117,10 @@ class PageBackupTest(ProbackupTest, unittest.TestCase):
# @unittest.skip("skip")
def test_page_stream(self):
"""make archive node, take full and page stream backups, restore them and check data correctness"""
"""
make archive node, take full and page stream backups,
restore them and check data correctness
"""
self.maxDiff = None
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
@ -192,14 +197,22 @@ class PageBackupTest(ProbackupTest, unittest.TestCase):
# @unittest.skip("skip")
def test_page_archive(self):
"""make archive node, take full and page archive backups, restore them and check data correctness"""
"""
make archive node, take full and page archive backups,
restore them and check data correctness
"""
self.maxDiff = None
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(base_dir="{0}/{1}/node".format(module_name, fname),
node = self.make_simple_node(
base_dir="{0}/{1}/node".format(module_name, fname),
set_replication=True,
initdb_params=['--data-checksums'],
pg_options={'wal_level': 'replica', 'max_wal_senders': '2', 'checkpoint_timeout': '30s', 'ptrack_enable': 'on'}
pg_options={
'wal_level': 'replica',
'max_wal_senders': '2',
'checkpoint_timeout': '30s',
'ptrack_enable': 'on'}
)
self.init_pb(backup_dir)
@ -210,33 +223,48 @@ class PageBackupTest(ProbackupTest, unittest.TestCase):
# FULL BACKUP
node.safe_psql(
"postgres",
"create table t_heap as select i as id, md5(i::text) as text, md5(i::text)::tsvector as tsvector from generate_series(0,1) i")
"create table t_heap as select i as id, md5(i::text) as text, "
"md5(i::text)::tsvector as tsvector from generate_series(0,1) i")
full_result = node.execute("postgres", "SELECT * FROM t_heap")
full_backup_id = self.backup_node(backup_dir, 'node', node, backup_type='full')
full_backup_id = self.backup_node(
backup_dir, 'node', node, backup_type='full')
#PAGE BACKUP
# PAGE BACKUP
node.safe_psql(
"postgres",
"insert into t_heap select i as id, md5(i::text) as text, md5(i::text)::tsvector as tsvector from generate_series(0,2) i")
"insert into t_heap select i as id, "
"md5(i::text) as text, md5(i::text)::tsvector as tsvector "
"from generate_series(0,2) i")
page_result = node.execute("postgres", "SELECT * FROM t_heap")
page_backup_id = self.backup_node(backup_dir, 'node', node, backup_type='page')
page_backup_id = self.backup_node(
backup_dir, 'node', node, backup_type='page')
# Drop Node
node.cleanup()
# Restore and check full backup
self.assertIn("INFO: Restore of backup {0} completed.".format(full_backup_id),
self.restore_node(backup_dir, 'node', node, backup_id=full_backup_id, options=["-j", "4"]),
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(self.output), self.cmd))
self.assertIn("INFO: Restore of backup {0} completed.".format(
full_backup_id),
self.restore_node(
backup_dir, 'node', node,
backup_id=full_backup_id,
options=["-j", "4"]),
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(self.output), self.cmd))
node.start()
full_result_new = node.execute("postgres", "SELECT * FROM t_heap")
self.assertEqual(full_result, full_result_new)
node.cleanup()
# Restore and check page backup
self.assertIn("INFO: Restore of backup {0} completed.".format(page_backup_id),
self.restore_node(backup_dir, 'node', node, backup_id=page_backup_id, options=["-j", "4"]),
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(repr(self.output), self.cmd))
self.assertIn(
"INFO: Restore of backup {0} completed.".format(page_backup_id),
self.restore_node(
backup_dir, 'node', node,
backup_id=page_backup_id,
options=["-j", "4"]),
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
repr(self.output), self.cmd))
node.start()
page_result_new = node.execute("postgres", "SELECT * FROM t_heap")
self.assertEqual(page_result, page_result_new)
@ -247,7 +275,10 @@ class PageBackupTest(ProbackupTest, unittest.TestCase):
# @unittest.skip("skip")
def test_page_multiple_segments(self):
"""Make node, create table with multiple segments, write some data to it, check page and data correctness"""
"""
Make node, create table with multiple segments,
write some data to it, check page and data correctness
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
@ -286,15 +317,19 @@ class PageBackupTest(ProbackupTest, unittest.TestCase):
# GET LOGICAL CONTENT FROM NODE
result = node.safe_psql("postgres", "select * from pgbench_accounts")
# PAGE BACKUP
self.backup_node(backup_dir, 'node', node, backup_type='page', options=["--log-level-file=verbose"])
self.backup_node(
backup_dir, 'node', node, backup_type='page',
options=["--log-level-file=verbose"])
# GET PHYSICAL CONTENT FROM NODE
pgdata = self.pgdata_content(node.data_dir)
# RESTORE NODE
restored_node = self.make_simple_node(base_dir="{0}/{1}/restored_node".format(module_name, fname))
restored_node = self.make_simple_node(
base_dir="{0}/{1}/restored_node".format(module_name, fname))
restored_node.cleanup()
tblspc_path = self.get_tblspace_path(node, 'somedata')
tblspc_path_new = self.get_tblspace_path(restored_node, 'somedata_restored')
tblspc_path_new = self.get_tblspace_path(
restored_node, 'somedata_restored')
self.restore_node(backup_dir, 'node', restored_node, options=[
"-j", "4", "-T", "{0}={1}".format(tblspc_path, tblspc_path_new)])
@ -324,8 +359,11 @@ class PageBackupTest(ProbackupTest, unittest.TestCase):
# @unittest.skip("skip")
def test_page_delete(self):
"""Make node, create tablespace with table, take full backup,
alter tablespace location, take page backup, restore database."""
"""
Make node, create tablespace with table, take full backup,
delete everything from table, vacuum table, take page backup,
restore page backup, compare.
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
@ -346,8 +384,7 @@ class PageBackupTest(ProbackupTest, unittest.TestCase):
self.create_tblspace_in_node(node, 'somedata')
# FULL backup
self.backup_node(backup_dir, 'node', node, options=["--stream"])
self.backup_node(backup_dir, 'node', node)
node.safe_psql(
"postgres",
"create table t_heap tablespace somedata as select i as id,"
@ -367,10 +404,7 @@ class PageBackupTest(ProbackupTest, unittest.TestCase):
# PAGE BACKUP
self.backup_node(
backup_dir, 'node', node,
backup_type='page',
options=["--stream"]
)
backup_dir, 'node', node, backup_type='page')
if self.paranoia:
pgdata = self.pgdata_content(node.data_dir)
@ -386,8 +420,88 @@ class PageBackupTest(ProbackupTest, unittest.TestCase):
"-j", "4",
"-T", "{0}={1}".format(
self.get_tblspace_path(node, 'somedata'),
self.get_tblspace_path(node_restored, 'somedata')
)
self.get_tblspace_path(node_restored, 'somedata'))
]
)
# GET RESTORED PGDATA AND COMPARE
if self.paranoia:
pgdata_restored = self.pgdata_content(node_restored.data_dir)
self.compare_pgdata(pgdata, pgdata_restored)
# START RESTORED NODE
node_restored.append_conf(
'postgresql.auto.conf', 'port = {0}'.format(node_restored.port))
node_restored.start()
# Clean after yourself
self.del_test_dir(module_name, fname)
# @unittest.skip("skip")
def test_page_delete_1(self):
"""
Make node, create tablespace with table, take full backup,
delete everything from table, vacuum table, take page backup,
restore page backup, compare.
"""
fname = self.id().split('.')[3]
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
node = self.make_simple_node(
base_dir="{0}/{1}/node".format(module_name, fname),
set_replication=True, initdb_params=['--data-checksums'],
pg_options={
'wal_level': 'replica',
'max_wal_senders': '2',
'checkpoint_timeout': '30s',
'autovacuum': 'off'
}
)
self.init_pb(backup_dir)
self.add_instance(backup_dir, 'node', node)
self.set_archiving(backup_dir, 'node', node)
node.start()
self.create_tblspace_in_node(node, 'somedata')
node.safe_psql(
"postgres",
"create table t_heap tablespace somedata as select i as id,"
" md5(i::text) as text, md5(i::text)::tsvector as tsvector"
" from generate_series(0,100) i"
)
# FULL backup
self.backup_node(backup_dir, 'node', node)
node.safe_psql(
"postgres",
"delete from t_heap"
)
node.safe_psql(
"postgres",
"vacuum t_heap"
)
# PAGE BACKUP
self.backup_node(
backup_dir, 'node', node, backup_type='page')
if self.paranoia:
pgdata = self.pgdata_content(node.data_dir)
# RESTORE
node_restored = self.make_simple_node(
base_dir="{0}/{1}/node_restored".format(module_name, fname)
)
node_restored.cleanup()
self.restore_node(
backup_dir, 'node', node_restored,
options=[
"-j", "4",
"-T", "{0}={1}".format(
self.get_tblspace_path(node, 'somedata'),
self.get_tblspace_path(node_restored, 'somedata'))
]
)