1
0
mirror of https://github.com/postgrespro/pg_probackup.git synced 2025-01-08 13:44:32 +02:00

Page-level backup using block tracking in WAL records

This commit improves the performance of page-level (differential)
backups. Instead of scanning all the pages of each relation file,
something that can take very long on large data sets, the backup now
scans only the list of blocks changed by WAL records since the last full
or differential backup.

As a restriction and to avoid potential data corruption should hint-bit
updates occur on a page, backups can only be taken from a server that has
wal_log_hints or data checksums enabled.

Base patch by Yury Zhuravlev, heavily modified by me.
This commit is contained in:
Michael Paquier 2016-01-15 23:47:38 +09:00
parent 47d0b60ebd
commit 04834e73c7
12 changed files with 587 additions and 128 deletions

5
.gitignore vendored
View File

@ -25,3 +25,8 @@
/regression.diffs
/regression.out
/results
# Extra files
/datapagemap.c
/datapagemap.h
/xlogreader.c

View File

@ -14,11 +14,16 @@ OBJS = backup.o \
util.o \
validate.o \
xlog.o \
datapagemap.o \
parsexlog.o \
xlogreader.o \
pgut/pgut.o \
pgut/pgut-port.o
DOCS = doc/pg_arman.txt
EXTRA_CLEAN = datapagemap.c datapagemap.h xlogreader.c
# asciidoc and xmlto are present, so install the html documentation and man
# pages as well. html is part of the vanilla documentation. Man pages need a
# special handling at installation.
@ -35,6 +40,27 @@ PG_LIBS = $(libpq_pgport)
REGRESS = init option show delete backup restore
all: checksrcdir docs datapagemap.h pg_arman
# This rule's only purpose is to give the user instructions on how to pass
# the path to PostgreSQL source tree to the makefile.
.PHONY: checksrcdir
checksrcdir:
ifndef top_srcdir
@echo "You must have PostgreSQL source tree available to compile."
@echo "Pass the path to the PostgreSQL source tree to make, in the top_srcdir"
@echo "variable: \"make top_srcdir=<path to PostgreSQL source tree>\""
@exit 1
endif
# Those files are symlinked from the PostgreSQL sources.
xlogreader.c: % : $(top_srcdir)/src/backend/access/transam/%
rm -f $@ && $(LN_S) $< .
datapagemap.c: % : $(top_srcdir)/src/bin/pg_rewind/%
rm -f $@ && $(LN_S) $< .
# Symlink datapagemap.h from the pg_rewind sources. Remove any existing
# target first so that $(LN_S) does not fail when the link already exists.
datapagemap.h: % : $(top_srcdir)/src/bin/pg_rewind/%
	rm -f $@ && $(LN_S) $< .
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
@ -43,7 +69,6 @@ include $(PGXS)
# Compile documentation as well if ASCIIDOC and XMLTO are defined
ifneq ($(ASCIIDOC),)
ifneq ($(XMLTO),)
all: docs
docs:
$(MAKE) -C doc/
@ -53,7 +78,13 @@ install: install-man
install-man:
$(MKDIR_P) '$(DESTDIR)$(mandir)/man1/'
$(INSTALL_DATA) $(man_DOCS) '$(DESTDIR)$(mandir)/man1/'
else
docs:
@echo "No docs to build"
endif # XMLTO
else
docs:
@echo "No docs to build"
endif # ASCIIDOC
# Clean up documentation as well

138
backup.c
View File

@ -28,6 +28,9 @@ static int server_version = 0;
static bool in_backup = false; /* TODO: more robust logic */
/* list of files contained in backup */
parray *backup_files_list;
/*
* Backup routines
*/
@ -48,6 +51,7 @@ static void create_file_list(parray *files,
const char *subdir,
const char *prefix,
bool is_append);
static void wait_for_archive(pgBackup *backup, const char *sql);
/*
* Take a backup of database and return the list of files backed up.
@ -56,7 +60,6 @@ static parray *
do_backup_database(parray *backup_list, pgBackupOption bkupopt)
{
int i;
parray *files; /* backup file list from non-snapshot */
parray *prev_files = NULL; /* file list of previous database backup */
FILE *fp;
char path[MAXPGPATH];
@ -68,6 +71,7 @@ do_backup_database(parray *backup_list, pgBackupOption bkupopt)
/* repack the options */
bool smooth_checkpoint = bkupopt.smooth_checkpoint;
pgBackup *prev_backup = NULL;
/* Block backup operations on a standby */
if (pg_is_standby())
@ -78,6 +82,9 @@ do_backup_database(parray *backup_list, pgBackupOption bkupopt)
/* Initialize size summary */
current.data_bytes = 0;
/* do some checks on the node */
sanityChecks();
/*
* Obtain current timeline by scanning control file, the LSN
* obtained at output of pg_start_backup or pg_stop_backup does
@ -123,8 +130,8 @@ do_backup_database(parray *backup_list, pgBackupOption bkupopt)
* List directories and symbolic links with the physical path to make
* mkdirs.sh, then sort them in order of path. Omit $PGDATA.
*/
files = parray_new();
dir_list_file(files, pgdata, NULL, false, false);
backup_files_list = parray_new();
dir_list_file(backup_files_list, pgdata, NULL, false, false);
if (!check)
{
@ -133,7 +140,7 @@ do_backup_database(parray *backup_list, pgBackupOption bkupopt)
if (fp == NULL)
elog(ERROR_SYSTEM, "can't open make directory script \"%s\": %s",
path, strerror(errno));
dir_print_mkdirs_sh(fp, files, pgdata);
dir_print_mkdirs_sh(fp, backup_files_list, pgdata);
fclose(fp);
if (chmod(path, DIR_PERMISSION) == -1)
elog(ERROR_SYSTEM, "can't change mode of \"%s\": %s", path,
@ -141,9 +148,9 @@ do_backup_database(parray *backup_list, pgBackupOption bkupopt)
}
/* clear directory list */
parray_walk(files, pgFileFree);
parray_free(files);
files = NULL;
parray_walk(backup_files_list, pgFileFree);
parray_free(backup_files_list);
backup_files_list = NULL;
/*
* To take differential backup, the file list of the last completed database
@ -151,8 +158,6 @@ do_backup_database(parray *backup_list, pgBackupOption bkupopt)
*/
if (current.backup_mode == BACKUP_MODE_DIFF_PAGE)
{
pgBackup *prev_backup;
/* find last completed database backup */
prev_backup = catalog_get_last_data_backup(backup_list, current.tli);
pgBackupGetPath(prev_backup, prev_file_txt, lengthof(prev_file_txt),
@ -167,26 +172,55 @@ do_backup_database(parray *backup_list, pgBackupOption bkupopt)
(uint32) (*lsn >> 32), (uint32) *lsn);
}
/* initialize backup list from non-snapshot */
files = parray_new();
/* initialize backup list */
backup_files_list = parray_new();
/* list files with the logical path. omit $PGDATA */
add_files(files, pgdata, false, true);
add_files(backup_files_list, pgdata, false, true);
/* backup files */
pgBackupGetPath(&current, path, lengthof(path), DATABASE_DIR);
backup_files(pgdata, path, files, prev_files, lsn, NULL);
/*
* Build page mapping in differential mode. When using this mode, the
* list of blocks to be taken is known by scanning the WAL segments
* present in archives up to the point where start backup has begun.
* However, normally this segment is not yet available in the archives,
* leading to failures when building the page map. Hence before doing
* anything and in order to ensure that all the segments needed for the
* scan are here, force a switch of the last segment with pg_switch_xlog.
*/
if (current.backup_mode == BACKUP_MODE_DIFF_PAGE)
{
/* Enforce archiving of last segment and wait for it to be here */
wait_for_archive(&current, "SELECT * FROM pg_switch_xlog()");
/* Now build the page map */
parray_qsort(backup_files_list, pgFileComparePathDesc);
elog(LOG, "extractPageMap");
elog(LOG, "current_tli:%X", current.tli);
elog(LOG, "prev_backup->start_lsn: %X/%X",
(uint32) (prev_backup->start_lsn >> 32),
(uint32) (prev_backup->start_lsn));
elog(LOG, "current.start_lsn: %X/%X",
(uint32) (current.start_lsn >> 32),
(uint32) (current.start_lsn));
extractPageMap(arclog_path, prev_backup->start_lsn, current.tli,
current.start_lsn);
}
backup_files(pgdata, path, backup_files_list, prev_files, lsn, NULL);
/* notify end of backup */
pg_stop_backup(&current);
/* create file list */
create_file_list(files, pgdata, DATABASE_FILE_LIST, NULL, false);
create_file_list(backup_files_list, pgdata, DATABASE_FILE_LIST, NULL, false);
/* print summary of size of backup mode files */
for (i = 0; i < parray_num(files); i++)
for (i = 0; i < parray_num(backup_files_list); i++)
{
pgFile *file = (pgFile *) parray_get(files, i);
pgFile *file = (pgFile *) parray_get(backup_files_list, i);
if (!S_ISREG(file->mode))
continue;
/*
@ -204,7 +238,7 @@ do_backup_database(parray *backup_list, pgBackupOption bkupopt)
current.data_bytes);
elog(LOG, "========================================");
return files;
return backup_files_list;
}
@ -654,7 +688,6 @@ backup_files(const char *from_root,
}
else
{
elog(LOG, "\n");
elog(ERROR_SYSTEM,
"can't stat backup mode. \"%s\": %s",
file->path, strerror(errno));
@ -825,3 +858,72 @@ create_file_list(parray *files,
fclose(fp);
}
}
/*
 * Build the path of one segment file of a relation fork.
 *
 * Segment 0 uses the path given by relpathperm() unchanged; any other
 * segment gets ".<segno>" appended to it.
 *
 * The returned string is allocated (by relpathperm() or psprintf()) and
 * must be released by the caller.
 */
static char *
datasegpath(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
{
	char	   *base = relpathperm(rnode, forknum);
	char	   *with_suffix;

	/* First segment: the base path is already the answer. */
	if (segno <= 0)
		return base;

	/* Later segments: append the segment number and drop the base copy. */
	with_suffix = psprintf("%s.%u", base, segno);
	pfree(base);

	return with_suffix;
}
/*
 * Record one changed block in the page map of the file that contains it.
 *
 * This routine gets called while reading WAL segments from the WAL archive,
 * for every block that has changed in the target system. The block number
 * is split into a segment number and a block number inside that segment
 * (using RELSEG_SIZE), the matching segment file is looked up by path in
 * backup_files_list, and the in-segment block number is added to that
 * file's pagemap.
 */
void
process_block_change(ForkNumber forknum, RelFileNode rnode, BlockNumber blkno)
{
	int			segno = blkno / RELSEG_SIZE;
	BlockNumber blkno_inseg = blkno % RELSEG_SIZE;
	char	   *rel_path = datasegpath(rnode, forknum, segno);
	char	   *path;
	pgFile	   *file_item = NULL;
	int			idx;

	/* Full path is "<pgdata>/<rel_path>": one byte for '/', one for NUL. */
	path = pg_malloc(strlen(rel_path) + strlen(pgdata) + 2);
	sprintf(path, "%s/%s", pgdata, rel_path);

	/* Look for the entry whose path matches exactly; stop at the first hit. */
	for (idx = 0;
		 file_item == NULL && idx < parray_num(backup_files_list);
		 idx++)
	{
		pgFile	   *candidate = (pgFile *) parray_get(backup_files_list, idx);

		if (strcmp(candidate->path, path) == 0)
			file_item = candidate;
	}

	/*
	 * If we don't have any record of this file in the file map, there is
	 * nothing to track for it and the change is ignored. If it is a new
	 * relation file, the backup would simply copy it as-is.
	 */
	if (file_item != NULL)
		datapagemap_add(&file_item->pagemap, blkno_inseg);

	pg_free(path);
	pg_free(rel_path);
}

224
data.c
View File

@ -74,10 +74,10 @@ backup_data_file(const char *from_root, const char *to_root,
FILE *out;
BackupPageHeader header;
DataPage page; /* used as read buffer */
BlockNumber blknum;
size_t read_len;
int errno_tmp;
BlockNumber blknum = 0;
size_t read_len = 0;
pg_crc32 crc;
off_t offset;
INIT_CRC32C(crc);
@ -117,117 +117,139 @@ backup_data_file(const char *from_root, const char *to_root,
/* confirm server version */
check_server_version();
/* read each page and write the page excluding hole */
for (blknum = 0;
(read_len = fread(&page, 1, sizeof(page), in)) == sizeof(page);
++blknum)
{
XLogRecPtr page_lsn;
int upper_offset;
int upper_length;
header.block = blknum;
/*
* If a invalid data page was found, fallback to simple copy to ensure
* all pages in the file don't have BackupPageHeader.
*/
if (!parse_page(&page, &page_lsn,
&header.hole_offset, &header.hole_length))
{
elog(LOG, "%s fall back to simple copy", file->path);
fclose(in);
fclose(out);
file->is_datafile = false;
return copy_file(from_root, to_root, file);
}
file->read_size += read_len;
/* if the page has not been modified since last backup, skip it */
if (lsn && !XLogRecPtrIsInvalid(page_lsn) && page_lsn < *lsn)
continue;
upper_offset = header.hole_offset + header.hole_length;
upper_length = BLCKSZ - upper_offset;
/* write data page excluding hole */
if (fwrite(&header, 1, sizeof(header), out) != sizeof(header) ||
fwrite(page.data, 1, header.hole_offset, out) != header.hole_offset ||
fwrite(page.data + upper_offset, 1, upper_length, out) != upper_length)
{
int errno_tmp = errno;
/* oops */
fclose(in);
fclose(out);
elog(ERROR_SYSTEM, "cannot write at block %u of \"%s\": %s",
blknum, to_path, strerror(errno_tmp));
}
/* update CRC */
COMP_CRC32C(crc, &header, sizeof(header));
COMP_CRC32C(crc, page.data, header.hole_offset);
COMP_CRC32C(crc, page.data + upper_offset, upper_length);
file->write_size += sizeof(header) + read_len - header.hole_length;
}
errno_tmp = errno;
if (!feof(in))
{
fclose(in);
fclose(out);
elog(ERROR_SYSTEM, "cannot read backup mode file \"%s\": %s",
file->path, strerror(errno_tmp));
}
/*
* The odd size page at the tail is probably a page exactly written now, so
* write whole of it.
* Read each page and write the page excluding hole. If it has been
* determined that the page can be copied safely, but no page map
* has been built, it means that we are in presence of a relation
* file that needs to be completely scanned. If a page map is present
* only scan the blocks needed. In each case, pages are copied without
* their hole to ensure some basic level of compression.
*/
if (read_len > 0)
if (file->pagemap.bitmapsize == 0)
{
/*
* If the odd size page is the 1st page, fallback to simple copy because
* the file is not a datafile.
* Otherwise treat the page as a datapage with no hole.
*/
if (blknum == 0)
file->is_datafile = false;
else
for (blknum = 0;
(read_len = fread(&page, 1, sizeof(page), in)) == sizeof(page);
++blknum)
{
header.block = blknum;
header.hole_offset = 0;
header.hole_length = 0;
XLogRecPtr page_lsn;
int upper_offset;
int upper_length;
if (fwrite(&header, 1, sizeof(header), out) != sizeof(header))
header.block = blknum;
/*
* If an invalid data page was found, fallback to simple copy to ensure
* all pages in the file don't have BackupPageHeader.
*/
if (!parse_page(&page, &page_lsn,
&header.hole_offset, &header.hole_length))
{
elog(LOG, "%s fall back to simple copy", file->path);
fclose(in);
fclose(out);
file->is_datafile = false;
return copy_file(from_root, to_root, file);
}
file->read_size += read_len;
/* if the page has not been modified since last backup, skip it */
if (lsn && !XLogRecPtrIsInvalid(page_lsn) && page_lsn < *lsn)
continue;
upper_offset = header.hole_offset + header.hole_length;
upper_length = BLCKSZ - upper_offset;
/* write data page excluding hole */
if (fwrite(&header, 1, sizeof(header), out) != sizeof(header) ||
fwrite(page.data, 1, header.hole_offset, out) != header.hole_offset ||
fwrite(page.data + upper_offset, 1, upper_length, out) != upper_length)
{
int errno_tmp = errno;
/* oops */
fclose(in);
fclose(out);
elog(ERROR_SYSTEM,
"cannot write at block %u of \"%s\": %s",
elog(ERROR_SYSTEM, "cannot write at block %u of \"%s\": %s",
blknum, to_path, strerror(errno_tmp));
}
/* update CRC */
COMP_CRC32C(crc, &header, sizeof(header));
file->write_size += sizeof(header);
}
COMP_CRC32C(crc, page.data, header.hole_offset);
COMP_CRC32C(crc, page.data + upper_offset, upper_length);
/* write odd size page image */
if (fwrite(page.data, 1, read_len, out) != read_len)
file->write_size += sizeof(header) + read_len - header.hole_length;
}
}
else
{
datapagemap_iterator_t *iter;
iter = datapagemap_iterate(&file->pagemap);
while (datapagemap_next(iter, &blknum))
{
int errno_tmp = errno;
/* oops */
fclose(in);
fclose(out);
elog(ERROR_SYSTEM, "cannot write at block %u of \"%s\": %s",
blknum, to_path, strerror(errno_tmp));
XLogRecPtr page_lsn;
int upper_offset;
int upper_length;
int ret;
offset = blknum * BLCKSZ;
if (offset > 0)
{
ret = fseek(in, offset, SEEK_SET);
if (ret != 0)
elog(ERROR_PG_INCOMPATIBLE,
"Can't seek in file offset: %llu ret:%i\n",
(long long unsigned int) offset, ret);
}
read_len = fread(&page, 1, sizeof(page), in);
header.block = blknum;
/*
* If an invalid data page was found, fallback to simple copy to ensure
* all pages in the file don't have BackupPageHeader.
*/
if (!parse_page(&page, &page_lsn,
&header.hole_offset, &header.hole_length))
{
elog(LOG, "%s fall back to simple copy", file->path);
fclose(in);
fclose(out);
file->is_datafile = false;
return copy_file(from_root, to_root, file);
}
file->read_size += read_len;
/* if the page has not been modified since last backup, skip it */
if (lsn && !XLogRecPtrIsInvalid(page_lsn) && page_lsn < *lsn)
continue;
upper_offset = header.hole_offset + header.hole_length;
upper_length = BLCKSZ - upper_offset;
/* write data page excluding hole */
if (fwrite(&header, 1, sizeof(header), out) != sizeof(header) ||
fwrite(page.data, 1, header.hole_offset, out) != header.hole_offset ||
fwrite(page.data + upper_offset, 1, upper_length, out) != upper_length)
{
int errno_tmp = errno;
/* oops */
fclose(in);
fclose(out);
elog(ERROR_SYSTEM, "cannot write at block %u of \"%s\": %s",
blknum, to_path, strerror(errno_tmp));
}
/* update CRC */
COMP_CRC32C(crc, &header, sizeof(header));
COMP_CRC32C(crc, page.data, header.hole_offset);
COMP_CRC32C(crc, page.data + upper_offset, upper_length);
file->write_size += sizeof(header) + read_len - header.hole_length;
}
COMP_CRC32C(crc, page.data, read_len);
file->write_size += read_len;
file->read_size += read_len;
pg_free(iter);
}
/*
@ -344,6 +366,11 @@ restore_data_file(const char *from_root,
}
}
elog(LOG, "header block: %i, blknum: %i, hole_offset: %i, BLCKSZ:%i",
header.block,
blknum,
header.hole_offset,
BLCKSZ);
if (header.block < blknum || header.hole_offset > BLCKSZ ||
(int) header.hole_offset + (int) header.hole_length > BLCKSZ)
{
@ -381,6 +408,7 @@ restore_data_file(const char *from_root,
if (chmod(to_path, file->mode) == -1)
{
int errno_tmp = errno;
fclose(in);
fclose(out);
elog(ERROR_SYSTEM, "cannot change mode of \"%s\": %s", to_path,

View File

@ -97,7 +97,7 @@ pgBackupDelete(int keep_generations, int keep_days)
else
snprintf(days_str, lengthof(days_str), "%d", keep_days);
elog(LOG, "deleted old backups (generations=%s, days=%s)\n",
elog(LOG, "deleted old backups (generations=%s, days=%s)",
generations_str, days_str);
}

13
dir.c
View File

@ -17,6 +17,7 @@
#include <time.h>
#include "pgut/pgut-port.h"
#include "datapagemap.h"
/* directory exclusion list for backup mode listing */
const char *pgdata_exclude[] =
@ -78,7 +79,7 @@ pgFileNew(const char *path, bool omit_symlink)
strerror(errno));
}
file = (pgFile *) pgut_malloc(offsetof(pgFile, path) + strlen(path) + 1);
file = (pgFile *) pgut_malloc(sizeof(pgFile));
file->mtime = st.st_mtime;
file->size = st.st_size;
@ -88,6 +89,9 @@ pgFileNew(const char *path, bool omit_symlink)
file->crc = 0;
file->is_datafile = false;
file->linked = NULL;
file->pagemap.bitmap = NULL;
file->pagemap.bitmapsize = 0;
file->path = pgut_malloc(strlen(path) + 1);
strcpy(file->path, path); /* enough buffer size guaranteed */
return file;
@ -167,6 +171,7 @@ pgFileFree(void *file)
if (file == NULL)
return;
free(((pgFile *)file)->linked);
free(((pgFile *)file)->path);
free(file);
}
@ -540,8 +545,10 @@ dir_read_file_list(const char *root, const char *file_txt)
}
tm.tm_isdst = -1;
file = (pgFile *) pgut_malloc(offsetof(pgFile, path) +
(root ? strlen(root) + 1 : 0) + strlen(path) + 1);
file = (pgFile *) pgut_malloc(sizeof(pgFile));
file->path = pgut_malloc((root ? strlen(root) + 1 : 0) + strlen(path) + 1);
file->pagemap.bitmap = NULL;
file->pagemap.bitmapsize = 0;
tm.tm_year -= 1900;
tm.tm_mon -= 1;

View File

@ -72,7 +72,11 @@ Backup target can be one of the following types:
- Full backup, backup a whole database cluster.
- Differential backup, backup only files or pages modified after the last
verified backup.
verified backup. A scan of the WAL records since the last backup up to the
LSN position of pg_start_backup is done and all the blocks touched are
recorded and tracked as part of the backup. As the WAL segments scanned
need to be located in the WAL archive, the last segment after pg_start_backup
has been run needs to be forcibly switched.
It is recommended to verify backup files as soon as possible after backup.
Unverified backup cannot be used in restore and in differential backup.

244
parsexlog.c Normal file
View File

@ -0,0 +1,244 @@
/*-------------------------------------------------------------------------
*
* parsexlog.c
* Functions for reading Write-Ahead-Log
*
* Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*-------------------------------------------------------------------------
*/
#include "postgres_fe.h"
#include "pg_arman.h"
#include <unistd.h>
#include "commands/dbcommands_xlog.h"
#include "catalog/storage_xlog.h"
/*
* RmgrNames is an array of resource manager names, to make error messages
* a bit nicer.
*/
#define PG_RMGR(symname,name,redo,desc,identify,startup,cleanup) \
name,
static const char *RmgrNames[RM_MAX_ID + 1] = {
#include "access/rmgrlist.h"
};
static void extractPageInfo(XLogReaderState *record);
static int xlogreadfd = -1;
static XLogSegNo xlogreadsegno = -1;
static char xlogfpath[MAXPGPATH];
typedef struct XLogPageReadPrivate
{
const char *archivedir;
TimeLineID tli;
} XLogPageReadPrivate;
static int SimpleXLogPageRead(XLogReaderState *xlogreader,
XLogRecPtr targetPagePtr,
int reqLen, XLogRecPtr targetRecPtr, char *readBuf,
TimeLineID *pageTLI);
/*
 * Read WAL from the archive directory, starting from 'startpoint' on the
 * given timeline, until 'endpoint'. Every record read is handed to
 * extractPageInfo(), which makes note of the data blocks it touches.
 *
 * Nothing is returned: the blocks are recorded as a side effect of
 * extractPageInfo(). Reading stops once the reader's ReadRecPtr equals
 * 'endpoint'. Any failure to allocate the reader or to read a record is
 * reported with elog(ERROR).
 */
void
extractPageMap(const char *archivedir, XLogRecPtr startpoint, TimeLineID tli,
			   XLogRecPtr endpoint)
{
	XLogRecord *record;
	XLogReaderState *xlogreader;
	char	   *errormsg;
	XLogPageReadPrivate private;

	/* State handed to the page-read callback. */
	private.archivedir = archivedir;
	private.tli = tli;

	xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, &private);
	if (xlogreader == NULL)
		elog(ERROR, "out of memory");

	do
	{
		record = XLogReadRecord(xlogreader, startpoint, &errormsg);

		if (record == NULL)
		{
			/*
			 * After the first record 'startpoint' is reset to
			 * InvalidXLogRecPtr, so fall back to the reader's EndRecPtr to
			 * report where the failure happened. Both messages use this
			 * position.
			 */
			XLogRecPtr	errptr;

			errptr = startpoint ? startpoint : xlogreader->EndRecPtr;

			if (errormsg)
				elog(ERROR, "could not read WAL record at %X/%X: %s",
					 (uint32) (errptr >> 32), (uint32) (errptr),
					 errormsg);
			else
				elog(ERROR, "could not read WAL record at %X/%X",
					 (uint32) (errptr >> 32), (uint32) (errptr));
		}

		extractPageInfo(xlogreader);

		startpoint = InvalidXLogRecPtr; /* continue reading at next record */

	} while (xlogreader->ReadRecPtr != endpoint);

	XLogReaderFree(xlogreader);

	/* Close the segment left open by the page-read callback, if any. */
	if (xlogreadfd != -1)
	{
		close(xlogreadfd);
		xlogreadfd = -1;
	}
}
/*
 * XLogreader callback function, to read a WAL page.
 *
 * Opens (or reuses) the segment file containing 'targetPagePtr', whose name
 * is built from private->tli and which is looked up under
 * private->archivedir, then reads one XLOG_BLCKSZ page from it into
 * 'readBuf'.
 *
 * Returns XLOG_BLCKSZ and sets *pageTLI on success. Returns -1 after
 * emitting a WARNING when the segment cannot be opened, positioned, or
 * read in full. 'reqLen' and 'targetRecPtr' are not used here.
 */
static int
SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
				   int reqLen, XLogRecPtr targetRecPtr, char *readBuf,
				   TimeLineID *pageTLI)
{
	XLogPageReadPrivate *private = (XLogPageReadPrivate *) xlogreader->private_data;
	uint32		targetPageOff;
	XLogSegNo	targetSegNo;
	int			nread;

	XLByteToSeg(targetPagePtr, targetSegNo);
	targetPageOff = targetPagePtr % XLogSegSize;

	/*
	 * See if we need to switch to a new segment because the requested record
	 * is not in the currently open one.
	 */
	if (xlogreadfd >= 0 && !XLByteInSeg(targetPagePtr, xlogreadsegno))
	{
		close(xlogreadfd);
		xlogreadfd = -1;
	}

	XLByteToSeg(targetPagePtr, xlogreadsegno);

	if (xlogreadfd < 0)
	{
		char		xlogfname[MAXFNAMELEN];

		XLogFileName(xlogfname, private->tli, xlogreadsegno);
		snprintf(xlogfpath, MAXPGPATH, "%s/%s", private->archivedir,
				 xlogfname);
		elog(LOG, "opening WAL segment \"%s\"", xlogfpath);

		xlogreadfd = open(xlogfpath, O_RDONLY | PG_BINARY, 0);

		if (xlogreadfd < 0)
		{
			elog(WARNING, "could not open WAL segment \"%s\": %s",
				 xlogfpath, strerror(errno));
			return -1;
		}
	}

	/*
	 * At this point, we have the right segment open.
	 */
	Assert(xlogreadfd != -1);

	/* Read the requested page */
	if (lseek(xlogreadfd, (off_t) targetPageOff, SEEK_SET) < 0)
	{
		elog(WARNING, "could not seek in file \"%s\": %s", xlogfpath,
			 strerror(errno));
		return -1;
	}

	nread = read(xlogreadfd, readBuf, XLOG_BLCKSZ);
	if (nread != XLOG_BLCKSZ)
	{
		/*
		 * errno is only meaningful when read() itself failed; a short read
		 * (e.g. a truncated segment) is reported with the byte counts.
		 */
		if (nread < 0)
			elog(WARNING, "could not read from file \"%s\": %s",
				 xlogfpath, strerror(errno));
		else
			elog(WARNING, "could not read from file \"%s\": read %d of %d bytes",
				 xlogfpath, nread, (int) XLOG_BLCKSZ);
		return -1;
	}

	Assert(targetSegNo == xlogreadsegno);

	*pageTLI = private->tli;
	return XLOG_BLCKSZ;
}
/*
 * Extract information on which blocks the current record modifies.
 *
 * Each block reference of the record that belongs to the main fork is
 * passed to process_block_change(). A record flagged with
 * XLR_SPECIAL_REL_UPDATE that is not one of the types listed below is
 * reported with elog(ERROR).
 */
static void
extractPageInfo(XLogReaderState *record)
{
	RmgrId		rmid = XLogRecGetRmid(record);
	uint8		info = XLogRecGetInfo(record);
	uint8		rminfo = info & ~XLR_INFO_MASK;
	bool		known_special;
	int			block_id;

	/*
	 * Special record types that are recognized and deliberately ignored:
	 *
	 * - database creation: new databases would be completely copied if
	 *   found;
	 * - database drop: it is fine to ignore it, the files will be removed
	 *   appropriately;
	 * - relation file creation: the file will be removed when combining the
	 *   backups in the differential case;
	 * - relation truncation: the size difference is noticed later on and
	 *   the missing tail copied from the source system.
	 */
	known_special =
		(rmid == RM_DBASE_ID &&
		 (rminfo == XLOG_DBASE_CREATE || rminfo == XLOG_DBASE_DROP)) ||
		(rmid == RM_SMGR_ID &&
		 (rminfo == XLOG_SMGR_CREATE || rminfo == XLOG_SMGR_TRUNCATE));

	/*
	 * Any other record that modifies a relation file in some special way is
	 * of a type we do not recognize. That's bad - we don't know how to
	 * track that change.
	 */
	if (!known_special && (info & XLR_SPECIAL_REL_UPDATE) != 0)
		elog(ERROR, "WAL record modifies a relation, but record type is not recognized\n"
			 "lsn: %X/%X, rmgr: %s, info: %02X",
			 (uint32) (record->ReadRecPtr >> 32), (uint32) (record->ReadRecPtr),
			 RmgrNames[rmid], info);

	for (block_id = 0; block_id <= record->max_block_id; block_id++)
	{
		RelFileNode rnode;
		ForkNumber	forknum;
		BlockNumber blkno;

		/*
		 * Skip unused block references. We only care about the main fork;
		 * others are copied in toto.
		 */
		if (XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blkno) &&
			forknum == MAIN_FORKNUM)
			process_block_change(forknum, rnode, blkno);
	}
}

View File

@ -17,8 +17,10 @@
#include "pgut/pgut.h"
#include "access/xlogdefs.h"
#include "access/xlog_internal.h"
#include "catalog/pg_control.h"
#include "utils/pg_crc.h"
#include "parray.h"
#include "datapagemap.h"
/* Query to fetch current transaction ID */
#define TXID_CURRENT_SQL "SELECT txid_current();"
@ -62,7 +64,8 @@ typedef struct pgFile
pg_crc32 crc; /* CRC value of the file, regular file only */
char *linked; /* path of the linked file */
bool is_datafile; /* true if the file is PostgreSQL data file */
char path[1]; /* path of the file */
char *path; /* path of the file */
datapagemap_t pagemap;
} pgFile;
typedef struct pgBackupRange
@ -196,11 +199,16 @@ extern pgBackup current;
/* exclude directory list for $PGDATA file listing */
extern const char *pgdata_exclude[];
/* backup file list from non-snapshot */
extern parray *backup_files_list;
/* in backup.c */
extern int do_backup(pgBackupOption bkupopt);
extern BackupMode parse_backup_mode(const char *value);
extern void check_server_version(void);
extern bool fileExists(const char *path);
extern void process_block_change(ForkNumber forknum, RelFileNode rnode,
BlockNumber blkno);
/* in restore.c */
extern int do_restore(const char *target_time,
@ -279,8 +287,13 @@ extern void restore_data_file(const char *from_root, const char *to_root,
extern bool copy_file(const char *from_root, const char *to_root,
pgFile *file);
/* parsexlog.c */
extern void extractPageMap(const char *datadir, XLogRecPtr startpoint,
TimeLineID tli, XLogRecPtr endpoint);
/* in util.c */
extern TimeLineID get_current_timeline(void);
extern void sanityChecks(void);
extern void time2iso(char *buf, size_t len, time_t time);
extern const char *status2str(BackupStatus status);
extern void remove_trailing_space(char *buf, int comment_mark);

View File

@ -155,13 +155,14 @@ base_backup_found:
/* restore following differential backup */
elog(LOG, "searching differential backup...");
for (i = base_index - 1; i >= 0; i--)
{
pgBackup *backup = (pgBackup *) parray_get(backups, i);
/* don't use incomplete nor different timeline backup */
if (backup->status != BACKUP_STATUS_OK ||
backup->tli != base_backup->tli)
backup->tli != base_backup->tli)
continue;
/* use database backup only */

View File

@ -70,6 +70,7 @@ function init_backup()
port = ${TEST_PGPORT}
logging_collector = on
wal_level = hot_standby
wal_log_hints = on
archive_mode = on
archive_command = 'cp %p ${ARCLOG_PATH}/%f'
EOF

31
util.c
View File

@ -11,7 +11,7 @@
#include <time.h>
#include "catalog/pg_control.h"
#include "storage/bufpage.h"
static void
checkControlFile(ControlFileData *ControlFile)
@ -27,13 +27,13 @@ checkControlFile(ControlFileData *ControlFile)
if (!EQ_CRC32C(crc, ControlFile->crc))
elog(ERROR_CORRUPTED, "Calculated CRC checksum does not match value stored in file.\n"
"Either the file is corrupt, or it has a different layout than this program\n"
"is expecting. The results below are untrustworthy.\n");
"is expecting. The results below are untrustworthy.");
if (ControlFile->pg_control_version % 65536 == 0 && ControlFile->pg_control_version / 65536 != 0)
elog(ERROR_CORRUPTED, "possible byte ordering mismatch\n"
"The byte ordering used to store the pg_control file might not match the one\n"
"used by this program. In that case the results below would be incorrect, and\n"
"the PostgreSQL installation would be incompatible with this data directory.\n");
"the PostgreSQL installation would be incompatible with this data directory.");
}
/*
@ -43,7 +43,7 @@ static void
digestControlFile(ControlFileData *ControlFile, char *src, size_t size)
{
if (size != PG_CONTROL_SIZE)
elog(ERROR_PG_INCOMPATIBLE, "unexpected control file size %d, expected %d\n",
elog(ERROR_PG_INCOMPATIBLE, "unexpected control file size %d, expected %d",
(int) size, PG_CONTROL_SIZE);
memcpy(ControlFile, src, sizeof(ControlFileData));
@ -52,6 +52,29 @@ digestControlFile(ControlFileData *ControlFile, char *src, size_t size)
checkControlFile(ControlFile);
}
/*
 * Check that the node under pgdata can be backed up.
 *
 * Reads and validates global/pg_control, then raises
 * ERROR_PG_INCOMPATIBLE unless the control file reports either data
 * checksums or wal_log_hints as enabled.
 */
void
sanityChecks(void)
{
	ControlFileData ControlFile;
	size_t		size;
	char	   *buffer;

	/* Fetch the control file and decode it; the raw copy is then useless. */
	buffer = slurpFile(pgdata, "global/pg_control", &size);
	digestControlFile(&ControlFile, buffer, size);
	pg_free(buffer);

	/*
	 * The node needs to use either data checksums or WAL-logging of hint
	 * bits, to prevent data corruption that could occur because of hint
	 * bits. Either setting is enough.
	 */
	if (ControlFile.wal_log_hints)
		return;
	if (ControlFile.data_checksum_version == PG_DATA_CHECKSUM_VERSION)
		return;

	elog(ERROR_PG_INCOMPATIBLE,
		 "target master need to use either data checksums or \"wal_log_hints = on\".");
}
/*
* Utility shared by backup and restore to fetch the current timeline
* used by a node.